爬虫之爬取“最好大学排名”并以Excel表格形式打印出来
#!usr/bin/python# -*- coding: utf-8 -*-import requestsfrom bs4 import BeautifulSoupfrom requests.exceptions import RequestExceptionimport refrom lxml import etreeimport pandasurl = "htt...
·
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re

import pandas
import requests
from bs4 import BeautifulSoup
from lxml import etree
from requests.exceptions import RequestException
url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"

# Spreadsheet headers, in the order the values appear in each table row.
COLUMNS = [
    '排名', '学校名', '所在地', '总分', '生源质量', '培养结果', '社会声誉',
    '科研规模', '科研质量', '顶尖成果', '顶尖人才', '科技服务', '成果转化',
    '学生国际化',
]

# Placeholder written when a row has fewer values than there are columns.
# The original script mixed the string 'None' with the None object; the
# string form (used by four of the five optional columns) is applied uniformly.
MISSING = 'None'

# Output path kept exactly as the script has always written it.
OUTPUT_PATH = '中国最好的学排名.xlsx'


def pad_row(texts, width=len(COLUMNS), missing=MISSING):
    """Return *texts* cut or padded to exactly *width* entries.

    Args:
        texts: Text values extracted from one table row, in document order.
        width: Number of entries the result must have.
        missing: Filler appended when *texts* is shorter than *width*.

    Returns:
        A new list of length *width*; extra input values are dropped and
        absent ones are replaced by *missing*.
    """
    cells = list(texts[:width])
    cells.extend([missing] * (width - len(cells)))
    return cells


def build_table(rows, columns=COLUMNS, missing=MISSING):
    """Turn row-wise text lists into a column-name -> values mapping.

    Args:
        rows: Iterable of per-row text lists (may be ragged).
        columns: Column names; their order defines the positional mapping.
        missing: Filler for values a row does not provide.

    Returns:
        A dict keyed by column name (insertion order follows *columns*),
        each value being a list with one entry per input row.
    """
    table = {name: [] for name in columns}
    for texts in rows:
        for name, value in zip(columns, pad_row(texts, len(columns), missing)):
            table[name].append(value)
    return table


def fetch_rows(page_url, timeout=10):
    """Download the ranking page and return the text nodes of every row.

    Args:
        page_url: Address of the ranking page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per ``<tr>`` under the ``hidden_zhpm`` table
        body; each entry is the list of text nodes found inside that row.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-success HTTP status.
    """
    ret = requests.get(page_url, timeout=timeout)
    ret.raise_for_status()
    # Assigning ret.encoding has no effect on ret.content (raw bytes), so the
    # intended UTF-8 decoding is requested from the parser itself.
    parser = etree.HTMLParser(encoding='utf-8')
    html = etree.HTML(ret.content, parser=parser)
    if html is None:
        return []
    # Select the rows once and read each row relative to itself, instead of
    # re-running a document-wide '//tr[i]' query for a guessed row count.
    table_rows = html.xpath('//tbody[@class="hidden_zhpm"]//tr')
    return [row.xpath('.//text()') for row in table_rows]


def main():
    """Scrape the ranking table and export it to an Excel workbook."""
    rows = fetch_rows(url)
    if not rows:
        # An empty result means the page layout changed or the download was
        # not the expected page; do not write a misleading empty workbook.
        raise RuntimeError('no ranking rows found at {}'.format(url))
    for texts in rows:
        print(texts)
    message = build_table(rows)
    print(message)
    df = pandas.DataFrame(message, columns=COLUMNS)
    df.to_excel(OUTPUT_PATH)


if __name__ == "__main__":
    main()
更多推荐



所有评论(0)