import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the Baidu "top keyword" board and print it as a two-column table
# (title, heat value).
url = "http://top.baidu.com/buzz.php?p=top_keyword"
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/69.0.3497.100 Safari/537.36'}

# Send the browser-like User-Agent defined above (it was previously built but
# never passed), and bound the wait so a stalled connection cannot hang.
r = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty table.
r.raise_for_status()
# Let requests guess the charset from the body rather than trusting the header.
r.encoding = r.apparent_encoding
x = r.text
soup = BeautifulSoup(x, 'lxml')

a = []  # titles, one per keyword cell
b = []  # heat values, aligned index-for-index with `a`

# Walk the keyword cells one by one and pull both fields from the same table
# row, so the two lists always have equal length and stay paired. Collecting
# the heat spans with a separate page-wide search yielded fewer heats than
# titles (only rising entries matched) and paired them by position only.
for cell in soup.find_all(class_="keyword"):
    # The cell also holds a "search" link; the clean title is the text of the
    # <a class="list-title"> anchor. Fall back to the whole cell if it is absent.
    title_tag = cell.find(class_="list-title")
    if title_tag is None:
        title_tag = cell
    a.append(title_tag.get_text().strip())

    # NOTE(review): assumes the heat value sits in the same <tr> in an element
    # classed icon-rise / icon-fall / icon-fair. Only icon-rise is confirmed by
    # the original code -- verify the other two names against the live page.
    row = cell.find_parent("tr")
    heat_tag = None
    if row is not None:
        heat_tag = row.find(class_=["icon-rise", "icon-fall", "icon-fair"])
    b.append(heat_tag.get_text().strip() if heat_tag is not None else None)

data = [a, b]
print(data)
# Rows are built as [titles, heats]; transpose so each keyword is one row.
h = pd.DataFrame(data, index=["标题", "热度"])
print(h.T)
[['翻译\nsearch', '淘宝\nsearch', '斗鱼\nsearch', '百度翻译\nsearch', '美国确诊超2600例\nsearch', '哔哩哔哩\nsearch', '知乎\nsearch', 'bilibili\nsearch', '微信\nsearch', '京东\nsearch', 'qq邮箱\nsearch', '微博\nsearch', '意大利报纸讣告\n search', '优酷\nsearch', '学习通\nsearch', '163\nsearch', '黄书豪出家\n search', '百度网盘\nsearch', '央行1000亿mlf\n search', '汽车之家\nsearch', '疫情\nsearch', '谷歌翻译\nsearch', '印度聚众喝牛尿\n search', '天眼查\nsearch', '员工救火用嘴吹\n search', '爱奇艺\nsearch', '今日新鲜事\nsearch', '宁津生院士逝世\nsearch', 'qq\nsearch', '英国229名科学家\n search', '腾讯入股新希望\n search', '虎牙\nsearch', '企查查\nsearch', '古巨基当爸\n search', '英女王迁离伦敦\n search', '钉钉\nsearch', '美国疫苗临床试验\n search', '吉利icon\nsearch', 'b站\nsearch', '6类行为定罪处罚\n search', '智联招聘\nsearch', '地图\nsearch', '腾讯视频\nsearch', '360\nsearch', '意大利一市长病逝\nsearch', '武大樱花直播日程\nsearch', '日历\nsearch', '韩国再现集体感染\n search', '百度地图\nsearch', '安家\nsearch', '亚马逊禁上口罩\nsearch', '淘宝网\nsearch', '微信网页版\nsearch', '163邮箱登录\nsearch', '电影天堂\nsearch', '谁都渴望遇见你\nsearch', '58同城\nsearch', '蝙蝠侠停拍\nsearch', '知网\nsearch', '微信公众平台\nsearch', '巴菲特女儿自我隔离\n search', '百度\nsearch', '4399\nsearch', '热血同行\nsearch', '微信公众号\nsearch', '顺丰\nsearch', '环球音乐ceo确诊\n search', '新型冠状病毒肺炎\nsearch', '京东商城\nsearch', '塞尔维亚紧急状态\n search', '腾讯会议\nsearch', '湖北籍舰艇出镜\n search', '在线翻译\nsearch', '捷克宣布全国隔离\n search', '球迷支持赛季无效\n search', 'steam\nsearch', 'wps\nsearch', 'lv生产洗手液\n search', '腾讯课堂\nsearch', '阿里巴巴\nsearch', '美联储利率降至零\n search', '国外确诊已超国内\n search', '有道翻译\nsearch', '西班牙将封锁全国\nsearch', '超星学习通\nsearch', '豆瓣\nsearch', '阿里云\nsearch', '意大利新增2547例\nsearch', '全国入境日均12万\n search', '战网\nsearch', '梅西呼吁抗击疫情\nsearch', '巴西总统检测结果\nsearch', '学信网\nsearch', '美股\n search', 'nga\nsearch', '上证指数\nsearch', '中国知网\nsearch', '智慧树\nsearch', '天猫\nsearch', '腾讯\nsearch'], ['343170', '317407', '239262', '227638', '224707', '220729', '213478', '210275', '190168', '182806', '174386', '159213', '131647', '124552', '122762', '120185', '116333', '111649', '109508', '107822', '103459', '99477', '98568', '97438', '97412', '96411', '95920', '94947', '91643', 
'91509', '90140', '84974', '83853', '80200', '79510', '78917', '77942', '77135', '76084', '75011', '74323', '74323', '71190', '69908', '69785', '68019', '67702', '65267', '60425', '59770', '58456', '58401', '57826', '57745', '57702', '57086', '56938', '56716', '56466', '56325', '55937', '55863', '54822', '54572', '54035', '53214', '53115', '52961', '52752', '52196', '52168', '50985', '50774', '50341', '50309', '50291', '50110', '49443', '48700', '47279', '45589', '45497', '44289', '43831', '43693', '43405', '42952', '42911', '42104', '41539']] 标题 热度 0 翻译\nsearch 343170 1 淘宝\nsearch 317407 2 斗鱼\nsearch 239262 3 百度翻译\nsearch 227638 4 美国确诊超2600例\nsearch 224707 .. ... ... 95 上证指数\nsearch None 96 中国知网\nsearch None 97 智慧树\nsearch None 98 天猫\nsearch None 99 腾讯\nsearch None [100 rows x 2 columns]
网页源代码中所需要的内容(标题文本位于 class="list-title" 的 <a> 标签内):
<a class="list-title" target="_blank" href="http://www.baidu.com/baidu?cl=3&tn=SE_baiduhomet8_jmjb7mjw&rsv_dl=fyb_top&fr=top1000&wd=%B7%AD%D2%EB" href_top="./detail?b=2&c=12&w=%B7%AD%D2%EB">翻译</a>