# -*- coding: utf-8 -*-
import requests
from lxml import etree
import os
# Browser-like User-Agent sent with the request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}
_HISTORY_URL = 'https://www.aqistudy.cn/historydata/'

# One XPath union that selects both groups of city links in a single query:
#   div/ul/li/a          -> "hot" cities
#   div/ul/div[2]/li/a   -> all cities
# An earlier variant of this script ran the two paths as separate queries and
# appended the results to one list; the union replaces that with one query.
_CITY_LINKS_XPATH = '//div[@class="bottom"]/ul/li/a | //div[@class="bottom"]/ul/div[2]/li/a'


def _fetch_page_text(url, headers, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: HTTP headers to send with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests may block indefinitely on a stalled connection.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an error status rather than parsing an error page.
    response.raise_for_status()
    return response.text


def _extract_city_names(page_text):
    """Return the text of every city link found in *page_text*.

    Args:
        page_text: HTML source of the history-data page.

    Returns:
        A list with the first text node of each matched ``<a>`` element.
        Anchors that have no direct text node are skipped.
    """
    tree = etree.HTML(page_text)
    city_names = []
    for anchor in tree.xpath(_CITY_LINKS_XPATH):
        texts = anchor.xpath('./text()')
        # An anchor without text would make ``[0]`` raise IndexError.
        if texts:
            city_names.append(texts[0])
    return city_names


if __name__ == '__main__':
    all_city_names = _extract_city_names(_fetch_page_text(_HISTORY_URL, _HEADERS))
    print(all_city_names, len(all_city_names))
python爬虫学习(十四)xpath获取不同标签城市名
猜你喜欢
转载自blog.csdn.net/haimian_baba/article/details/103816978
今日推荐
周排行