First, let's analyze the URL structure:
http://www.tianqi.com/beijing/
It is made up of http://www.tianqi.com/ followed by the region's pinyin and a trailing /.
import xpinyin
pin = xpinyin.Pinyin()
city_pinyin = pin.get_pinyin(str(input('请输入想要查询的城市:')), '')
This takes the city name you typed in, converts it to pinyin, and stores it in the variable city_pinyin. If you don't have the xpinyin package yet, install it first by running pip install xpinyin in your terminal.
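A quick look at what get_pinyin returns (the outputs in the comments reflect xpinyin's usual behaviour and are shown for illustration):

import xpinyin

pin = xpinyin.Pinyin()
print(pin.get_pinyin('北京'))        # 'bei-jing' -- the default separator is '-'
print(pin.get_pinyin('北京', ''))    # 'beijing'  -- an empty separator gives the form the URL needs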
url = 'https://www.tianqi.com/' + city_pinyin + '/'
Once the city's pinyin is appended to https://www.tianqi.com/, the page can be requested with the requests library. If you don't have requests yet, install it first by running pip install requests in your terminal.
def get_main():
    pin = xpinyin.Pinyin()
    city_pinyin = pin.get_pinyin(str(input('请输入想要查询的城市:')), '')  # convert the city name to pinyin
    url = 'https://www.tianqi.com/' + city_pinyin + '/'  # append the pinyin to the base URL
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    response = requests.get(url, headers=headers).text
    return response
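Optionally, the request step can be made a bit more defensive. This is just a sketch of my own (the 10-second timeout is an arbitrary choice), not part of the original code; it fails loudly on network problems or error pages instead of handing broken HTML to the parser:

def get_main_safe(url, headers):
    # same request as above, but with a timeout and an explicit status check
    resp = requests.get(url, headers=headers, timeout=10)  # arbitrary 10 s timeout
    resp.raise_for_status()  # raises requests.HTTPError for 4xx/5xx responses
    return resp.text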
This requests the URL and returns the HTML source, which we then parse with BeautifulSoup.
def soups():
    html = BeautifulSoup(get_main(), "html.parser")
    dl = html.find('dl', class_='weather_info')            # block holding today's weather summary
    title = dl.find('img')['alt']                          # city name, taken from the image's alt text
    times = dl.find('dd', class_="week").get_text()        # date and day of the week
    shidu = dl.find('dd', class_="shidu")                  # humidity / wind block
    kq = str(shidu).split('<dd class="shidu"><b>')[1].split('</b></dd>')[0].split('</b><b>')
    kongqiss = dl.find('dd', class_='kongqi').get_text()   # air-quality text
    printjg(title, times, kq, kongqiss)
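A note on the str(shidu).split(...) line: it works, but it depends on the exact raw HTML and breaks if the markup changes slightly. If the dd.shidu element really wraps each value (humidity, wind, and so on) in its own b tag, as that split implies, the values could be read directly instead. This is an alternative sketch under that assumption, not the original code:

kq = [b.get_text() for b in shidu.find_all('b')]  # one entry per <b> tag inside dd.shidu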
Finally, extract and print the data we want.
def printjg(title, times, kq, kongqiss):
    print(times)
    print(title)
    for i in kq:
        print(i)
    # pull the individual fields out of the air-quality text with regular expressions
    kqzl = re.findall('空气质量:(.*?)PM', kongqiss)
    PM = re.findall('PM:(.*?)日出', kongqiss)
    RCSJ = re.findall('日出:(.*?)日落', kongqiss)
    RLSJ = re.findall('日落:(.*)', kongqiss)
    print('空气质量:' + str(kqzl[0]).strip() + '\n' + 'PM:' + str(PM[0]).strip() + '\n' + '日出:' + str(RCSJ[0]).strip() + '\n' + '日落:' + str(RLSJ[0]).strip())
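To see what those regular expressions capture, here is how they behave on a made-up string shaped like the kongqi text (the sample is an assumption for illustration, not data scraped from the site):

sample = '空气质量:良 PM: 40 日出: 05:10 日落: 19:30'   # assumed example string
print(re.findall('空气质量:(.*?)PM', sample))   # ['良 ']
print(re.findall('PM:(.*?)日出', sample))       # [' 40 ']
print(re.findall('日出:(.*?)日落', sample))     # [' 05:10 ']
print(re.findall('日落:(.*)', sample))          # [' 19:30']
# the .strip() calls in printjg then remove the surrounding whitespace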
The complete code is as follows:
import requests
import xpinyin
from bs4 import BeautifulSoup
import re
def get_main():
    pin = xpinyin.Pinyin()
    city_pinyin = pin.get_pinyin(str(input('请输入想要查询的城市:')), '')  # convert the city name to pinyin
    url = 'https://www.tianqi.com/' + city_pinyin + '/'  # append the pinyin to the base URL
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    response = requests.get(url, headers=headers).text
    return response

def soups():
    html = BeautifulSoup(get_main(), "html.parser")
    dl = html.find('dl', class_='weather_info')
    title = dl.find('img')['alt']
    times = dl.find('dd', class_="week").get_text()
    shidu = dl.find('dd', class_="shidu")
    kq = str(shidu).split('<dd class="shidu"><b>')[1].split('</b></dd>')[0].split('</b><b>')
    kongqiss = dl.find('dd', class_='kongqi').get_text()
    printjg(title, times, kq, kongqiss)

def printjg(title, times, kq, kongqiss):
    print(times)
    print(title)
    for i in kq:
        print(i)
    kqzl = re.findall('空气质量:(.*?)PM', kongqiss)
    PM = re.findall('PM:(.*?)日出', kongqiss)
    RCSJ = re.findall('日出:(.*?)日落', kongqiss)
    RLSJ = re.findall('日落:(.*)', kongqiss)
    print('空气质量:' + str(kqzl[0]).strip() + '\n' + 'PM:' + str(PM[0]).strip() + '\n' + '日出:' + str(RCSJ[0]).strip() + '\n' + '日落:' + str(RLSJ[0]).strip())

soups()
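One optional finishing touch (my own suggestion, not part of the original): replace the bare soups() call with an entry-point guard and catch the AttributeError that dl.find(...) raises when tianqi.com has no page for the city you typed, so a typo prints a friendly message instead of a traceback.

if __name__ == '__main__':
    try:
        soups()
    except AttributeError:
        # html.find('dl', class_='weather_info') returns None when the city page does not exist
        print('未找到该城市的天气页面，请检查输入')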
This took some effort to put together, so please be kind if it's not to your taste, and thanks for your support.