# 爬取上海、临沂两地天气和污染指数 (Scrape the weather and pollution index for Shanghai and Linyi)
import urllib.request
from pyquery import PyQuery as pq
import re
# Display names of the cities to report on.
city = [
    '上海',
    '临沂',
]
# Codes inserted into the weather.com.cn URL; index-aligned with `city`.
city_num = [
    '101020100',
    '101120901',
]
def find_weather(country):
    """Yield the forecast entries scraped from a weather.com.cn page.

    Downloads ``http://www.weather.com.cn/weather/<country>.shtml``, prints
    the text of every ``.li6 span`` element (presumably the pollution
    readings -- confirm against the live page markup), then yields one dict
    per forecast entry matched by the regular expression.

    Args:
        country: String inserted into the page URL. Despite the name,
            callers in this file pass the codes from ``city_num``.

    Yields:
        dict: ``{'日期': <h1> text, '温度': '<span> text/<i> text',
        '风级': <i> text inside <p class="win">}`` for each match.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = 'http://www.weather.com.cn/weather/' + country + '.shtml'
    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')

    # re.S lets '.' span newlines: each entry's markup covers several lines.
    parse = re.compile(
        'class="sky skyid.*?<h1>(.*?)</h1>.*?<span>(.*?)</span>.*?<i>(.*?)</i>'
        '.*?<p class="win">.*?<i>(.*?)</i>',
        re.S,
    )

    doc = pq(html)
    # One list entry per matched <span>. Calling list() on the joined text
    # (as before) split the readings into individual characters.
    polute = [span.text() for span in doc('.li6 span').items()]
    print(polute)

    for date, span_temp, i_temp, wind in parse.findall(html):
        yield {
            '日期': date,
            '温度': span_temp + '/' + i_temp,
            '风级': wind,
        }
def main():
    """Print up to seven forecast entries for every configured city.

    For each name/code pair in ``city`` / ``city_num``, prints the city name,
    collects everything ``find_weather`` yields for that code, prints the
    first seven entries (fewer if the page produced fewer), and finishes the
    city with a blank separator.
    """
    # Pair names with codes directly instead of hard-coding the city count.
    for name, code in zip(city, city_num):
        print(name + '\n')
        forecast = list(find_weather(code))
        # Slice rather than index 0..6: a page yielding fewer than seven
        # matches must not raise IndexError.
        for entry in forecast[:7]:
            print(entry)
        print('\n')
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()