西安历史天气爬取
第一版代码：抓取 2019 年逐日天气并写入逗号分隔的文本文件
"""
Created on Wed Mar 31 22:19:37 2021
@author: ASUS
"""
import requests
from lxml import etree
if __name__ == "__main__":
url = 'https://lishi.tianqi.com/xian/%d/'
headers = {
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
fp =open('./天气.txt','w',encoding='utf-8')
for month in range(201901,201913):
new_url = format(url%month)
page_text = requests.get(url=new_url,headers=headers).content
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
xian_weathers = []
i = 0
for div in div_list:
xian_weather = div.xpath('./text()')[0]
i+=1
if i == 5:
i = 0
xian_weathers.append(xian_weather+'\n')
else:
xian_weathers.append(xian_weather+',')
fp.write(''.join(xian_weathers))
第二版代码：在抓取之后，用 csv 模块把命令行给定的输入文件逐行复制到输出文件
"""
Created on Wed Mar 31 22:19:37 2021
@author: ASUS
"""
import requests
from lxml import etree
import csv
import sys
if __name__ == "__main__":
url = 'https://lishi.tianqi.com/xian/%d/'
headers = {
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
fp =open('./天气.txt','w',encoding='utf-8')
for month in range(201901,201913):
new_url = format(url%month)
page_text = requests.get(url=new_url,headers=headers).text
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
xian_weathers = []
i = 0
for div in div_list:
xian_weather = div.xpath('./text()')[0]
i+=1
if i == 5:
i = 0
xian_weathers.append(xian_weather+'\n')
else:
xian_weathers.append(xian_weather+',')
fp.write(''.join(xian_weathers))
input_file = sys.argv[1]
output_file = sys.argv[2]
with open(input_file,'r',newline='') as csv_in_file:
with open(output_file,'w',newline='') as csv_out_file:
filereader = csv.reader(csv_in_file)
filerwriter = csv.writer(csv_out_file)
for row_list in filereader:
filewriter.writerow(row_list)
"""
Created on Wed Mar 31 22:19:37 2021
@author: ASUS
"""
import requests
from lxml import etree
import numpy as np
import pandas as pd
if __name__ == "__main__":
url = 'https://lishi.tianqi.com/xian/%d/'
headers = {
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}
fp =open('./天气.txt','w',encoding='utf-8')
for month in range(201901,201913):
new_url = format(url%month)
page_text = requests.get(url=new_url,headers=headers).text
tree = etree.HTML(page_text)
div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
xian_weathers = []
i = 0
for div in div_list:
xian_weather = div.xpath('./text()')[0]
i+=1
if i == 5:
i = 0
xian_weathers.append(xian_weather+'\n')
else:
xian_weathers.append(xian_weather+',')
fp.write(''.join(xian_weathers))
txt = np.loadtxt('天气.txt')
txtDF = pd.DataFrame(txt)
txtDF.to_csv('天气.csv',index=False)