# -*- coding:utf-8 -*-
"""Scrape historical weather pages for Tianjin from lishi.tianqi.com,
insert each day's record into a MySQL table, and collect everything
into a single pandas DataFrame."""
from urllib.request import urlopen

from bs4 import BeautifulSoup
import pandas as pd
import pymysql

# One shared connection/cursor for the whole scraping run; committed
# and closed at the bottom of the script.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='', db='weather')
cursor = conn.cursor()  # cursor for issuing SQL statements


def year_month(start_year=2017, end_year=2018):
    """Return the list of monthly history-page URLs.

    Generalized from the original hard-coded ``range(2017, 2018)``; the
    defaults reproduce the original behavior exactly.
    """
    urllist = []
    for year in range(start_year, end_year):
        for month in range(1, 13):
            # %02d zero-pads the month, replacing the original
            # "if j < 10" string-concatenation branches.
            urllist.append('http://lishi.tianqi.com/tianjin/%d%02d.html' % (year, month))
    return urllist


url_list = year_list = year_month()  # historically up to url_list[0:79] were available


def weather_scraping(url):
    """Fetch *url* and return all <ul> tags inside div.tqtongji2.

    The first <ul> holds the column headers; each following <ul> is one
    day's data row.
    """
    html = urlopen(url)
    # Explicit parser: avoids bs4's "no parser was explicitly specified"
    # warning and keeps parsing consistent across environments.
    bsObj = BeautifulSoup(html.read(), 'html.parser')
    div = bsObj.find("div", 'tqtongji2')
    return div.find_all('ul')


weul = weather_scraping(url_list[0])
isinstance(weul, list)  # sanity probe from the original script (result unused)


def analysis_ul(url):
    """Parse one month page: insert each day into MySQL, return a DataFrame."""
    ul = weather_scraping(url)
    weather_list = []
    # BUG FIX: in the original, the `sql = ...` assignment was commented
    # out while cursor.execute(sql) remained, raising NameError on the
    # first row. Restored as a parameterized query so pymysql handles
    # quoting/escaping (the original built the SQL with % formatting).
    sql = ("INSERT INTO app01_cond(ti,max_c,min_c,con,c_id_id) "
           "VALUES(%s,%s,%s,%s,%s)")
    for ul_text in ul[1:]:  # skip ul[0]: header row
        day_data = [li.get_text() for li in ul_text.find_all('li')]
        # Columns: date, max temp, min temp, condition, fixed city id 734.
        cursor.execute(sql, (day_data[0], int(day_data[1]), int(day_data[2]),
                             day_data[3], 734))
        weather_list.append(day_data)
    columns = [i.get_text() for i in ul[0].find_all('li')]
    return pd.DataFrame(weather_list, columns=columns)


# Scrape the first 79 month pages and combine them.
# DataFrame.append is deprecated (removed in pandas 2.x); pd.concat with
# ignore_index=True is the supported equivalent of the original
# append(..., ignore_index=1) loop.
monthly_frames = [analysis_ul(u) for u in url_list[:79]]
weather_data = pd.concat(monthly_frames, ignore_index=True)

# Optionally persist the combined table locally:
# weather_data.to_csv('weather_201101_201707.txt', index=False, sep=',')

conn.commit()   # make all INSERTs permanent
cursor.close()  # release DB resources
conn.close()
Fetch historical weather data and store it in MySQL
猜你喜欢
Reprinted from blog.csdn.net/weixin_42100915/article/details/80913943
今日推荐
周排行