# 简单地爬取天气信息,并用图表展示一段时间内的气温变化

# 简单地爬取信息与趋势预测

import datetime
import pandas as pd
import xlsxwriter as xlw
from urllib import request
from bs4 import BeautifulSoup as bs

# Month arithmetic is done on parsed datetime values, not on the raw strings.
def dateRange(start, end):  # e.g. start='2014-09'
    """Return the months from ``start`` up to, but not including, ``end``.

    Args:
        start: First month, formatted ``'YYYY-MM'``.
        end: Month at which to stop, formatted ``'YYYY-MM'``; it is excluded
            from the result.

    Returns:
        Ascending list of ``'YYYYMM'`` strings. Empty when ``end`` is not
        later than ``start``.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m'``.
    """
    first = datetime.datetime.strptime(start, "%Y-%m")
    stop = datetime.datetime.strptime(end, "%Y-%m")

    # Step month by month instead of enumerating every day and de-duplicating.
    first_index = first.year * 12 + (first.month - 1)
    stop_index = stop.year * 12 + (stop.month - 1)

    datelist = []
    for month_index in range(first_index, stop_index):
        year, month_offset = divmod(month_index, 12)
        datelist.append(datetime.date(year, month_offset + 1, 1).strftime("%Y%m"))
    return datelist
# Scrape the historical weather tables published on lishi.tianqi.com.
def getCommentsById(city, start, end):
    """Collect the daily weather rows for ``city`` over a range of months.

    One page is fetched per month returned by ``dateRange(start, end)``, so
    ``end`` itself is not fetched.

    Args:
        city: City name as it appears in the site's URL path (e.g. 'zhaotong').
        start: First month, formatted ``'YYYY-MM'``.
        end: Month at which to stop, formatted ``'YYYY-MM'``.

    Returns:
        A list of rows, each a list holding the text of the first six ``<li>``
        cells of a table row. Cells without a single text node are stored as
        the string ``'None'``.

    Raises:
        IndexError: If a data row has fewer than six ``<li>`` cells.
        urllib.error.URLError: If a page cannot be fetched.
    """
    weather_result = []
    for month in dateRange(start, end):
        url = 'http://lishi.tianqi.com/' + city + '/' + month + '.html'
        page_request = request.Request(url)
        page_request.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
        # Close the HTTP response as soon as the body has been read.
        with request.urlopen(page_request) as response:
            html = response.read()
        soup = bs(html, 'html.parser')
        # '.tqtongji2' selects by class; an id would be written '#name'.
        weather_m = soup.select('div .tqtongji2 > ul')

        for row in weather_m[1:]:  # the first <ul> is the table header, skip it
            cells = row.find_all('li')  # look the cells up once per row
            record = []
            for j in range(6):
                text = cells[j].string
                # A None cell cannot be written to Excel as a string, so
                # substitute a placeholder.
                record.append(text if text is not None else 'None')
            weather_result.append(record)
    return weather_result
# Write the scraped rows to a local Excel workbook.
def list_to_excel(weather_result, filename):
    """Save weather rows to ``E:\\<filename>.xlsx`` on a sheet named 'weather_report'.

    Args:
        weather_result: Iterable of six-item rows of strings, in the order of
            the header row written below.
        filename: Workbook name without the ``.xlsx`` extension.

    Raises:
        ValueError: If a row does not contain exactly six values.
    """
    title = ['日期', '最高气温', '最低气温', '天气', '风向', '风力']
    # Raw string: '\%' is not a valid escape sequence, so keep the backslash literal.
    # The context manager closes the workbook even if a write raises.
    with xlw.Workbook(r'E:\%s.xlsx' % filename) as workbook:
        sheet = workbook.add_worksheet('weather_report')
        bold = workbook.add_format({'bold': True})  # one shared format for the header
        for col, heading in enumerate(title):
            sheet.write_string(0, col, heading, bold)

        # Data rows start on row 1, directly below the header.
        for row, record in enumerate(weather_result, start=1):
            if len(record) != len(title):
                raise ValueError(
                    'expected %d values per row, got %d' % (len(title), len(record))
                )
            for col, value in enumerate(record):
                sheet.write_string(row, col, value)
if __name__ == '__main__':
    # Scrape Zhaotong's weather history and save it to a workbook.
    # NOTE(review): dateRange excludes the end month, so 2017-07 itself is not
    # fetched even though the file name says 201707 - confirm this is intended.
    data = getCommentsById('zhaotong', '2016-05', '2017-07')
    list_to_excel(data, '昭通天气201605-201707')

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from dateutil import parser
import pandas as pd
import numpy as np

# Read each city's weather data from its Excel workbook.
df_kunming = pd.read_excel('E://昆明天气201605-201707.xlsx')
df_dali = pd.read_excel('E://大理天气201605-201707.xlsx')
df_lijiang = pd.read_excel('E://丽江天气201605-201707.xlsx')
df_xianggelila = pd.read_excel('E://香格里拉天气201605-201707.xlsx')
df_xishuangbanna = pd.read_excel('E://西双版纳天气201605-201707.xlsx')
df_yuxi = pd.read_excel('E://玉溪天气201605-201707.xlsx')
df_wenshan = pd.read_excel('E://文山天气201605-201707.xlsx')
df_qujing = pd.read_excel('E://曲靖天气201605-201707.xlsx')
df_zhaotong = pd.read_excel('E://昭通天气201605-201707.xlsx')

# Pull out the columns to analyse: the '最低气温' (lowest temperature) column
# as y and the '日期' (date) column as x.
# NOTE(review): list_to_excel writes every cell with write_string, so the
# temperature column presumably comes back as text - confirm, and convert to
# numbers first if a numeric y axis is wanted.
y1 = df_kunming['最低气温']
x1 = df_kunming['日期']
y2 = df_dali['最低气温']
x2 = df_dali['日期']
y3 = df_lijiang['最低气温']
x3 = df_lijiang['日期']
y4 = df_xianggelila['最低气温']
x4 = df_xianggelila['日期']
y5 = df_xishuangbanna['最低气温']
x5 = df_xishuangbanna['日期']
y6 = df_yuxi['最低气温']
x6 = df_yuxi['日期']

# Convert the date values of each city into datetime objects.
day_kunming = [parser.parse(date_text) for date_text in x1]
day_dali = [parser.parse(date_text) for date_text in x2]
day_lijiang = [parser.parse(date_text) for date_text in x3]
day_xianggelila = [parser.parse(date_text) for date_text in x4]
day_xishuangbanna = [parser.parse(date_text) for date_text in x5]
day_yuxi = [parser.parse(date_text) for date_text in x6]

# Create the figure with subplots(): fig is the figure object, ax the axes object.
fig, ax = plt.subplots()

# Rotate the x-axis tick labels by 70 degrees so they are easier to read.
plt.xticks(rotation=70)

# Date format used for the tick labels.
hours = mdates.DateFormatter('%Y-%m-%d')

# Apply that format to the x axis.
ax.xaxis.set_major_formatter(hours)

# Six lines are drawn, three per call; each line needs its own (x, y, format)
# triple. The first three cities are red, the last three green.
ax.plot(day_kunming, y1, 'r', day_dali, y2, 'r', day_lijiang, y3, 'r')
ax.plot(day_xianggelila, y4, 'g', day_xishuangbanna, y5, 'g', day_yuxi, y6, 'g')

# Show the figure. A bare `fig` expression only renders inside a notebook;
# plt.show() also works when the file is run as a script.
plt.show()
这里写图片描述

猜你喜欢

转载自blog.csdn.net/weixin_43028756/article/details/82142483