# Fetch national COVID-19 statistics from the Tencent news API and dump the
# per-city confirmed / dead / healed counts for China into a CSV file.
import csv
import json
import time

import requests

# Output file path (CSV intended to be opened in Excel).
ExcelName = 'E:/2.9日疫情状况.csv'

# Current timestamp used as a cache-busting query parameter.
number = format(time.time() * 100, '.0f')
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%s' % number

# The endpoint wraps the real payload as a JSON *string* inside the 'data'
# field, hence the second json.loads on top of .json().
datas = json.loads(requests.get(url=url).json()['data'])
print('更新时间:' + datas['lastUpdateTime'])

# Open the file once and stream every row through a single writer.  The
# original re-opened the file in append mode for every city, i.e. one
# open/close syscall pair per row; the resulting file content is identical.
# NOTE(review): plain utf-8 CSVs often display garbled in Excel on Windows;
# 'utf-8-sig' adds a BOM Excel understands — confirm before changing the
# output bytes.
with open(ExcelName, 'w', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Header row.
    writer.writerow(["省份", "城市", "确诊", "死亡", "治愈", "时间"])
    for contry in datas['areaTree']:
        # Only the China subtree carries the per-province/per-city breakdown.
        if contry['name'] != '中国':
            continue
        for province in contry['children']:
            for city in province['children']:
                # Province, city, confirmed, dead, healed, update time.
                writer.writerow([province['name'], city['name'],
                                 str(city['total']['confirm']),
                                 str(city['total']['dead']),
                                 str(city['total']['heal']),
                                 datas['lastUpdateTime']])
# Scraper for the Hebei Health Commission daily COVID-19 bulletins: ULS()
# collects the bulletin detail-page URLs from the site search, info() parses
# one bulletin page and persists the figures through the project SQL helpers.
from lxml import etree
import re
import requests
import 肺炎.SQL as SQL

# Sample bulletin URLs kept for reference:
#url = 'http://www.hebwst.gov.cn/index.do?id=397505&templet=content&cid=45'
#url ='http://www.hebwst.gov.cn/index.do?id=397291&templet=content&cid=45'
#url='http://www.hebwst.gov.cn/index.do?id=395538&templet=content&cid=326'

# Module-level accumulator of bulletin detail-page URLs filled by ULS().
hrefs = []


def ULS():
    """Crawl the first 6 site-search result pages and collect bulletin URLs.

    Extracts every result link, keeps only those whose query string contains
    '&cid=45' (the bulletin category), skips one known-bad article id, and
    appends the absolute URLs to the module-level `hrefs` list.
    """
    urls = []
    for i in range(6):
        url = 'http://www.hebwst.gov.cn/index.do?templet=search_list&searchType=1&searchText=河北省新型冠状病毒感染的肺炎疫情情况&type=search&cid=0&page=' + str(i)
        print(url)
        strhtml = requests.get(url)
        tree = etree.HTML(strhtml.text)
        # One list of hrefs per result page.
        urls.append(tree.xpath('//td[@class=\'sy_new_list\']/a//@href'))
        print(urls)
    for href1 in urls:
        for href in href1:
            print(href)
            # Links in the result list are site-relative.
            href = 'http://www.hebwst.gov.cn/' + href
            # Keep only the bulletin category (cid=45).
            a = re.match(r'.*?&cid=45', href)
            if (a):
                # Known-bad article excluded by hand.
                if (href != 'http://www.hebwst.gov.cn/index.do?id=395795&templet=content&cid=45'):
                    hrefs.append(href)
    print(hrefs)


def info(url):
    """Parse one bulletin page and persist the figures via the SQL helpers.

    The bulletin body is three <p> paragraphs: text[0] carries the newly
    added cases, text[1] the cumulative totals with per-city breakdowns,
    text[2] the close-contact / medical-observation counts.  All parsing is
    positional regex/split on the Chinese bulletin wording, so it is tightly
    coupled to the exact phrasing used by the site.
    """
    print(url)
    strhtml = requests.get(url)  # GET the bulletin page
    tree = etree.HTML(strhtml.text)
    text = tree.xpath('//p//text()')
    # Strip full-width ideographic spaces from the first paragraph.
    text[0] = re.sub(r'\u3000', '', text[0])
    #print(text)
    # --- newly added cases (paragraph 0) ---
    date = re.findall(r"(.+?日)", text[0])
    print("时间", date)
    xin_que_num = re.findall(r"新增确诊病例(.+?例)", text[0])
    # Per-city detail follows the first "其中" ("among which"); if a second
    # "其中" occurs, the detail section is truncated before it.
    mid = text[0].split("其中", 1)[1]
    num = len(mid.split("其中", 1))
    if num > 1:
        mid = mid.split("其中", 1)[0]
    xin_shi_num = re.findall(r"[,,、](.+?市)(.+?例)", mid)
    xin_chu_num = re.findall(r"新增治愈出院病例(.+?例)", text[0])
    xin_yi_num = re.findall(r"新增疑似病例(.+?例)", text[0])
    print("新增确诊病例", xin_que_num)
    print("详细新增确诊病例\n", xin_shi_num)
    print("新增治愈出院病例", xin_chu_num)
    print("新增疑似病例\n", xin_yi_num)
    # --- cumulative totals (paragraph 1) ---
    que_num = re.findall(r"累计报告确诊病例(.+?例)", text[1])
    si_num = re.findall(r"例,其中死亡(.+?例)", text[1])
    zhong_num = re.findall(r",现有重症病例(.+?例)", text[1])
    yu_num = re.findall(r",累计治愈出院(.+?例)", text[1])
    print("累计确诊病例", que_num)
    print("死亡病例", si_num)
    print("重症病例", zhong_num)
    print("出院病例", yu_num)
    # --- per-city breakdowns (paragraph 1) ---
    # The paragraph lists city details in a fixed order: confirmed ->
    # dead -> severe -> discharged.  Each marker ("死亡病例中", ...) may be
    # absent, in which case the remaining text belongs to the previous
    # section — hence the nested if/else ladder below.
    que_xi_num = []
    si_xi_num = []
    zhong_xi_num = []
    chu_xi_num = []
    num = len(text[1].split("确诊病例中", 1))
    if num > 1:
        mid = text[1].split("确诊病例中", 1)[1]
        num = len(mid.split("死亡病例中", 1))
        if num > 1:
            que = mid.split("死亡病例中", 1)[0]
            que_xi_num = re.findall(r"[,、](.+?市)(.+?例)", que)
            si = mid.split("死亡病例中", 1)[1]
            mid = si
            num = len(mid.split("重症病例中", 1))
            if num > 1:
                si = mid.split("重症病例中", 1)[0]
                si_xi_num = re.findall(r"[,、](.+?市)(.+?例)", si)
                zhong = mid.split("重症病例中", 1)[1]
                mid = zhong
                num = len(mid.split("出院病例中", 1))
                if num > 1:
                    zhong = mid.split("出院病例中", 1)[0]
                    zhong_xi_num = re.findall(r"[,、](.+?市)(.+?例)", zhong)
                    chu = mid.split("出院病例中", 1)[1]
                    chu_xi_num = re.findall(r"[,、](.+?市)(.+?例)", chu)
                else:
                    # No discharged section: everything after "重症病例中"
                    # is the severe-case detail.
                    zhong_xi_num = re.findall(r"[,、](.+?市)(.+?例)", zhong)
            else:
                # No severe section: everything after "死亡病例中" is the
                # death detail.
                si_xi_num = re.findall(r"[,、](.+?市)(.+?例)", si)
    print("详细确诊病例", que_xi_num)
    print("详细死亡病例", si_xi_num)
    print("详细重症病例", zhong_xi_num)
    print("详细出院病例", chu_xi_num)
    # --- suspected cases (paragraph 1) ---
    yisi_num = re.findall(r"疑似病例(.+?例)", text[1])
    print("疑似病例", yisi_num)
    # --- close contacts / released / under observation (paragraph 2) ---
    miqie_num = re.findall(r"密切接触者(.+?人)", text[2])
    jie_num = re.findall(r"解除隔离医学观察(.+?人)", text[2])
    guan_num = re.findall(r"现有(.+?人)", text[2])
    print("密切接触者", miqie_num)
    print("接触医学观察", jie_num)
    print("现有医学观察人数", guan_num)
    # Persist the province-level row, then the per-city rows and updates.
    SQL.insert_province(date[0], "".join(xin_que_num), "".join(xin_chu_num),
                        "".join(xin_yi_num), "".join(que_num), "".join(si_num),
                        "".join(zhong_num), "".join(yu_num), "".join(yisi_num),
                        "".join(miqie_num), "".join(jie_num),
                        "".join(guan_num), url)
    for mid_value in que_xi_num:
        City = mid_value[0]
        Num = mid_value[1]
        print("CITY:", City)
        print("num:", Num)
        SQL.insert_city(date[0], City, Num, url)  # ,,,,Ur
    for mid_value in xin_shi_num:
        City = mid_value[0]
        Num = mid_value[1]
        print("CITY:", City)
        print("num:", Num)
        SQL.update_db(City, "New_Confirmed_num", Num)
    for mid_value in si_xi_num:
        City = mid_value[0]
        Num = mid_value[1]
        print("CITY:", City)
        print("num:", Num)
        SQL.update_db(City, "Dead_num", Num)
    for mid_value in zhong_xi_num:
        City = mid_value[0]
        Num = mid_value[1]
        print("CITY:", City)
        print("num:", Num)
        SQL.update_db(City, "Zhong_num", Num)
    for mid_value in chu_xi_num:
        City = mid_value[0]
        Num = mid_value[1]
        print("CITY:", City)
        print("num:", Num)
        SQL.update_db(City, "Cured_num", Num)


if __name__ == '__main__':
    # Full run would be: collect URLs, reset tables, parse every bulletin.
    #ULS()
    #SQL.delete_db("hebei_info")
    #SQL.delete_db("hebei_city_info")
    #for url in hrefs:
    #    info(url)
    # Currently parses a single hand-picked bulletin.
    info('http://wsjkw.hebei.gov.cn/content/content_45/397632.jhtml')