1. enumerate() 是 Python 的内置函数,遍历时可以同时拿到 index 和 value。
2. Python 的匿名函数(lambda 表达式)可以简化代码,详见 2.2 可视化过程。
3. 在 Python 3 中,map() 函数返回的是 map 对象(迭代器),需要用 list() 转换成 list 类型。
示例代码如下:
# coding:utf-8
"""Scrape lowest temperatures from weather.com.cn and chart the ten coldest cities.

The script downloads the regional text-forecast pages, collects one record per
city row into ``ALL_DATA``, sorts the records by lowest temperature and renders
the first ten as a pyecharts bar chart in ``china_temperature.html``.
"""
import json  # not used by the live code; kept for the debugging tips in the comments

import requests
from bs4 import BeautifulSoup
from pyecharts import Bar

# One {"city": ..., "Lowest_temperature": ...} dict per scraped city row.
ALL_DATA = []

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT_SECONDS = 10


def parse_page(url):
    """Fetch one regional forecast page and append its city rows to ALL_DATA.

    Args:
        url: Address of a weather.com.cn ``textFC`` regional forecast page.

    Returns:
        None. Every parsed row is appended to the module-level ``ALL_DATA``
        list as ``{"city": <name>, "Lowest_temperature": <int>}``.

    Raises:
        ValueError: If the page has no ``div.conMidtab`` forecast container.
    """
    # 1. Request the page with a browser-like User-Agent.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/67.0.3396.79 Safari/537.36"
        )
    }
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(
        url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    text = response.content

    # 2. Parse the HTML with BeautifulSoup.
    # The lxml parser is only moderately tolerant of broken markup, while
    # html5lib behaves like a browser's own parser and is very tolerant.
    # Install it with `pip install html5lib`; `pip list` shows what is installed.
    soup = BeautifulSoup(text, "html5lib")
    divs = soup.find("div", class_="conMidtab")
    if divs is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError(
            "no div.conMidtab forecast container found at %s" % url
        )

    for table in divs.find_all("table"):
        # The first two rows of every table are skipped (header rows).
        trs = table.find_all("tr")[2:]
        # enumerate() yields the index and the value at the same time.
        for index, tr in enumerate(trs):
            tds = tr.find_all("td")
            # The first data row has one extra leading cell (presumably the
            # province name), so its city name sits in the second cell.
            city_cell = tds[1] if index == 0 else tds[0]
            city = list(city_cell.stripped_strings)[0]
            temperature = list(tds[-2].stripped_strings)[0]
            try:
                lowest = int(temperature)
            except ValueError:
                # A non-numeric cell must not abort the scrape of every
                # remaining page, so the row is skipped.
                continue
            ALL_DATA.append({"city": city, "Lowest_temperature": lowest})
            # Debugging tip: json.dumps(record, ensure_ascii=False) prints the
            # Chinese city names readably instead of as escape sequences.


def main():
    """Scrape every region, sort by lowest temperature and render the chart."""
    # 1. Collect the data.
    urls = [
        "http://www.weather.com.cn/textFC/hb.shtml",
        "http://www.weather.com.cn/textFC/db.shtml",
        "http://www.weather.com.cn/textFC/hd.shtml",
        "http://www.weather.com.cn/textFC/hz.shtml",
        "http://www.weather.com.cn/textFC/hn.shtml",
        "http://www.weather.com.cn/textFC/xb.shtml",
        "http://www.weather.com.cn/textFC/xn.shtml",
        "http://www.weather.com.cn/textFC/gat.shtml",
    ]
    for url in urls:
        parse_page(url)

    # 2. Analyse the data.
    # 2.1 Sort by lowest temperature using an anonymous (lambda) key function.
    ALL_DATA.sort(key=lambda list_data: list_data["Lowest_temperature"])
    # Debugging tip: json.dumps(ALL_DATA, ensure_ascii=False) shows the result.

    # 2.2 Visualisation with pyecharts.Bar: keep the ten coldest cities.
    data = ALL_DATA[0:10]
    # On Python 3 map() returns a lazy map object, hence the list() wrappers.
    cities = list(map(lambda c: c["city"], data))
    temperatures = list(map(lambda c: c["Lowest_temperature"], data))
    chart = Bar("中国最低气温表")
    # Add the x-axis (cities) and y-axis (temperatures) values.
    chart.add("", cities, temperatures)
    # Render the chart to an HTML file.
    chart.render("china_temperature.html")
    # print() with parentheses works on Python 3 and, for one string, Python 2.
    print("Successful")


if __name__ == '__main__':
    main()

# Stand-alone example of sorting a list of dicts with a lambda key:
#
# ALL_DATA = [
#     {"city": "台北", "Lowest_temperature": "25"},
#     {"city": "高雄", "Lowest_temperature": "27"},
#     {"city": "台中", "Lowest_temperature": "25"}
# ]
# ALL_DATA.sort(key=lambda list_data: list_data["Lowest_temperature"])
# # ensure_ascii=False keeps the Chinese text readable when printed.
# print(json.dumps(ALL_DATA, ensure_ascii=False))