scrapy基于数据库的数据分析及图表展示

小知识点

缓存
#缓存的注意事项
#1.什么样的数据才应该缓存?
#大小不能太大
#数据要常用
#数据获取比较费时

# 1. In-memory cache backed by a plain dict.
# Sample data (name -> age): 张三 20, 李四 30, 王五 15
cache = {}
# Perform the same lookup twice: the first pass misses and fills the cache,
# the second pass is answered from the cache.
for _attempt in range(2):
    try:
        age = cache["张三"]
    except KeyError:
        # Cache miss: supply the value by hand and remember it.
        cache["张三"] = 20
        print("手动输入:20")
    else:
        # Cache hit: report the stored value.
        print(f"cache{age}")
"""
初始化缓存对象                      cache = {}
如果 数据在缓存中                   if "张三" in cache:
    从缓存中拿数据                       age=cache["张三"]
否则                                else:
    手动拿数据                           20
    并缓存数据                           cache["张三"]=20
"""
# Redis-backed cache

import redis

cache = redis.Redis()
# A single GET replaces the EXISTS + GET pair: it saves one round trip and
# cannot be tripped up by the key disappearing between the two calls.
age = cache.get("张三")
if age is None:
    # Cache miss: obtain the value by hand and store it for the next lookup.
    age = 20
    cache.set("张三", age)
else:
    # A default redis.Redis() client returns bytes; convert so that both
    # branches leave `age` as an int.
    age = int(age)

数据分析并绘柱状图

import sqlite3

from pyecharts import Bar

# Count blog posts per WeChat official account.  Ranking and truncation to
# the top 7 happen in Python below; the same result could be produced in SQL
# by appending "ORDER BY COUNT(*) DESC LIMIT 7" to this query.
sql1 = """
SELECT COUNT(*),blog_wx_name FROM blog GROUP BY blog_wx_name;
"""

db = sqlite3.connect("blog.sqlite")
try:
    cursor = db.cursor()
    cursor.execute(sql1)
    result = cursor.fetchall()
finally:
    # Release the database file even if the query fails.
    db.close()

# Keep the 7 accounts with the most posts, largest first.
new_result = sorted(result, key=lambda x: x[0], reverse=True)[:7]

# Split the (count, name) rows into parallel lists for the chart axes.
blog_count_list = [blog_count for blog_count, _ in new_result]
wx_name_list = [wx_name for _, wx_name in new_result]

bar = Bar("公众号博客数对比图")  # create the bar chart with its title
bar.add("公众号",  # legend name
        wx_name_list,  # x axis: account names
        blog_count_list,  # y axis: post counts
        bar_category_gap="20%",
        is_more_utils=True,
        )
bar.render("柱状图.html")

数据分析并绘折线图

import sqlite3

from pyecharts import Line

# Count blog posts per date.  ORDER BY is explicit because SQL gives no
# row-order guarantee for GROUP BY alone, and the chart plots points in the
# order the rows come back.
# NOTE(review): assumes blog_date is stored in a format that sorts
# chronologically (e.g. YYYY-MM-DD) -- confirm against the table contents.
sql1 = """
SELECT COUNT(*),blog_date FROM blog GROUP BY blog_date ORDER BY blog_date;
"""

db = sqlite3.connect("blog.sqlite")
try:
    cursor = db.cursor()
    cursor.execute(sql1)
    result = cursor.fetchall()
finally:
    # Release the database file even if the query fails.
    db.close()
print(result)

# Split the (count, date) rows into parallel lists for the chart axes.
blog_count_list = [blog_count for blog_count, _ in result]
date_list = [date for _, date in result]

attr = date_list
v1 = blog_count_list
# The chart object keeps the name `bar` used by the original script even
# though it is a Line chart.
bar = Line("折线图")
bar.add("博客数量", attr, v1,
        # Optional styling arguments, left disabled:
        # mark_point=["average"],
        # mark_point_symbol="arrow",
        # mark_point_textcolor="#cf0",
        # mark_point_symbolsize=30,
        # is_fill=False,  # whether to fill the area under the line
        # line_opacity=0.8,  # opacity of the line
        # area_opacity=0.4,  # opacity of the filled area
        )
bar.render("折线图.html")

涉及小知识点
group by 以某个字段分组
数据库查询出的数据排序(升序) order by 按某种方式
数据库查询出的数据排序(降序) order by 按某种方式 desc
limit 数字(取前几个)

SELECT COUNT(*) as blog_count,blog_wx_name FROM blog GROUP BY blog_wx_name ORDER BY blog_count DESC LIMIT 7

猜你喜欢

转载自blog.csdn.net/qq_41150890/article/details/100084796