import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib as mlt
import jieba
from pyecharts import Geo
from wordcloud import WordCloud
print(os.getcwd())
import os
from matplotlib.font_manager import FontProperties
# Font configuration.
# The section titles in this script were bare text lines; Python evaluates
# such a line as a name and raises NameError, so they are comments now.
# SimHei is put first in the sans-serif list so Chinese labels can be drawn.
mlt.rcParams['font.sans-serif'] = ['SimHei']
mlt.rcParams['font.family'] = 'sans-serif'  # must be set so the list above is used

# Plot style: label, tick and legend font sizes plus the default figure size.
plt.rcParams.update({
    "axes.labelsize": 16.,
    "xtick.labelsize": 14.,
    "ytick.labelsize": 14.,
    "legend.fontsize": 12.,
    "figure.figsize": [15., 9.],
})
# Column labels applied to the CSV through ``names=``.
headers = [
    'positionName', 'workYear', 'education', 'jobNature', 'financeStage',
    'city', 'salary', 'positionAdvantage', 'companyFullName',
]

# Read the data; the encoding has to be given explicitly.
data = pd.read_csv("lagou.csv", names=headers, encoding="gbk")

# Remove rows with missing values.  dropna() returns a new frame, so the
# result has to be assigned back (the bare call discarded it).  The index is
# renumbered afterwards because later steps of this script look rows up by
# the labels 0..len(data)-1.
data = data.dropna().reset_index(drop=True)
# Number of postings per required work-experience bracket, computed once and
# drawn twice.
work_year_counts = data['workYear'].value_counts()

# Horizontal bar chart.  A new figure is opened for every chart: when a
# figure is already open, a Series plot is drawn on the current axes, so
# outside a notebook consecutive charts would end up on top of each other.
plt.figure()
work_year_counts.plot(kind='barh')

# Vertical bar chart.
plt.figure()
work_year_counts.plot(kind='bar')
# Jieba word segmentation.
# Example from the original notes: list(jieba.cut('python工程师')) gives
# ['python', '工程师'].

# Stop words: tokens that are removed before the word cloud is built.
stopwords = ['PYTHON', 'python', 'Python', '工程师', '(', ')', '/']

# Segment every job title and keep the tokens that are not stop words.
# The column is iterated directly (no label-based row look-up) and missing
# titles are skipped, so a non-default index or a NaN cannot break the loop.
kept_words = []
for title in data['positionName'].dropna():
    kept_words.extend(
        seg for seg in jieba.cut(str(title)) if seg not in stopwords
    )

# `final` is the text fed to the word cloud: every kept token followed by one
# space, joined in a single pass instead of repeated string concatenation.
final = ''.join(word + ' ' for word in kept_words)
# `final` holds the space-separated words collected from the job titles.
# A font that contains Chinese glyphs must be given, otherwise Chinese words
# cannot be displayed.
# NOTE(review): the font path is a Windows location; adjust it on other systems.
my_wordcloud = WordCloud(
    font_path=r'C:\Windows\Fonts\simkai.ttf',
    width=1000,
    height=600,
).generate(final)

# Open a dedicated figure so the image is not painted over a chart that was
# drawn earlier on the current axes.
plt.figure()
plt.imshow(my_wordcloud)  # show the image
plt.axis('off')
# numpy is needed for the explode offsets below; the script used `np` without
# ever importing it, which raised NameError.
import numpy as np

plt.rcParams["figure.figsize"] = [10., 10.]  # square canvas so the pie is round

experience_counts = data['workYear'].value_counts()

# The new figure picks up the square size set above.
plt.figure()

# `explode` needs exactly one offset per wedge, so its length is taken from
# the data instead of a hard-coded 6.  The upper bound (0.2) controls how far
# the wedges are pulled apart and can be tuned freely.
experience_counts.plot(
    kind='pie',
    autopct='%1.2f%%',
    explode=np.linspace(0, 0.2, len(experience_counts)),
)
import ast
import re

# Build (city, minimum salary) pairs.  The salary text is cut at the first
# 'k'/'K' and the leading number is multiplied by 1000.
# SECURITY: the number used to be parsed with eval(), which executes whatever
# text the CSV contains.  ast.literal_eval accepts only Python literals and
# yields the same int/float for numeric text.
data2 = [
    (city, ast.literal_eval(re.split('k|K', salary)[0].strip()) * 1000)
    for city, salary in zip(data['city'], data['salary'])
]

# Two-column frame: column 0 is the city, column 1 the salary figure.
data3 = pd.DataFrame(data2)

# Mean salary per city in the (name, value) pair format that Geo expects.
# The group-by is evaluated once instead of being recomputed per element.
mean_salary_by_city = data3.groupby(0)[1].mean()
data4 = list(zip(mean_salary_by_city.index, mean_salary_by_city.values))
# Geographic display of the per-city salary figures on a map of China.
# The plotted value is the lower bound of each posting's salary range,
# averaged per city.

# Options shared by both charts.
_geo_chart_options = dict(
    title_color="#fff",
    title_pos="left",
    width=1200,
    height=600,
    background_color='#404a59',
)
# NOTE(review): the values in data4 are salaries multiplied by 1000, so they
# probably lie far above this visual_range of [0, 300] -- confirm the range.
_geo_series_options = dict(
    is_visualmap=True,
    maptype='china',
    visual_range=[0, 300],
    visual_text_color='#fff',
)

# Heat map.  It is written to its own file: both charts used to call
# render() with the default output path, so the second call overwrote this one.
geo = Geo("全国Python工资布局", "制作人:挖掘机小王子", **_geo_chart_options)
attr, value = geo.cast(data4)
geo.add("", attr, value, type="heatmap", **_geo_series_options)
geo.render(path="render_heatmap.html")

# Effect-scatter map.  It keeps the default output path, so that file holds
# the same chart as before.
geo = Geo("全国Python工资布局", "制作人:chase", **_geo_chart_options)
attr, value = geo.cast(data4)
geo.add("", attr, value, type="effectScatter", **_geo_series_options)
geo.render()
# Three additional map packages must be installed before using the Geo charts:
#   pip install echarts-countries-pypkg
#   pip install echarts-china-provinces-pypkg
#   pip install echarts-china-cities-pypkg