仅仅从网上爬下数据当然是不够用的,主要还得对数据进行分析与展示,大部分人都看重薪资,但是薪资数据有的是*k/月,有的是*万/月,还有*万/年等等,就要对数据进行清理
将所有单位统一化,全部换算成统一单位,然后分类薪资范围,在计算各个范围的数量,最后绘图展示
import pymysql
import numpy as np
from pyecharts import Bar
from pyecharts import Pie
class Mysqlhelper(object):
config = {
"host": "localhost",
"user": "root",
"password": "123456",
"db": "test",
"charset": "utf8"
}
def __init__(self):
self.connection = None
self.cursor = None
# 从数据库中查询多行数据
def getlist(self, sql, *args):
try:
self.connection = pymysql.connect(**Mysqlhelper.config) # **接函数所有参数
self.cursor = self.connection.cursor()
self.cursor.execute(sql, args)
return self.cursor.fetchall()
except Exception as ex:
print(ex, ex)
finally:
self.close()
def close(self):
if self.cursor:
self.cursor.close()
if self.connection:
self.connection.close()
if __name__ == "__main__":
count=0
list = []
list1 = []
list2 = [5000,10000,15000,20000,25000,30000,35000,40000]
salary0 = []
salary1 = []
salary2 = []
salary3 = []
salary4 = []
salary5 = []
salary6 = []
salary7 = []
city=[]
helper = Mysqlhelper()
rows = helper.getlist("select * from t_job")
#print(rows)
for n in rows:
if n[4][-1]=='月':
list.append(n[4])
elif n[4][-1]=='年':
pass
elif n[4][-1]=='天':
pass
else:
pass
for sale in list:
#print(sale)
money = sale.split('/')
#print(money[0])
money1 = money[0].split('-')
#print(money1)
if money[0][-1] == '万':
a = float(money1[0]) * 10000
b = float(money1[1][:-1]) * 10000
aveage = (a + b) / 2
count+=1
list1.append(aveage)
elif money[0][-1]=='千':
a = float(money1[0]) * 1000
b = float(money1[1][:-1]) * 1000
#print(a)
#print(b)
aveage = (a + b) / 2
#print(aveage)
count += 1
list1.append(aveage)
#print(count)
#print(list1)
for i in list1:
print(i)
if 0 < i <= 5000:
salary0.append(i)
elif 5000 < i <= 10000:
salary1.append(i)
elif 10000 < i <= 15000:
salary2.append(i)
elif 15000 < i <= 20000:
salary3.append(i)
elif 20000 < i <= 25000:
salary4.append(i)
elif 25000 < i <= 30000:
salary5.append(i)
elif 30000 < i <= 35000:
salary6.append(i)
elif 35000 < i <= 40000:
salary7.append(i)
print(min(list1))
print(max(list1))
a = len(salary0)
b = len(salary1)
c = len(salary2)
d = len(salary3)
e = len(salary4)
f = len(salary5)
g = len(salary6)
h = len(salary7)
list3=[a,b,c,d,e,f,g,h]
print(list2) #x轴
print(a,b,c,d,e,f,g,h)
print(list3) #数量
bar = Bar('Python平均工资')
bar.add("月薪", list2,list3)
# bar.show_config()
bar.render('Python工资柱状图.html')
pie = Pie()
pie.add("", list2, list3, is_label_show=True)
#pie.show_config()
pie.render('Python工资饼状图.html')
'''
#print(rows)
citycount=[]
cityname=['北京','异地招聘','海淀区','朝阳区','丰台区','昌平区','东城区','延庆区',
'房山区','通州区','顺义区','大兴区','怀柔区','西城区','平谷区','门头沟区']
beijing=[]
yidi=[]
haidian=[]
chaoyang=[]
fengtai=[]
changping=[]
dongcheng=[]
yanqing=[]
fangshan=[]
tongzhou=[]
shunyi=[]
daxing=[]
huairou=[]
xicheng=[]
pinggu=[]
mentougou=[]
for n in rows:
#print(n[3])
area=n[3].split('-')
print(area)
if len(area)==1:
print(area[0])
city.append(area[0])
else:
print(area[1])
city.append(area[1])
print(city)
print(len(city))
for i in city:
if i=='北京':
beijing.append(i)
elif i=='异地招聘':
yidi.append(i)
elif i=='海淀区':
haidian.append(i)
elif i == '朝阳区':
chaoyang.append(i)
elif i=='丰台区':
fengtai.append(i)
elif i=='昌平区':
changping.append(i)
elif i=='东城区':
dongcheng.append(i)
elif i=='延庆区':
yanqing.append(i)
elif i=='房山区':
fangshan.append(i)
elif i=='通州区':
tongzhou.append(i)
elif i=='顺义区':
shunyi.append(i)
elif i=='大兴区':
daxing.append(i)
elif i=='怀柔区':
huairou.append(i)
elif i=='西城区':
xicheng.append(i)
elif i=='平谷区':
pinggu.append(i)
elif i=='门头沟区':
mentougou.append(i)
#print(beijing)
#print(len(beijing))
a = len(beijing)
b = len(yidi)
c = len(haidian)
d = len(chaoyang)
e = len(fengtai)
f = len(changping)
g = len(dongcheng)
h = len(yanqing)
j = len(fangshan)
k = len(tongzhou)
l = len(shunyi)
m = len(daxing)
n = len(huairou)
o = len(xicheng)
p = len(pinggu)
q = len(mentougou)
citycount=[a,b,c,d,e,f,g,h,j,k,l,m,n,o,p,q]
print(cityname)
print(citycount)
pie = Pie()
pie.add("", cityname, citycount, is_label_show=True)
# pie.show_config()
pie.render('北京各区Python职位占比饼状图.html')
bar = Bar('北京各区职位数量')
bar.add("数量", cityname, citycount)
# bar.show_config()
bar.render('北京各区Python职位占比柱状图.html')
'''
前面写的是数据库的操作函数,其实可以封装成一个py文件,以后使用直接调用即可。
结果。:
我也分析了boss直聘网站的一些数据,类似于经验要求和学历要求等等,也可以自己分析想要的数据。
import pymysql
import numpy as np
from pyecharts import Bar
from pyecharts import Pie
import jieba
from collections import Counter
from os import path
class Mysqlhelper(object):
config={
"host":"localhost",
"user":"root",
"password":"123456",
"db":"test",
"charset":"utf8"
}
def __init__(self):
self.connection=None
self.cursor=None
# 从数据库中查询多行数据
def getlist(self, sql, *args):
try:
self.connection = pymysql.connect(**Mysqlhelper.config) # **接函数所有参数
self.cursor = self.connection.cursor()
self.cursor.execute(sql, args)
return self.cursor.fetchall()
except Exception as ex:
print(ex,ex)
finally:
self.close()
def close(self):
if self.cursor:
self.cursor.close()
if self.connection:
self.connection.close()
if __name__=="__main__":
sale=[]
exp=[]
edu=[]
one = []
three = []
five = []
onein = []
noexp = []
qita=[]
benke=[]
dazhuan=[]
noedu=[]
boshi=[]
other=[]
helper = Mysqlhelper()
rows = helper.getlist("select * from boss_job")
#print(rows)
for data in rows:
#print(data[2])
#print(data[5])
#print(data[6])
sale.append(data[2])
exp.append(data[5])
edu.append(data[6])
if data[5]=='1-3年':
one.append(data[5])
elif data[5]=='3-5年':
three.append(data[5])
elif data[5]=='5-10年':
five.append(data[5])
elif data[5]=='经验不限':
noexp.append(data[5])
elif data[5]=='1年以内':
onein.append(data[5])
else:
qita.append(data[5])
pass
if data[6]=='本科':
benke.append(data[6])
elif data[6]=='大专':
dazhuan.append(data[6])
elif data[6]=='博士':
boshi.append(data[6])
elif data[6]=='学历不限':
noedu.append(data[6])
else:
other.append(data[6])
# with open('./data/jingyan.txt', 'a', encoding='utf-8') as fp:
# fp.write(data[5])
# fp.write(',')
# fp.flush()
# fp.close()
print(exp)
print(edu)
print(len(exp))
print(len(edu))
'''
d = path.dirname(__file__)
jingyan_text = open(path.join(d, "data//jingyan.txt"), encoding='utf-8').read()
print(len(jingyan_text))
jieba.load_userdict("data//jingyan_dict.txt")
seg_list = jieba.cut_for_search(jingyan_text)
print(u"[全模式]: ", "/ ".join(seg_list))
'''
# sanguo_words = [x for x in jieba.cut(jingyan_text)if x!=','and len(x) >=2]
# c = Counter(sanguo_words).most_common(20)
# print(c)
# print(''.join(jieba.cut(jingyan_text)))
print(one)
print(three)
print(five)
print(noexp)
print(onein)
print(qita)
a=len(one)
b=len(three)
c=len(five)
d=len(noexp)
e=len(onein)
f=len(qita)
expcount=[f,e,a,b,c,d]
expfenlei=['应届生','1年以内','1-3年','3-5年','5-10年','经验不限']
print(expcount)
print(a+b+c+d+e+f)
print(other)
g=len(benke)
h=len(dazhuan)
j=len(boshi)
k=len(noedu)
m=len(other)
educount=[h,g,k,j,m]
edufenlei=['大专','本科','硕士','博士','学历不限']
print(educount)
'''
bar = Bar('工作年限')
bar.add("要求", expfenlei, expcount)
# bar.show_config()
bar.render('工作年限柱状图.html')
pie = Pie()
pie.add("工作", expfenlei, expcount, is_label_show=True)
# pie.show_config()
pie.render('工作年限饼状图.html')
'''
bar = Bar('学历要求')
bar.add("学历", edufenlei, educount)
# bar.show_config()
bar.render('学历要求柱状图.html')
pie = Pie()
pie.add("学历", edufenlei, educount, is_label_show=True)
# pie.show_config()
pie.render('学历要求饼状图.html')
我使用的是最基本的数组方法,不知道有什么简单方法么,例如jieba分词模块,等等
可以看出本科生需求还是很大的。。。