import collections
import pandas as pd
import chardet
def sum_sales_by_promotion(rows, name_col=4, amount_col=13, promo_col=15,
                           not_promoted='否'):
    """Total the sales amount per group, split by promotion flag.

    Args:
        rows: Iterable of indexable records, e.g. ``DataFrame.values``.
        name_col: Position of the value used as the grouping key
            (presumably the category name -- TODO confirm against the CSV).
        amount_col: Position of the sales amount; converted with ``float``.
        promo_col: Position of the promotion flag.
        not_promoted: Flag value that marks a non-promotional sale. Every
            other flag value is counted as promotional.

    Returns:
        Mapping of group key to ``(non_promo_total, promo_total)``, in order
        of first appearance.
    """
    totals = collections.defaultdict(lambda: (0.0, 0.0))
    for row in rows:
        key = row[name_col]
        regular, promoted = totals[key]
        amount = float(row[amount_col])
        if row[promo_col] == not_promoted:
            regular += amount
        else:
            promoted += amount
        totals[key] = (regular, promoted)
    return totals


def promo_summary_frame(rows):
    """Build the summary table written to ``task1_3.csv``.

    Every group is included. The original sliced the values with ``[1:]``,
    which dropped the first group even though ``read_csv`` had already
    consumed the header row.
    NOTE(review): confirm no caller relied on the first group being skipped.
    """
    totals = sum_sales_by_promotion(rows)
    return pd.DataFrame({
        '名称': list(totals),
        '非促销商品销售金额': [regular for regular, _ in totals.values()],
        '促销商品销售金额': [promoted for _, promoted in totals.values()],
    })


def export_promo_summary(src='task1_1.csv', dst='task1_3.csv'):
    """Read the sales CSV and write the per-group promotion summary."""
    data = pd.read_csv(src, encoding="GB2312")
    frame = promo_summary_frame(data.values)
    # index=False keeps the row index out of the file; the file is written
    # GBK-encoded.
    frame.to_csv(dst, index=False, sep=',', encoding='GBK')


# To find out which encoding a CSV uses before loading it, read its raw bytes
# (open(path, 'rb').read()) and pass them to chardet.detect().

if __name__ == "__main__":
    export_promo_summary()
注意CSV文件的编码是UTF-8还是GB2312,读取时要用encoding指定对应的编码,否则用Python打开某些CSV会乱码
创建一个defaultdict字典,默认值为(“”,0,0):访问Dict[a[4]]时若没有这个键,则自动以该默认值创建这个键
循环计算,若当前记录是非促销,则增加该大类的非促销金额 ,否则增加其促销金额为当前记录的销售价格
取这个字典的值,注意字典的values()是方法,要加括号;pd.read_csv得到的DataFrame的values是属性,不加括号
然后可看其shape
index=False则生成的CSV左边第一列不会是索引;用encoding='GBK'保存,则用Excel打开不会乱码
# -*- coding: utf-8 -*-
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from pylab import *
def daily_sales_by_type(df, goods_types, type_col="商品类型",
                        date_col="销售日期", amount_col="销售金额"):
    """Sum the sales amount per day for each requested goods type.

    Args:
        df: Sales table containing ``type_col``, ``date_col`` and
            ``amount_col``.
        goods_types: Values of ``type_col`` to produce a series for.
        type_col: Column holding the goods type.
        date_col: Column holding the sale date.
        amount_col: Column holding the sales amount.

    Returns:
        ``(days, totals)``. ``days`` lists the distinct dates in order of
        first appearance. ``totals`` maps each goods type to a list of daily
        sums aligned with ``days``; a day with no sales of that type is 0.
        Rows with a missing date are not attributed to any day.
    """
    # NOTE(review): the original collected the days from positional column 7
    # and then used them to filter ``date_col``; this assumes column 7 is
    # ``date_col`` -- confirm against the CSV layout.
    days = df[date_col].unique().tolist()
    totals = {}
    for goods_type in goods_types:
        per_day = (
            df.loc[df[type_col] == goods_type]
            .groupby(date_col)[amount_col]
            .sum()
        )
        # groupby sorts by date; reindex restores first-appearance order and
        # fills in days on which this type had no sales.
        totals[goods_type] = per_day.reindex(days, fill_value=0).tolist()
    return days, totals


def plot_fresh_daily_sales(csv_path='task1_1.csv'):
    """Plot the daily sales amount of fresh goods as a line chart."""
    # SimHei provides the CJK glyphs needed by the Chinese labels below.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    alldf = pd.read_csv(csv_path, encoding="GB2312")
    days, totals = daily_sales_by_type(alldf, ["生鲜", "一般商品"])
    fishmoneys = totals["生鲜"]
    normalmoneys = totals["一般商品"]
    print(days)
    print(fishmoneys)
    # General-goods totals are printed for reference only; they are not drawn.
    print(normalmoneys)

    x = range(len(days))
    plt.plot(x, fishmoneys, marker='o', mec='r', mfc='w', label=u'生鲜')
    plt.legend()  # show the legend
    plt.xticks(x, days, rotation=45)
    plt.margins(0)
    plt.subplots_adjust(bottom=0.15)  # leave room for the rotated tick labels
    plt.xlabel(u"日期")  # x-axis label
    plt.ylabel("每天销售金额/元")  # y-axis label
    plt.title("生鲜类商品每天销售金额折线图")  # chart title
    plt.show()


if __name__ == "__main__":
    plot_fresh_daily_sales()
设置字体为SimHei后,图中的中文可以正常显示,不会报错或乱码
画饼状图
import collections
from matplotlib import pyplot as plt
import pandas as pd
def category_sales_by_month(rows, months=(201501, 201502, 201503, 201504),
                            month_col=8, code_col=1, name_col=2,
                            amount_col=13):
    """Accumulate the sales amount per category code within each month.

    Args:
        rows: Iterable of indexable records, e.g. ``DataFrame.values``.
        months: Month keys to report, in output order.
        month_col: Position of the month key in each record.
        code_col: Position of the category code (the grouping key).
        name_col: Position of the label stored with each code; the label
            from the last record seen for a code is kept.
        amount_col: Position of the sales amount.

    Returns:
        Dict mapping each month to a mapping of ``code -> (total, label)``.

    Raises:
        KeyError: If a record's month is not listed in ``months``.
    """
    by_month = {
        month: collections.defaultdict(lambda: (0, "")) for month in months
    }
    for row in rows:
        bucket = by_month[row[month_col]]
        code = row[code_col]
        bucket[code] = (bucket[code][0] + row[amount_col], row[name_col])
    return by_month


def plot_monthly_category_pies(csv_path='task1_1.csv'):
    """Save one pie chart of category sales share per month.

    Files are written as ``0.png``, ``1.png``, ... in the order of the month
    keys.
    """
    data = pd.read_csv(csv_path, encoding="GB2312")
    # SimHei provides the CJK glyphs needed by the Chinese labels.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    by_month = category_sales_by_month(data.values)
    for i, (month, categories) in enumerate(by_month.items()):
        sizes = [total for total, _ in categories.values()]
        labels = [label for _, label in categories.values()]
        plt.figure(figsize=(6, 6))
        plt.pie(sizes, labels=labels, autopct='%3.2f%%')
        plt.axis('equal')
        # Title with the real month number taken from the key (201501 -> 1);
        # the original used the 0-based loop counter.
        plt.title('第' + str(month % 100) + '月')
        # Save only after the title is set so it appears in the image.
        plt.savefig("%i.png" % i)
        # pyplot keeps figures alive until they are closed.
        plt.close()


if __name__ == "__main__":
    plot_monthly_category_pies()
先创建一个以月份为键的字典
然后遍历这个字典的键,把每个键对应的值重新赋值成一个新的defaultdict
plt.savefig("%i.png"%i)
保存图片
# -*- coding:utf-8 -*-
#! python3
import collections
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d
import numpy as np
import pandas as pd
import chardet
import datetime
def autolabel(rects, ax=None):
    """Write each bar's height as a text label next to the bar's tip.

    Args:
        rects: Iterable of bar patches exposing ``get_height``, ``get_x``
            and ``get_width`` (e.g. the container returned by ``plt.bar``).
        ax: Optional object with a ``text`` method (such as an Axes) to draw
            on. When omitted the labels go through ``plt.text``.
    """
    target = plt if ax is None else ax
    for rect in rects:
        height = rect.get_height()
        target.text(
            # Centre on the bar instead of a fixed -0.2 offset, which only
            # suits one particular bar width.
            rect.get_x() + rect.get_width() / 2.0,
            1.03 * height,
            '%s' % float(height),
            ha='center',
            # Keep the label outside the bar: above a positive bar, below a
            # negative one.
            va='bottom' if height >= 0 else 'top',
        )
def _week_over_week(totals):
    """Return the relative change between consecutive entries, to 2 decimals.

    Entries whose predecessor is 0 have no defined growth rate and come back
    as NaN instead of triggering a division by zero.
    """
    previous, current = totals[:-1], totals[1:]
    rates = np.full(previous.shape, np.nan)
    np.divide(current - previous, previous, out=rates, where=previous != 0)
    return np.round(rates, 2)


def weekly_growth_rates(rows, n_weeks=17, amount_col=13, promo_col=15,
                        week_col=16, not_promoted='否'):
    """Compute week-over-week sales growth for regular and promoted goods.

    Args:
        rows: Iterable of indexable records, e.g. ``DataFrame.values``.
        n_weeks: Number of growth rates to produce; week indices
            ``0..n_weeks`` are accepted.
        amount_col: Position of the sales amount.
        promo_col: Position of the promotion flag.
        week_col: Position of the week index.
        not_promoted: Flag value that marks a non-promotional sale. Every
            other flag value is counted as promotional.

    Returns:
        ``(regular_rates, promo_rates)``: two arrays of length ``n_weeks``
        where entry ``i`` is ``(total[i + 1] - total[i]) / total[i]`` rounded
        to two decimals, or NaN when ``total[i]`` is 0.

    Raises:
        IndexError: If a record's week index exceeds ``n_weeks``.
    """
    regular = np.zeros(n_weeks + 1, dtype=float)
    promoted = np.zeros(n_weeks + 1, dtype=float)
    for row in rows:
        # The week may arrive as a float; array indices must be integers.
        week = int(row[week_col])
        if row[promo_col] == not_promoted:
            regular[week] += row[amount_col]
        else:
            promoted[week] += row[amount_col]
    return _week_over_week(regular), _week_over_week(promoted)


def plot_weekly_growth(csv_path='task2_3_2.csv', n_weeks=17):
    """Draw grouped bars of weekly growth for regular vs promoted goods."""
    data = pd.read_csv(csv_path, encoding="GB2312")
    regular_rates, promo_rates = weekly_growth_rates(
        data.values, n_weeks=n_weeks
    )

    # Tick labels for the x axis: weeks 1..n_weeks.
    name = [str(week) for week in range(1, n_weeks + 1)]
    total_width = 0.8
    width = total_width / 2  # two bars share each week's slot
    x_data = np.arange(n_weeks)

    plt.rc('font', family='SimHei', size=12)
    regular_bars = plt.bar(x_data, regular_rates, width=width,
                           label='非促销商品', fc='y')
    # The second series is shifted right by one bar width; its positions
    # also carry the tick labels.
    promo_bars = plt.bar(x_data + width, promo_rates, width=width,
                         label='促销商品', tick_label=name, fc='r')
    # Bars with an undefined (NaN) rate get no value label.
    autolabel([bar for bar in regular_bars if np.isfinite(bar.get_height())])
    autolabel([bar for bar in promo_bars if np.isfinite(bar.get_height())])
    plt.xlabel('周数')
    plt.ylabel('商品销售金额周环比增长率')
    plt.title('非促销商品和促销商品周环比增长率')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    plot_weekly_growth()
生成柱状图
name是x轴各刻度的标签,通过tick_label参数传给plt.bar。