使用 pandas 按年、年-月、年-月-日对数据进行分组
# Colab-only helper: files.upload() asks the user for a local file so that
# 'hair_dryer.tsv' is available to the notebook runtime.
from google.colab import files
import pandas as pd

uploaded = files.upload()

# Read the tab-separated file, then replace the 'review_date' column with
# its pd.to_datetime() conversion so later cells can use .year / .month.
data = pd.read_csv('hair_dryer.tsv', sep='\t').assign(
    review_date=lambda frame: pd.to_datetime(frame['review_date'])
)
def year_month(x):
    """Encode a date-like value as the integer ``YYYYMM``.

    Args:
        x: Any object exposing numeric ``year`` and ``month`` attributes,
            for example ``datetime.date`` or ``pandas.Timestamp``.

    Returns:
        ``x.year * 100 + x.month`` (e.g. 201508 for August 2015), which is
        usable as a group-by key that sorts chronologically.
    """
    return x.year * 100 + x.month
# Monthly totals: each row is keyed by the integer YYYYMM code that
# year_month() derives from its 'review_day' value, then the numeric columns
# are summed per month.
# numeric_only=True keeps non-numeric columns (presumably the datetime
# 'review_day' column itself - confirm against data4) out of the sum; pandas
# >= 2.0 raises TypeError for them instead of silently dropping them.
data5 = (
    data4
    .groupby(data4['review_day'].apply(year_month))
    .sum(numeric_only=True)
)
sale_count
review_day
200203 1
200204 1
200205 0
200206 0
200207 1
... ...
201504 334
201505 321
201506 337
201507 365
201508 378
[162 rows x 1 columns]
# Yearly totals: rows are grouped by the calendar year of 'review_day' and the
# numeric columns are summed per year.
# numeric_only=True keeps non-numeric columns (presumably the datetime
# 'review_day' column itself - confirm against data4) out of the sum; pandas
# >= 2.0 raises TypeError for them instead of silently dropping them.
data7 = (
    data4
    .groupby(data4['review_day'].apply(lambda day: day.year))
    .sum(numeric_only=True)
)
# Per-day review count: group the rows by 'review_date' and count the
# non-null 'star_rating' entries in each group. The result is a Series named
# 'star_rating' indexed by review date, and it replaces the data3 frame.
data3 = data3.groupby('review_date')['star_rating'].count()
2002-03-02 1
2002-04-20 1
2002-07-13 1
2002-08-13 1
2002-08-21 1
..
2015-08-27 10
2015-08-28 8
2015-08-29 10
2015-08-30 8
2015-08-31 10
# Continuous daily calendar spanning the hard-coded first and last dates.
pdates = pd.date_range('2002-03-02', '2015-08-31')

# Align the per-day counts to that calendar; days absent from data3 get 0
# instead of NaN, so the result has one entry per calendar day.
data3_new = data3.reindex(index=pdates, fill_value=0)

# Notebook preview of the first rows.
data3_new.head()
2002-03-02 1
2002-03-03 0
2002-03-04 0
2002-03-05 0
2002-03-06 0
Freq: D, Name: star_rating, dtype: int64