import pandas as pd
# Load the sensors dataset from CSV, then echo the frame so the notebook
# renders it (one row per Group/Property pair, one column per year).
sensors = pd.read_csv(filepath_or_buffer='data/sensors.csv')
sensors
|
Group |
Property |
2012 |
2013 |
2014 |
2015 |
2016 |
0 |
A |
Pressure |
928 |
873 |
814 |
973 |
870 |
1 |
A |
Temperature |
1026 |
1038 |
1009 |
1036 |
1042 |
2 |
A |
Flow |
819 |
806 |
861 |
882 |
856 |
3 |
B |
Pressure |
817 |
877 |
914 |
806 |
942 |
4 |
B |
Temperature |
1008 |
1041 |
1009 |
1002 |
1013 |
5 |
B |
Flow |
887 |
899 |
837 |
824 |
873 |
# Tidy the data with melt: Group and Property are kept as identifier columns,
# the remaining column labels (the years) are gathered into a 'Year' column,
# and their cells land in the default 'value' column.
# Only the first six rows are displayed.
(
    sensors
    .melt(id_vars=['Group', 'Property'], var_name='Year')
    .head(6)
)
|
Group |
Property |
Year |
value |
0 |
A |
Pressure |
2012 |
928 |
1 |
A |
Temperature |
2012 |
1026 |
2 |
A |
Flow |
2012 |
819 |
3 |
B |
Pressure |
2012 |
817 |
4 |
B |
Temperature |
2012 |
1008 |
5 |
B |
Flow |
2012 |
887 |
# Melt to long form, then use pivot_table to turn each Property value into
# its own column, with (Group, Year) as the row index.
# reset_index moves Group and Year back into ordinary columns, and
# rename_axis(None, ...) clears the leftover 'Property' name on the columns axis.
(
    sensors
    .melt(id_vars=['Group', 'Property'], var_name='Year')
    .pivot_table(index=['Group', 'Year'], columns='Property', values='value')
    .reset_index()
    .rename_axis(None, axis='columns')
)
|
Group |
Year |
Flow |
Pressure |
Temperature |
0 |
A |
2012 |
819 |
928 |
1026 |
1 |
A |
2013 |
806 |
873 |
1038 |
2 |
A |
2014 |
861 |
814 |
1009 |
3 |
A |
2015 |
882 |
973 |
1036 |
4 |
A |
2016 |
856 |
870 |
1042 |
5 |
B |
2012 |
887 |
817 |
1008 |
6 |
B |
2013 |
899 |
877 |
1041 |
7 |
B |
2014 |
837 |
914 |
1009 |
8 |
B |
2015 |
824 |
806 |
1002 |
9 |
B |
2016 |
873 |
942 |
1013 |
# The same reshaping done with stack and unstack instead of melt/pivot_table:
#   - set_index makes (Group, Property) the row index,
#   - stack moves the year column labels into a new innermost index level,
#   - unstack('Property') pivots the Property level out into columns,
#   - the two rename_axis calls name the index levels Group/Year and clear
#     the 'Property' name from the columns axis,
#   - reset_index turns Group and Year back into ordinary columns.
(
    sensors
    .set_index(['Group', 'Property'])
    .stack()
    .unstack('Property')
    .rename_axis(['Group', 'Year'], axis='index')
    .rename_axis(None, axis='columns')
    .reset_index()
)
|
Group |
Year |
Flow |
Pressure |
Temperature |
0 |
A |
2012 |
819 |
928 |
1026 |
1 |
A |
2013 |
806 |
873 |
1038 |
2 |
A |
2014 |
861 |
814 |
1009 |
3 |
A |
2015 |
882 |
973 |
1036 |
4 |
A |
2016 |
856 |
870 |
1042 |
5 |
B |
2012 |
887 |
817 |
1008 |
6 |
B |
2013 |
899 |
877 |
1041 |
7 |
B |
2014 |
837 |
914 |
1009 |
8 |
B |
2015 |
824 |
806 |
1002 |
9 |
B |
2016 |
873 |
942 |
1013 |