# -*- coding: utf-8 -*-
"""Walk-through of hierarchical (MultiIndex) indexing in pandas.

The script builds a Series and several DataFrames with multi-level row and
column labels and prints the result of common operations on them: subset
selection, stack/unstack, naming and reordering levels, per-level
aggregation, and moving columns into and out of the index.
"""
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# ---------------------------------------------------------------------------
# Series with a two-level index
# ---------------------------------------------------------------------------
# The values are random, so the printed numbers differ on every run.
data = Series(
    np.random.randn(10),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 6, 2],
    ],
)
print(data)

# Select a subset by outer-level label: one label, a label slice, a label list.
print(data['a'])
print(data.loc['a':'c'])
print(data.loc[['a', 'c']])

# Select by inner-level label: every outer group's entry labelled 2.
print(data.loc[:, 2])

# Pivot the inner level into columns; combinations that do not exist in the
# index (e.g. ('c', 3)) show up as NaN.
print(data.unstack())

# Inverse of unstack. dropna() removes the NaN padding that unstack added so
# the round trip yields the ten original entries; on pandas versions whose
# stack() already drops NaN by default this is a no-op.
print(data.unstack().stack().dropna())

# ---------------------------------------------------------------------------
# DataFrame with hierarchical rows and columns
# ---------------------------------------------------------------------------
data = DataFrame(
    np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'c'], ['1', '2', '1', '2']],
    columns=[['hhb', 'hhb', 'zjx'], ['man', 'men', 'man']],
)
print(data)

# Give every index level and column level a name.
data.index.names = ['str', 'num']
data.columns.names = ['name', 'sex']
print(data)
# Expected layout:
# name    hhb     zjx
# sex     man men man
# str num
# a   1     0   1   2
#     2     3   4   5
# b   1     6   7   8
# c   2     9  10  11

# Select every column under the outer column label 'hhb'.
print(data['hhb'])

# Transpose: column levels become row levels and vice versa.
print(data.T)

# ---------------------------------------------------------------------------
# Reordering and sorting levels
# ---------------------------------------------------------------------------
# Swap the two row levels; the data itself is not reordered.
print(data.swaplevel('str', 'num'))

# Sort rows by the second level ('num'). sort_index(level=...) replaces the
# sortlevel() method, which no longer exists in current pandas.
print(data.sort_index(level=1))

# Swap the levels, then sort by the new outer level.
print(data.swaplevel(0, 1).sort_index(level=0))

# ---------------------------------------------------------------------------
# Summary statistics by level
# ---------------------------------------------------------------------------
# Sum rows that share the same 'num' label. groupby(level=...) replaces
# sum(level=...), which was removed in pandas 2.0.
print(data.groupby(level='num').sum())
# name hhb     zjx
# sex  man men man
# num
# 1      6   8  10
# 2     12  14  16

# Sum columns that share the same 'sex' label. The frame is transposed so the
# grouping runs over rows, avoiding the deprecated groupby(axis=1).
print(data.T.groupby(level='sex').sum().T)
# sex      man  men
# str num
# a   1      2    1
#     2      8    4
# b   1     14    7
# c   2     20   10

# ---------------------------------------------------------------------------
# Using DataFrame columns as the index
# ---------------------------------------------------------------------------
frame = DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
})
print(frame)

# set_index moves one or more columns into the row index and returns a new
# DataFrame.
data = frame.set_index(['c', 'd'])
print(data)

# drop=False keeps the indexed columns as regular columns as well.
data2 = frame.set_index(['c', 'd'], drop=False)
print(data2)

# reset_index is the inverse: the index levels move back into columns
# (resulting column order: c, d, a, b).
frame = data.reset_index()
print(frame)

# Optional example, left disabled: fetch Apple stock data with
# pandas_datareader.
# import pandas_datareader as pdr
# print(pdr.get_data_yahoo('AAPL'))
# python数据分析九:pandas层次化索引
# 猜你喜欢
# 转载自blog.csdn.net/qq_38788128/article/details/80695563
# 今日推荐
# 周排行