多层索引_stack_concat_append_merge
start
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
声明
# Demo score tables indexed by (student, exam).
# The normal samples are floats, so they are clipped to the uint8 range and
# cast explicitly; handing float data plus an integer ``dtype`` to the
# DataFrame constructor is a lossy cast that pandas 2.x refuses with ValueError.
# Small table: 2 students x 2 exams, 3 subjects.
df = DataFrame(
    data=np.random.normal(100, scale=20, size=(4, 3)).clip(0, 255).astype(np.uint8),
    index=pd.MultiIndex.from_tuples(
        [('A', '期中'), ('A', '期末'), ('B', '期中'), ('B', '期末')]
    ),
    columns=['yu', 'shu', 'ying'],
)
# Larger table (rebinds ``df``): 10 students x 2 exams, 3 subjects.
df = DataFrame(
    data=np.random.normal(100, scale=20, size=(20, 3)).clip(0, 255).astype(np.uint8),
    index=pd.MultiIndex.from_product([list('ABCDEFGHIJ'), ['期中', '期末']]),
    columns=['yuwen', 'math', 'yingyu'],
)
堆stack
# Pivot the column labels into a new innermost index level (wide -> long).
df.stack(0)
A 期中 yuwen 114 math 76 yingyu 114 期末 yuwen 125 math 100 yingyu 135 B 期中 yuwen 64 math 126 yingyu 139 期末 yuwen 100 math 101 yingyu 112 C 期中 yuwen 113 math 108 yingyu 116 期末 yuwen 60 math 135 yingyu 89 …
# Move the outer index level (the student letter) into the columns.
df.unstack(0)
yuwen … yingyu A B C D E F G H I J | … | A B C D E F G H I J 期中 | 114 64 113 118 94 88 75 95 92 96 … 114 139 116 112 120 61 100 110 104 91 期末 125 100 60 96 68 122 81 114 82 92 … 135 112 89 97 98 98 149 107 72 101
concat
# Input frames for the concat examples, all indexed by (student, exam).
# Float samples are clipped to the uint8 range and cast explicitly; handing
# float data plus an integer ``dtype`` to the DataFrame constructor is a lossy
# cast that pandas 2.x refuses with ValueError.
# df1: students A-J, main subjects.
df1 = DataFrame(
    data=np.random.normal(100, scale=20, size=(20, 3)).clip(0, 255).astype(np.uint8),
    index=pd.MultiIndex.from_product([list('ABCDEFGHIJ'), ['期中', '期末']]),
    columns=['yuwen', 'math', 'yingyu'],
)
# df2: students X-Z, same subjects as df1 (extra rows).
df2 = DataFrame(
    data=np.random.normal(100, scale=20, size=(6, 3)).clip(0, 255).astype(np.uint8),
    index=pd.MultiIndex.from_product([list('XYZ'), ['期中', '期末']]),
    columns=['yuwen', 'math', 'yingyu'],
)
# df3: students A-J plus Z, different subjects (extra columns).
# NOTE(review): 'nusic' looks like a typo for 'music'; kept unchanged because
# other cells may reference the column by this name.
df3 = DataFrame(
    data=np.random.normal(100, scale=20, size=(22, 3)).clip(0, 255).astype(np.uint8),
    index=pd.MultiIndex.from_product([list('ABCDEFGHIJZ'), ['期中', '期末']]),
    columns=['art', 'nusic', 'fayu'],
)
增加index
# Row-wise concatenation: the rows of df2 follow the rows of df1.
pd.concat([df1, df2], axis=0)
增加columns
# Column-wise concatenation: df3's columns are placed next to df1's,
# aligned on the shared row index.
pd.concat([df1, df3], axis='columns')
使用keys添加index或columns,区分数据
# ``keys`` adds one outer label per input so each part stays identifiable:
# on the row index here ...
pd.concat([df1, df2], axis=0, keys=['一班', '二班'])
# ... and on the column index here.
pd.concat([df1, df3], axis='columns', keys=['main', 'ext'])
# Move the outer index level to the columns, then stack it back as the
# innermost index level.
df1.unstack(0).stack()
自定义结合的columns
# ``join_axes`` was removed from ``pd.concat`` in pandas 1.0. Reindexing the
# result on the non-concatenation axis keeps exactly the requested columns.
# Keep only df1's columns; df3's rows become NaN in them.
pd.concat([df1, df3]).reindex(columns=df1.columns)
# Keep only an explicit subset of columns.
pd.concat([df1, df2]).reindex(columns=pd.Index(['yuwen', 'math']))
join
# Column-wise concatenation that keeps only the row labels present in both frames.
pd.concat([df1, df3], axis='columns', join='inner')
append
# ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
# replacement and gives the same row-wise result.
pd.concat([df1, df2])
merge
# Two small tables whose only shared column is ``id``.
df5 = DataFrame({'id': [1, 3, 5], 'salary': [3000, 4000, 5000]})
df6 = DataFrame({'id': [2, 3, 5], 'age': [30, 40, 50]})
# With no ``on`` given, the shared column is the join key; only ids found in
# both tables are kept.
pd.merge(df5, df6)
out: id salary age 0 3 4000 40 1 5 5000 50
根据相同的id项进行合并
# The right table repeats the key 3, so the single matching left row is
# emitted once per match.
df7 = DataFrame({'name': [1, 3, 5], 'salary': [3000, 4000, 5000]})
df8 = DataFrame({'name': [2, 3, 3], 'age': [30, 40, 50]})
pd.merge(df7, df8)
out:name salary age 0 3 4000 40 1 3 4000 50
merge方法会自动根据合并的依赖项,进行数据之间的关联
# Both tables carry ``name`` and ``prov``; only ``name`` is used as the key.
df7 = DataFrame({
    'name': ['A', 'B', 'C'],
    'prov': ['bei', 'xi', 'sha'],
    'salary': [3000, 4000, 5000],
})
df8 = DataFrame({
    'name': ['A', 'B', 'C'],
    'prov': ['xi', 'sha', 'bei'],
    'age': [30, 40, 50],
})
# The clashing non-key column ``prov`` is disambiguated by the suffixes
# (left gets 'old', right gets 'new').
pd.merge(df7, df8, on='name', suffixes=['old', 'new'])
因为要合并的DataFrame中有多项可以作为合并依赖
可以指定一项依赖,并重命名(suffixes加后缀)其他依赖以解决列冲突
out: name provold salary provnew age 0 A bei 3000 xi 30 1 B xi 4000 sha 40 2 C sha 5000 bei 50
# Left join on every shared column: all rows of df7 are kept, and columns
# coming from df8 are NaN where no row matches.
pd.merge(df7, df8, how='left')
也可以使用how指定合并方式(left、right、outer、inner);未指定on时会以所有同名列(name和prov)作为合并依赖,此处没有两列同时相等的行,所以age全为NaN
out:name prov salary age 0 A bei 3000 NaN 1 B xi 4000 NaN 2 C sha 5000 NaN
# Outer join on every shared column: rows from both frames are kept, with NaN
# filling the columns the other side could not supply.
pd.merge(df7, df8, how='outer')
out:name prov salary age 0 A bei 3000.0 NaN 1 B xi 4000.0 NaN 2 C sha 5000.0 NaN 3 A xi NaN 30.0 4 B sha NaN 40.0 5 C bei NaN 50.0
# The key column is spelled differently on each side ('name' vs 'Name'), so
# the two key columns are named explicitly.
df7 = DataFrame({
    'name': ['A', 'B', 'C'],
    'prov': ['bei', 'xi', 'sha'],
    'salary': [3000, 4000, 5000],
})
df8 = DataFrame({'Name': ['A', 'B', 'C'], 'age': [30, 40, 50]})
# Both key columns appear in the result.
pd.merge(df7, df8, left_on='name', right_on='Name')
当两个DataFrame不存在相同columns时,可以分别指定依赖
out:name prov salary Name age 0 A bei 3000 A 30 1 B xi 4000 B 40 2 C sha 5000 C 50