# combine_first: missing values in the first object are filled in from the second object
# Series
# First operand: the NaN entries at labels B and D are the gaps to be filled.
s1 = Series(data=[2, np.nan, 4, np.nan], index=['A', 'B', 'C', 'D'])
s1
Out[29]:
A 2.0
B NaN
C 4.0
D NaN
dtype: float64
# Second operand: has a value for every label A-D.
s2 = Series(data=[1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
s2
Out[31]:
A 1
B 2
C 3
D 4
dtype: int64
# s1 keeps its own values; its missing (NaN) entries are filled in from s2.
s1.combine_first(other=s2)
Out[32]:
A 2.0
B 2.0
C 4.0
D 4.0
dtype: float64
# DataFrame: works like the Series case. Rows 1 and 3 of df1 are NaN in every column.
df1 = DataFrame(data={
    'X': [1, np.nan, 3, np.nan],
    'Y': [5, np.nan, 7, np.nan],
    'Z': [9, np.nan, 11, np.nan],
})
df1
Out[36]:
X Y Z
0 1.0 5.0 9.0
1 NaN NaN NaN
2 3.0 7.0 11.0
3 NaN NaN NaN
# Second frame: Z has values only in rows 1 and 3; column A has a value in every row.
df2 = DataFrame(data={'Z': [np.nan, 10, np.nan, 12], 'A': [1, 2, 3, 4]})
df2
Out[38]:
A Z
0 1 NaN
1 2 10.0
2 3 NaN
3 4 12.0
# The result has the columns of both frames; cells that are NaN in df1 are
# taken from df2 where df2 has a value, and stay NaN otherwise.
df1.combine_first(other=df2)
Out[39]:
A X Y Z
0 1.0 1.0 5.0 9.0
1 2.0 NaN NaN 10.0
2 3.0 3.0 7.0 11.0
3 4.0 NaN NaN 12.0
import pandas as pd
# `numpy.NaN` was removed in NumPy 2.0, so `from numpy import NaN` raises
# ImportError there. The lowercase `nan` exists in every NumPy release;
# importing it under the old name keeps the uses of `NaN` below working.
from numpy import nan as NaN

# df1 is missing (NaN) at (row 0, 'b') and (row 1, 'a'); df2 has values at
# both of those positions.
data1 = [{'a': '1', 'b': NaN}, {'a': NaN, 'b': '2'}]
data2 = [{'a': '2', 'b': '3'}, {'a': '4', 'b': NaN}]
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)
# Fill the missing values of df1 with the values df2 holds at the same
# position; df1 and df2 themselves are left unchanged.
df3 = df1.combine_first(df2)
print(df1)
print("######")
print(df2)
print("######")
print(df3)
a b
0 1 NaN
1 NaN 2
######
a b
0 2 3
1 4 NaN
######
a b
0 1 3
1 4 2