Python 数据分析 之numpy的nan
含有nan时,若要进行求均值等操作,先将nan替换为该列(行)的均值
代码如下
import numpy as np
# Demo data: a 3x4 float array holding 0..11; floats are required because
# NaN cannot be stored in an integer array.
t1 = np.arange(12, dtype=float).reshape(3, 4)
# Knock out the last two entries of the middle row to simulate missing data.
t1[1, [2, 3]] = np.nan
def fill_ndarray(t1):
    """Replace every NaN in a 2-D array with the mean of its column.

    The mean of a column is computed from its non-NaN entries only.
    Columns without NaN are left untouched. A column that consists
    entirely of NaN has no defined mean, so it is left unchanged instead
    of taking the mean of an empty slice (which emits a RuntimeWarning).

    Args:
        t1: 2-D NumPy float array. It is modified in place.

    Returns:
        The same array object that was passed in, with NaNs filled.
    """
    for i in range(t1.shape[1]):
        col = t1[:, i]  # basic slicing yields a view, so writes reach t1
        nan_mask = np.isnan(col)
        if not nan_mask.any():
            continue  # nothing to fill in this column
        valid = col[~nan_mask]
        if valid.size == 0:
            continue  # all-NaN column: no mean available, keep as is
        # Overwrite the NaN positions with the mean of the valid entries.
        col[nan_mask] = valid.mean()
    return t1
# Show the array before filling: row 1 has NaN in its last two columns.
print(t1)
# Expected output of the print above (kept as a no-op string literal).
'''
[[ 0. 1. 2. 3.]
[ 4. 5. nan nan]
[ 8. 9. 10. 11.]]
'''
# Fill the NaNs and show the result; fill_ndarray returns the array it was
# given. Each NaN becomes the mean of the other values in its column:
# (2 + 10) / 2 = 6 and (3 + 11) / 2 = 7.
print(fill_ndarray(t1))
# Expected output of the print above (kept as a no-op string literal).
'''
[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]]
'''