创建DataFrame
import pandas as pd
import numpy as np
# Source data: the integers 0..49 laid out row by row in a 10x5 grid.
a = np.arange(50).reshape(10, 5)

# Rows are labelled index0..index9 and columns L1..L5.
row_labels = ['index%d' % i for i in range(10)]
col_labels = ['L%d' % i for i in range(1, 6)]
test_df = pd.DataFrame(data=a, index=row_labels, columns=col_labels)
        L1  L2  L3  L4  L5
index0   0   1   2   3   4
index1   5   6   7   8   9
index2  10  11  12  13  14
index3  15  16  17  18  19
index4  20  21  22  23  24
index5  25  26  27  28  29
index6  30  31  32  33  34
index7  35  36  37  38  39
index8  40  41  42  43  44
index9  45  46  47  48  49
将列的名称映射为值
# Build a DataFrame from a dict: each key becomes a column name and
# its list supplies that column's values.
test_df1 = pd.DataFrame({'A': [0, 1, 2, 3], 'B': [4, 5, 6, 7]})
# print() with a single argument works on both Python 2 and Python 3;
# the bare `print x` statement is a SyntaxError on Python 3.
print(test_df1)
   A  B
0  0  4
1  1  5
2  2  6
3  3  7
获取元素
# Select a single row (print() form runs on both Python 2 and 3)
print(test_df.iloc[0])  # by integer position
print(test_df.loc['index3'])  # by index label
L1 0
L2 1
L3 2
L4 3
L5 4
Name: index0, dtype: int32
L1 15
L2 16
L3 17
L4 18
L5 19
Name: index3, dtype: int32
# Select a single column by its name (print() form runs on both Python 2 and 3)
print(test_df['L3'])
index0 2
index1 7
index2 12
index3 17
index4 22
index5 27
index6 32
index7 37
index8 42
index9 47
Name: L3, dtype: int32
# Select a single element by (row position, column position)
print(test_df.iloc[1, 3])  # 8
# Select several rows with a positional slice (end position is excluded)
print(test_df.iloc[1:4])
        L1  L2  L3  L4  L5
index1   5   6   7   8   9
index2  10  11  12  13  14
index3  15  16  17  18  19
# Select several columns by passing a list of column names
print(test_df[['L1', 'L2']])
        L1  L2
index0   0   1
index1   5   6
index2  10  11
index3  15  16
index4  20  21
index5  25  26
index6  30  31
index7  35  36
index8  40  41
index9  45  46
pandas axis
# Sum of each column (sum() called without an axis argument)
print(test_df.sum())
L1 225
L2 235
L3 245
L4 255
L5 265
# Sum of each row (axis=1 adds across the columns)
print(test_df.sum(axis=1))
index0 10
index1 35
index2 60
index3 85
index4 110
index5 135
index6 160
index7 185
index8 210
index9 235
# Grand total over every cell; test_df.values returns the data as a NumPy array
print(test_df.values.sum())  # 1225