from pandas import Series,DataFrame
import pandas as pd
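'''
A Series is a one-dimensional, array-like object.
It is made up of an array of values and an index.
It can be thought of as a fixed-length, ordered dict.
The index stays attached to the values; computations align on matching index labels.
'''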
# Build a Series from a plain list; no index is given, so the default
# integer index 0..3 is used.
obj = pd.Series([4, 7, -5, 3])
obj
0 4 1 7 2 -5 3 3 dtype: int64
# Underlying values of the Series as a NumPy array.
obj.values
array([ 4, 7, -5, 3], dtype=int64)
# Index object holding the labels of the Series.
obj.index
RangeIndex(start=0, stop=4, step=1)
# Same values, this time with explicit string labels as the index.
obj = pd.Series([4, 7, -5, 3], index=list('asdf'))
obj
a 4 s 7 d -5 f 3 dtype: int64
# Look up a single element by its index label.
obj['d']
-5
# Boolean-mask filtering: keep only the positive entries; their labels are preserved.
obj[obj>0]
a 4 s 7 f 3 dtype: int64
'''
A DataFrame is a tabular data structure.
It has both a row index and a column index.
++++ The column order can be specified explicitly.
'''
# Assemble a DataFrame from a dict mapping column name -> Series;
# all three columns reuse the same Series `obj`.
data = {column: obj for column in ('a', 'b', 'c')}
objframe = pd.DataFrame(data)
type(data)
dict
# Display the frame built from the dict of Series.
objframe
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
a |
b |
c |
a |
4 |
4 |
4 |
s |
7 |
7 |
7 |
d |
-5 |
-5 |
-5 |
f |
3 |
3 |
3 |
# Selecting one column by name returns it as a Series.
objframe['a']
a 4 s 7 d -5 f 3 Name: a, dtype: int64
import numpy as np

# New column filled with random integers drawn from {0, 1}, four values in total.
objframe['d'] = np.random.randint(0, 2, size=4)
# Assigning a Series as a column aligns it on index labels: rows whose
# label is absent from the Series get NaN.
colval = pd.Series([1, 2, 3, 4], index=list('adfg'))
objframe['e'] = colval
objframe
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
a |
b |
c |
d |
e |
a |
4 |
4 |
4 |
0 |
1.0 |
s |
7 |
7 |
7 |
1 |
NaN |
d |
-5 |
-5 |
-5 |
0 |
2.0 |
f |
3 |
3 |
3 |
0 |
3.0 |
# Remove column 'a' from the frame in place, then display the result.
del objframe['a']
objframe
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
b |
c |
d |
e |
a |
4 |
4 |
0 |
1.0 |
s |
7 |
7 |
1 |
NaN |
d |
-5 |
-5 |
0 |
2.0 |
f |
3 |
3 |
0 |
3.0 |
# Element-wise choice: 6 where the value is greater than 3, otherwise -1
# (NaN entries compare False and therefore become -1). The result is a NumPy array.
np.where(objframe>3,6,-1)
array([[ 6, 6, -1, -1], [ 6, 6, -1, -1], [-1, -1, -1, -1], [-1, -1, -1, -1]])
# Element-wise comparison yields a boolean DataFrame with the same row and column labels.
objcopy=objframe>8
objcopy
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
b |
c |
d |
e |
a |
False |
False |
False |
False |
s |
False |
False |
False |
False |
d |
False |
False |
False |
False |
f |
False |
False |
False |
False |
# 3x3 frame holding 0..8 row by row, displayed transposed.
frame = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['ohin', 'leis', 'cal'],
)
frame.T
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
a |
b |
c |
ohin |
0 |
3 |
6 |
leis |
1 |
4 |
7 |
cal |
2 |
5 |
8 |
# Select the 'leis' column, then take its first two rows.
frame['leis'][:2]
a 1 b 4 Name: leis, dtype: int32
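'''
+++++++ Basic pandas functionality ++++++++
1. reindex: re-index the data against a new set of labels
2. drop: discard the specified entries
3. Indexing, selection and filtering
Series indexing works like array indexing, except that slicing by label behaves differently.
DataFrame indexing: obj.loc[] / obj.iloc[] select rows (obj.ix[] was removed in pandas 1.0), obj[] selects columns
4. Arithmetic and data alignment (add, sub, div, mul)
A fill value (e.g. fill_value=0) can be supplied in arithmetic operations; missing values otherwise propagate
In operations between a DataFrame and a Series, the Series is broadcast automatically
5. Function application and mapping
'''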
# reindex returns a new frame; row labels that `frame` lacks ('d', 'e')
# are filled with 0, and `frame` itself is printed unchanged afterwards.
frame2 = frame.reindex(index=list('abcd'), fill_value=0)
frame3 = frame.reindex(index=list('abcde'), fill_value=0)
print(frame)
frame3
ohin leis cal a 0 1 2 b 3 4 5 c 6 7 8
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
cal |
a |
0 |
1 |
2 |
b |
3 |
4 |
5 |
c |
6 |
7 |
8 |
d |
0 |
0 |
0 |
e |
0 |
0 |
0 |
# Reindex the columns: 'ca' is not an existing column, so it is added filled
# with NaN, while 'cal' is left out because it is not in the list.
sates =['ohin','leis','ca']
frame2.reindex(columns=sates)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
ca |
a |
0 |
1 |
NaN |
b |
3 |
4 |
NaN |
c |
6 |
7 |
NaN |
d |
0 |
0 |
NaN |
# drop returns a new frame without row 'a'; frame2 itself is not modified.
frame3 =frame2.drop('a')
frame3
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
cal |
b |
3 |
4 |
5 |
c |
6 |
7 |
8 |
d |
0 |
0 |
0 |
# axis=1 makes drop remove a column rather than a row.
frame2.drop('leis',axis=1)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
cal |
a |
0 |
2 |
b |
3 |
5 |
c |
6 |
8 |
d |
0 |
0 |
# axis=0: one mean per column.
frame2.mean(axis=0)
ohin 2.25 leis 3.00 cal 3.75 dtype: float64
# axis=1: one mean per row.
frame2.mean(axis=1)
a 1.0 b 4.0 c 7.0 d 0.0 dtype: float64
# axis=0 drops by row label: the result omits row 'd'.
frame2.drop('d',axis=0)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
cal |
a |
0 |
1 |
2 |
b |
3 |
4 |
5 |
c |
6 |
7 |
8 |
# axis=1 drops by column label: the result omits column 'cal'.
frame2.drop('cal',axis=1)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
a |
0 |
1 |
b |
3 |
4 |
c |
6 |
7 |
d |
0 |
0 |
# Select row 'a' by label. `.loc` replaces the `.ix` indexer, which was
# removed in pandas 1.0 and raises AttributeError on current versions.
frame2.loc['a']
ohin 0 leis 1 cal 2 Name: a, dtype: int32
# A slice inside [] selects rows: here, only the first row.
frame2[:1]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
# Boolean row filter: keep the rows whose 'cal' value is greater than 2.
frame2[frame2['cal']>2]
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
ohin |
leis |
cal |
b |
3 |
4 |
5 |
c |
6 |
7 |
8 |
# Positional slice: first row and first column. `.iloc` replaces the `.ix`
# indexer (removed in pandas 1.0); with string labels, `.ix` treated these
# integer slices as positions, so `.iloc` gives the same selection.
print(frame2.iloc[:1, :1])
ohin a 0
# 3x5 frame of 0..14; the column labels 'h', 'e', 'l', 'l', 'o' contain a
# duplicated 'l'.
frame3 = pd.DataFrame(
    np.arange(15).reshape(3, 5),
    index=list('abc'),
    columns=list('hello'),
)
frame3
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
h |
e |
l |
l |
o |
a |
0 |
1 |
2 |
3 |
4 |
b |
5 |
6 |
7 |
8 |
9 |
c |
10 |
11 |
12 |
13 |
14 |
# Row 'a' as a Series indexed by the column labels. `.loc` replaces the
# `.ix` indexer, which was removed in pandas 1.0.
serie = Series(frame3.loc['a'])
serie
h 0 e 1 l 2 l 3 o 4 Name: a, dtype: int32
# DataFrame + Series: the Series labels are matched against the columns
# and the Series is added to every row.
frame3 + serie
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
h |
e |
l |
l |
o |
a |
0 |
2 |
4 |
6 |
8 |
b |
5 |
7 |
9 |
11 |
13 |
c |
10 |
12 |
14 |
16 |
18 |
# Column 'h' as a Series indexed by the row labels.
serie2=Series(frame3['h'])
serie2
a 0 b 5 c 10 Name: h, dtype: int32
# axis=0 matches the Series against the row index, so each row's value is
# added across all columns of that row.
frame3.add(serie2 ,axis=0)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
|
h |
e |
l |
l |
o |
a |
0 |
1 |
2 |
3 |
4 |
b |
10 |
11 |
12 |
13 |
14 |
c |
20 |
21 |
22 |
23 |
24 |