版权声明:本文版权归 https://blog.csdn.net/thfyshz 所有,原文链接:https://blog.csdn.net/thfyshz/article/details/83692129
必要库的导入:
import pandas as pd
import numpy as np
import functools
1 Idioms(惯用法)
df = pd.DataFrame(
...: {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
1.1 if-then(满足条件则赋值)
#对满足条件的行赋值,会直接修改原dataframe
df.loc[df.BBB>25, 'CCC'] = 0; df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 0
3 7 40 0
df.loc[df.AAA>5, ['BBB', 'CCC']] = -1; df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 -1 -1
3 7 -1 -1
#用布尔掩码指定哪些位置保留原值:where在掩码为True处保留原值,在False处替换为给定值(此处为-1000),返回新的dataframe
df_mask = pd.DataFrame({'AAA' : [True] * 4, 'BBB' : [False] * 4,'CCC' : [True,False] * 2})
df.where(df_mask,-1000)
AAA BBB CCC
0 4 -1000 100
1 5 -1000 -1000
2 6 -1000 -1
3 7 -1000 -1000
df = pd.DataFrame(
...: {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
#根据另一列是否满足条件来设定新列的值:np.where在条件成立处取'high',否则取'low'
df['logic'] = np.where(df['AAA'] > 5,'high','low'); df
AAA BBB CCC logic
0 4 10 100 low
1 5 20 50 low
2 6 30 -30 high
3 7 40 -50 high
1.2 Splitting(拆分)
df = pd.DataFrame(
...: {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
#按布尔条件筛选出子集,不会改变原dataframe
dflow = df[df.AAA <= 5]; dflow
AAA BBB CCC
0 4 10 100
1 5 20 50
dfhigh = df[df.AAA > 5]; dfhigh
AAA BBB CCC
2 6 30 -30
3 7 40 -50
1.3 Building Criteria(构建筛选条件)
df = pd.DataFrame(
....: {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
#用&和|分别表示“与”、“或”来组合条件;每个条件都要加括号,因为&和|的优先级高于比较运算符
newseries = df.loc[(df['BBB'] < 25) & (df['CCC'] >= -40), 'AAA']; newseries
0 4
1 5
Name: AAA, dtype: int64
df.loc[(df['BBB'] > 25) | (df['CCC'] >= 75), 'AAA'] = 0.1; df
AAA BBB CCC
0 0.1 10 100
1 5.0 20 50
2 0.1 30 -30
3 0.1 40 -50
df
AAA BBB CCC
0 0.1 10 100
1 5.0 20 50
2 0.1 30 -30
3 0.1 40 -50
#argsort的用法:返回按值从小到大排序后各元素的整数位置(不是索引标签);这里索引是默认的0..n-1,位置与标签一致,所以可以直接传给loc,得到按CCC与aValue的接近程度排序的结果
aValue = 43.0
df.loc[(df.CCC-aValue).abs().argsort()]
AAA BBB CCC
1 5.0 20 50
0 0.1 10 100
2 0.1 30 -30
3 0.1 40 -50
df = pd.DataFrame(
....: {'AAA' : [4,5,6,7], 'BBB' : [10,20,30,40],'CCC' : [100,50,-30,-50]}); df
AAA BBB CCC
0 4 10 100
1 5 20 50
2 6 30 -30
3 7 40 -50
#挑选出满足以下所有条件的行:
Crit1 = df.AAA <= 5.5
Crit2 = df.BBB == 10.0
Crit3 = df.CCC > -40.0
CritList = [Crit1,Crit2,Crit3]
AllCrit = functools.reduce(lambda x,y: x & y, CritList)
df[AllCrit]
AAA BBB CCC
0 4 10 100