pandas切片上的运算和赋值比较

x =pd.Series(np.arange(0,10))

y =pd.Series(np.arange(100,110))

df =pd.DataFrame({'x':x, 'y':y})

x y

0 0 100

1 1 101

2 2 102

3 3 103

4 4 104

5 5 105

6 6 106

7 7 107

8 8 108

9 9 109

运算	变量[]	变量.loc[] !!! 包含末尾 !!!	df.loc[] !!! 包含末尾 !!!	df[]
取整列	df.x 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9		df.loc[:,'x'] 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9	df['x'] 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9
取整行			df.loc[2:5] x y 2 2 102 3 3 103 4 4 104 5 5 105	df[2:5] x y 2 2 102 3 3 103 4 4 104 同一个[]，既可以取列，也可以取行！！！
取值	df.x[2:5] 2 2 3 3 4 4	df.x.loc[2:5] 2 2 3 3 4 4 5 5	df.loc[2:5, 'x'] 同左	df[2:5, 'x'] TypeError: unhashable type: 'slice'
加法（index一致）	df.x[2:5] + df.y[2:5] 2 104 3 106 4 108	df.x.loc[2:5] + df.y.loc[2:5] 2 104 3 106 4 108 5 110	df.loc[2:5, 'x'] + df.loc[2:5, 'y'] 同左
纵向 concat	pd.concat([df.x[2:5], df.y[3:6]]) 2 2 3 3 4 4 3 103 4 104 5 105	pd.concat([df.x.loc[2:5], df.y.loc[3:6]]) 2 2 3 3 4 4 5 5 3 103 4 104 5 105 6 106	pd.concat([df.loc[2:5, 'x'], df.loc[3:6, 'y']]) 同左
横向 concat	pd.concat([df.x[2:5], df.y[3:6]], axis=1) x y 2 2.0 NaN 3 3.0 103.0 4 4.0 104.0 5 NaN 105.0	pd.concat([df.x.loc[2:5], df.y.loc[3:6]], axis=1) x y 2 2.0 NaN 3 3.0 103.0 4 4.0 104.0 5 5.0 105.0 6 NaN 106.0	pd.concat([df.loc[2:5, 'x'], df.loc[3:6, 'y']], axis=1) 同左
加法（index不一致）	df.x[2:5] + df.y[3:6] 2 NaN 3 106.0 4 108.0 5 NaN	df.x.loc[2:5] + df.y.loc[3:6] 2 NaN 3 106.0 4 108.0 5 110.0 6 NaN	df.loc[2:5, 'x'] + df.loc[3:6, 'y'] 同左
加法（index不一致，取values）	df.x[2:5].values + df.y[3:6].values array([105, 107, 109])	df.x.loc[2:5].values + df.y.loc[3:6].values array([105, 107, 109, 111])	df.loc[2:5, 'x'].values + df.loc[3:6, 'y'].values 同左
加法（reset index）	df.x[2:5].reset_index(drop=True) + df.y[3:6].reset_index(drop=True) 0 105 1 107 2 109	df.x.loc[2:5].reset_index(drop=True) + df.y.loc[3:6].reset_index(drop=True) 0 105 1 107 2 109 3 111	df.loc[2:5, 'x'].reset_index(drop=True) + df.loc[3:6, 'y'].reset_index(drop=True) 同左
赋值（index不一致）	df1 = df.copy() df1.x[2:5] = df1.y[3:6] df1 x y 0 0 100 1 1 101 2 103 102 3 104 103 4 105 104 5 5 105 6 6 106 7 7 107 8 8 108 9 9 109 ！！！不会对应索引！！！	df1 = df.copy() df1.x.loc[2:5] = df1.y.loc[3:6] df1 x y 0 0.0 100 1 1.0 101 2 NaN 102 3 103.0 103 4 104.0 104 5 105.0 105 6 6.0 106 7 7.0 107 8 8.0 108 9 9.0 109	df1 = df.copy() df1.loc[2:5,'x'] = df1.loc[3:6,'y'] df1 同左
赋值（index不一致，取values）	df1 = df.copy() df1.x[2:5] = df1.y[3:6].values df1 x y 0 0 100 1 1 101 2 103 102 3 104 103 4 105 104 5 5 105 6 6 106 7 7 107 8 8 108 9 9 109	df1 = df.copy() df1.x.loc[2:5] = df1.y.loc[3:6].values df1 x y 0 0 100 1 1 101 2 103 102 3 104 103 4 105 104 5 106 105 6 6 106 7 7 107 8 8 108 9 9 109	df1 = df.copy() df1.loc[2:5,'x'] = df1.loc[3:6,'y'].values df1 同左
赋值（index不一致， reset index）	df1 = df.copy() df1.x[2:5] = df1.y[3:6].reset_index(drop=True) df1 x y 0 0 100 1 1 101 2 103 102 3 104 103 4 105 104 5 5 105 6 6 106 7 7 107 8 8 108 9 9 109	df1 = df.copy() df1.x.loc[2:5] = df1.y.loc[3:6].reset_index(drop=True) df1 x y 0 0.0 100 1 1.0 101 2 105.0 102 3 106.0 103 4 NaN 104 5 NaN 105 6 6.0 106 7 7.0 107 8 8.0 108 9 9.0 109 ！！！对应到0开头的索引！！！	df1 = df.copy() df1.loc[2:5,'x'] = df1.loc[3:6,'y'].reset_index(drop=True) df1 同左

运算

变量[]

变量.loc[]

!!! .loc 按标签切片，包含末尾标签 !!!

df.loc[]

!!! .loc 按标签切片，包含末尾标签 !!!

df[]

取整列

df.x

0 0

1 1

2 2

3 3

4 4

5 5

6 6

7 7

8 8

9 9

df.loc[:,'x']

0 0

1 1

2 2

3 3

4 4

5 5

6 6

7 7

8 8

9 9

df['x']

0    0

1    1

2    2

3    3

4    4

5    5

6    6

7    7

8    8

9    9

取整行

df.loc[2:5]

x y

2 2 102

3 3 103

4 4 104

5 5 105

df[2:5]

x y

2 2 102

3 3 103

4 4 104

同一个 []，既可以取列（df['x']，按列名），也可以取行（df[2:5]，按位置切片，不包含末尾）！！！

取值

df.x[2:5]

2 2

3 3

4 4

df.x.loc[2:5]

2 2

3 3

4 4

5 5

df.loc[2:5, 'x']

同左

df[2:5, 'x']

TypeError: unhashable type: 'slice'（df[] 不支持同时指定行和列，应使用 df.loc[2:5, 'x']；较新版本的 pandas 中此处报 InvalidIndexError）

加法

（index一致）

df.x[2:5] + df.y[2:5]

2 104

3 106

4 108

df.x.loc[2:5] + df.y.loc[2:5]

2 104

3 106

4 108

5 110

df.loc[2:5, 'x'] + df.loc[2:5, 'y']

同左

纵向

concat

pd.concat([df.x[2:5], df.y[3:6]])

2 2

3 3

4 4

3 103

4 104

5 105

pd.concat([df.x.loc[2:5], df.y.loc[3:6]])

2 2

3 3

4 4

5 5

3 103

4 104

5 105

6 106

pd.concat([df.loc[2:5, 'x'], df.loc[3:6, 'y']])

同左

横向

concat

pd.concat([df.x[2:5], df.y[3:6]], axis=1)

     x      y

2  2.0    NaN

3  3.0  103.0

4  4.0  104.0

5  NaN  105.0

pd.concat([df.x.loc[2:5], df.y.loc[3:6]], axis=1)

x y

2 2.0 NaN

3 3.0 103.0

4 4.0 104.0

5 5.0 105.0

6 NaN 106.0

pd.concat([df.loc[2:5, 'x'], df.loc[3:6, 'y']], axis=1)

同左

加法

（index不一致）

df.x[2:5] + df.y[3:6]

2 NaN

3 106.0

4 108.0

5 NaN

df.x.loc[2:5] + df.y.loc[3:6]

2 NaN

3 106.0

4 108.0

5 110.0

6 NaN

df.loc[2:5, 'x'] + df.loc[3:6, 'y']

同左

加法

（index不一致，

取values）

df.x[2:5].values + df.y[3:6].values

array([105, 107, 109])

df.x.loc[2:5].values + df.y.loc[3:6].values

array([105, 107, 109, 111])

df.loc[2:5, 'x'].values + df.loc[3:6, 'y'].values

同左

加法

（reset index）

df.x[2:5].reset_index(drop=True) + df.y[3:6].reset_index(drop=True)

0 105

1 107

2 109

df.x.loc[2:5].reset_index(drop=True) + df.y.loc[3:6].reset_index(drop=True)

0 105

1 107

2 109

3 111

df.loc[2:5, 'x'].reset_index(drop=True) + df.loc[3:6, 'y'].reset_index(drop=True)

同左

赋值

（index不一致）

df1 = df.copy()

df1.x[2:5] = df1.y[3:6]

df1

x y

0 0 100

1 1 101

2 103 102

3 104 103

4 105 104

5 5 105

6 6 106

7 7 107

8 8 108

9 9 109

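！！！按位置依次赋值，不会按索引对齐！！！（注意：df1.x[2:5] = ... 属于链式赋值，在启用 Copy-on-Write 的新版 pandas 中不会修改 df1，建议改用 df1.loc[行, 列] = ... 的写法）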

df1 = df.copy()

df1.x.loc[2:5] = df1.y.loc[3:6]

df1

x y

0 0.0 100

1 1.0 101

2 NaN 102

3 103.0 103

4 104.0 104

5 105.0 105

6 6.0 106

7 7.0 107

8 8.0 108

9 9.0 109

df1 = df.copy()

df1.loc[2:5,'x'] = df1.loc[3:6,'y']

df1

同左

赋值

（index不一致，

取values）

df1 = df.copy()

df1.x[2:5] = df1.y[3:6].values

df1

x y

0 0 100

1 1 101

2 103 102

3 104 103

4 105 104

5 5 105

6 6 106

7 7 107

8 8 108

9 9 109

df1 = df.copy()

df1.x.loc[2:5] = df1.y.loc[3:6].values

df1

x y

0 0 100

1 1 101

2 103 102

3 104 103

4 105 104

5 106 105

6 6 106

7 7 107

8 8 108

9 9 109

df1 = df.copy()

df1.loc[2:5,'x'] = df1.loc[3:6,'y'].values

df1

同左

赋值

（index不一致，

reset index）

df1 = df.copy()

df1.x[2:5] = df1.y[3:6].reset_index(drop=True)

df1

x y

0 0 100

1 1 101

2 103 102

3 104 103

4 105 104

5 5 105

6 6 106

7 7 107

8 8 108

9 9 109

df1 = df.copy()

df1.x.loc[2:5] = df1.y.loc[3:6].reset_index(drop=True)

df1

x y

0 0.0 100

1 1.0 101

2 105.0 102

3 106.0 103

4 NaN 104

5 NaN 105

6 6.0 106

7 7.0 107

8 8.0 108

9 9.0 109

！！！reset_index 后右侧的索引变为 0~3，.loc 赋值按索引标签对齐：标签 2、3 取到右侧索引 2、3 的值 105、106，标签 4、5 在右侧不存在，因此为 NaN！！！

df1 = df.copy()

df1.loc[2:5,'x'] = df1.loc[3:6,'y'].reset_index(drop=True)

df1

同左

pandas切片上的运算和赋值比较

猜你喜欢