import pandas as pd
import numpy as np
df = pd. DataFrame( { 'key1' : [ 'a' , 'a' , 'b' , 'b' , 'a' ] ,
'key2' : [ 'one' , 'two' , 'one' , 'two' , 'one' ] ,
'data1' : np. random. randn( 5 ) ,
'data2' : np. random. randn( 5 ) } )
df
key1
key2
data1
data2
0
a
one
1.364596
0.352792
1
a
two
1.685626
0.236429
2
b
one
-0.537077
-0.018004
3
b
two
1.389866
0.826195
4
a
one
0.849733
1.619383
grouped = df[ 'data1' ] . groupby( df[ 'key1' ] )
grouped
<pandas.core.groupby.groupby.SeriesGroupBy object at 0x00000261B3E2CEB8>
grouped. mean( )
key1
a 1.299985
b 0.426394
Name: data1, dtype: float64
means = df[ 'data1' ] . groupby( [ df[ 'key1' ] , df[ 'key2' ] ] ) . mean( )
means
key1 key2
a one 1.107165
two 1.685626
b one -0.537077
two 1.389866
Name: data1, dtype: float64
means. unstack( )
key2
one
two
key1
a
1.107165
1.685626
b
-0.537077
1.389866
states = np. array( [ 'Ohio' , 'California' , 'California' , 'Ohio' , 'Ohio' ] )
years = np. array( [ 2005 , 2005 , 2006 , 2005 , 2006 ] )
df[ 'data1' ] . groupby( [ states, years] ) . mean( )
California 2005 1.685626
2006 -0.537077
Ohio 2005 1.377231
2006 0.849733
Name: data1, dtype: float64
df. groupby( 'key1' ) . mean( )
data1
data2
key1
a
1.299985
0.736201
b
0.426394
0.404096
df. groupby( [ 'key1' , 'key2' ] ) . mean( )
data1
data2
key1
key2
a
one
1.107165
0.986087
two
1.685626
0.236429
b
one
-0.537077
-0.018004
two
1.389866
0.826195
df. groupby( [ 'key1' , 'key2' ] ) . size( )
key1 key2
a one 2
two 1
b one 1
two 1
dtype: int64
for name, group in df. groupby( 'key1' ) :
print ( name)
print ( group)
a
key1 key2 data1 data2
0 a one 1.364596 0.352792
1 a two 1.685626 0.236429
4 a one 0.849733 1.619383
b
key1 key2 data1 data2
2 b one -0.537077 -0.018004
3 b two 1.389866 0.826195
for ( k1, k2) , group in df. groupby( [ 'key1' , 'key2' ] ) :
print ( ( k1, k2) )
print ( group)
('a', 'one')
key1 key2 data1 data2
0 a one 1.364596 0.352792
4 a one 0.849733 1.619383
('a', 'two')
key1 key2 data1 data2
1 a two 1.685626 0.236429
('b', 'one')
key1 key2 data1 data2
2 b one -0.537077 -0.018004
('b', 'two')
key1 key2 data1 data2
3 b two 1.389866 0.826195
pieces = dict ( list ( df. groupby( 'key1' ) ) )
pieces[ 'b' ]
key1
key2
data1
data2
2
b
one
-0.537077
-0.018004
3
b
two
1.389866
0.826195
df. dtypes
key1 object
key2 object
data1 float64
data2 float64
dtype: object
grouped = df. groupby( df. dtypes, axis= 1 )
for dtype, group in grouped:
print ( dtype)
print ( group)
float64
data1 data2
0 1.364596 0.352792
1 1.685626 0.236429
2 -0.537077 -0.018004
3 1.389866 0.826195
4 0.849733 1.619383
object
key1 key2
0 a one
1 a two
2 b one
3 b two
4 a one
df[ 'data1' ] . groupby( df[ 'key1' ] )
<pandas.core.groupby.groupby.SeriesGroupBy object at 0x00000261B3E74FD0>
df. groupby( [ 'key1' , 'key2' ] ) [ [ 'data2' ] ] . mean( )
data2
key1
key2
a
one
0.986087
two
0.236429
b
one
-0.018004
two
0.826195
people = pd. DataFrame( np. random. randn( 5 , 5 ) ,
columns= [ 'a' , 'b' , 'c' , 'd' , 'e' ] ,
index= [ 'Joe' , 'Steve' , 'Wes' , 'Jim' , 'Travis' ] )
people. iloc[ 2 : 3 , [ 1 , 2 ] ] = np. nan
people
a
b
c
d
e
Joe
0.037796
1.631083
0.455609
-1.405327
0.495940
Steve
0.304393
0.326005
0.146350
0.075903
-0.263559
Wes
-0.055827
NaN
NaN
1.268622
-0.541199
Jim
2.034394
0.818811
0.333991
0.158734
1.187207
Travis
2.719235
-0.459516
-0.292250
0.158169
1.102169
mapping = { 'a' : 'red' , 'b' : 'red' , 'c' : 'blue' ,
'd' : 'blue' , 'e' : 'red' , 'f' : 'orange' }
by_column = people. groupby( mapping, axis= 1 )
by_column. sum ( )
blue
red
Joe
-0.949718
2.164819
Steve
0.222253
0.366838
Wes
1.268622
-0.597026
Jim
0.492726
4.040412
Travis
-0.134082
3.361889
map_series = pd. Series( mapping)
map_series
a red
b red
c blue
d blue
e red
f orange
dtype: object
people. groupby( map_series, axis= 1 ) . count( )
blue
red
Joe
2
3
Steve
2
3
Wes
1
2
Jim
2
3
Travis
2
3
people. groupby( len ) . sum ( )
a
b
c
d
e
3
2.016363
2.449894
0.789601
0.022029
1.141948
5
0.304393
0.326005
0.146350
0.075903
-0.263559
6
2.719235
-0.459516
-0.292250
0.158169
1.102169
key_list = [ 'one' , 'one' , 'one' , 'two' , 'two' ]
people. groupby( [ len , key_list] ) . min ( )
a
b
c
d
e
3
one
-0.055827
1.631083
0.455609
-1.405327
-0.541199
two
2.034394
0.818811
0.333991
0.158734
1.187207
5
one
0.304393
0.326005
0.146350
0.075903
-0.263559
6
two
2.719235
-0.459516
-0.292250
0.158169
1.102169
columns = pd. MultiIndex. from_arrays( [ [ 'US' , 'US' , 'US' , 'JP' , 'JP' ] ,
[ 1 , 3 , 5 , 1 , 3 ] ] ,
names= [ 'cty' , 'tenor' ] )
hier_df = pd. DataFrame( np. random. randn( 4 , 5 ) , columns= columns)
hier_df
cty
US
JP
tenor
1
3
5
1
3
0
-1.865861
0.145454
1.476689
0.798411
0.546048
1
1.887149
0.218613
0.951165
2.790860
-2.419909
2
-0.681806
-0.490238
-2.215909
0.375548
0.145144
3
0.501322
1.358101
-1.069453
1.916614
-0.018305
hier_df. groupby( level= 'cty' , axis= 1 ) . count( )
cty
JP
US
0
2
3
1
2
3
2
2
3
3
2
3
df
key1
key2
data1
data2
0
a
one
1.364596
0.352792
1
a
two
1.685626
0.236429
2
b
one
-0.537077
-0.018004
3
b
two
1.389866
0.826195
4
a
one
0.849733
1.619383
grouped = df. groupby( 'key1' )
grouped[ 'data1' ] . quantile( 0.9 )
key1
a 1.621420
b 1.197171
Name: data1, dtype: float64
def peak_to_peak(arr):
    """Return the spread (max - min) of a Series/array, for use with GroupBy.agg."""
    lo = arr.min()
    hi = arr.max()
    return hi - lo
grouped. agg( peak_to_peak)
data1
data2
key1
a
0.835893
1.382954
b
1.926943
0.844199
grouped. describe( )
data1
data2
count
mean
std
min
25%
50%
75%
max
count
mean
std
min
25%
50%
75%
max
key1
a
3.0
1.299985
0.421675
0.849733
1.107165
1.364596
1.525111
1.685626
3.0
0.736201
0.767068
0.236429
0.294610
0.352792
0.986087
1.619383
b
2.0
0.426394
1.362554
-0.537077
-0.055342
0.426394
0.908130
1.389866
2.0
0.404096
0.596939
-0.018004
0.193046
0.404096
0.615145
0.826195
tips = pd. read_csv( 'examples/tips.csv' )
tips[ 'tip_pct' ] = tips[ 'tip' ] / tips[ 'total_bill' ]
tips[ : 6 ]
total_bill
tip
smoker
day
time
size
tip_pct
0
16.99
1.01
No
Sun
Dinner
2
0.059447
1
10.34
1.66
No
Sun
Dinner
3
0.160542
2
21.01
3.50
No
Sun
Dinner
3
0.166587
3
23.68
3.31
No
Sun
Dinner
2
0.139780
4
24.59
3.61
No
Sun
Dinner
4
0.146808
5
25.29
4.71
No
Sun
Dinner
4
0.186240
grouped = tips. groupby( [ 'day' , 'smoker' ] )
grouped_pct = grouped[ 'tip_pct' ]
grouped_pct. agg( 'mean' )
day smoker
Fri No 0.151650
Yes 0.174783
Sat No 0.158048
Yes 0.147906
Sun No 0.160113
Yes 0.187250
Thur No 0.160298
Yes 0.163863
Name: tip_pct, dtype: float64
grouped_pct. agg( [ 'mean' , 'std' , peak_to_peak] )
mean
std
peak_to_peak
day
smoker
Fri
No
0.151650
0.028123
0.067349
Yes
0.174783
0.051293
0.159925
Sat
No
0.158048
0.039767
0.235193
Yes
0.147906
0.061375
0.290095
Sun
No
0.160113
0.042347
0.193226
Yes
0.187250
0.154134
0.644685
Thur
No
0.160298
0.038774
0.193350
Yes
0.163863
0.039389
0.151240
grouped_pct. agg( [ ( 'foo' , 'mean' ) , ( 'bar' , np. std) ] )
foo
bar
day
smoker
Fri
No
0.151650
0.028123
Yes
0.174783
0.051293
Sat
No
0.158048
0.039767
Yes
0.147906
0.061375
Sun
No
0.160113
0.042347
Yes
0.187250
0.154134
Thur
No
0.160298
0.038774
Yes
0.163863
0.039389
functions = ['count', 'mean', 'max']
# Select columns with a list: grouped['tip_pct', 'total_bill'] (tuple indexing
# on a GroupBy) was deprecated and removed in pandas >= 1.0.
result = grouped[['tip_pct', 'total_bill']].agg(functions)
result
tip_pct
total_bill
count
mean
max
count
mean
max
day
smoker
Fri
No
4
0.151650
0.187735
4
18.420000
22.75
Yes
15
0.174783
0.263480
15
16.813333
40.17
Sat
No
45
0.158048
0.291990
45
19.661778
48.33
Yes
42
0.147906
0.325733
42
21.276667
50.81
Sun
No
57
0.160113
0.252672
57
20.506667
48.17
Yes
19
0.187250
0.710345
19
24.120000
45.35
Thur
No
45
0.160298
0.266312
45
17.113111
41.19
Yes
17
0.163863
0.241255
17
19.190588
43.11
ftuples = [('Durchschnitt', 'mean'), ('Abweichung', np.var)]
# Select columns with a list — tuple indexing on a GroupBy was deprecated
# and removed in pandas >= 1.0.
grouped[['tip_pct', 'total_bill']].agg(ftuples)
tip_pct
total_bill
Durchschnitt
Abweichung
Durchschnitt
Abweichung
day
smoker
Fri
No
0.151650
0.000791
18.420000
25.596333
Yes
0.174783
0.002631
16.813333
82.562438
Sat
No
0.158048
0.001581
19.661778
79.908965
Yes
0.147906
0.003767
21.276667
101.387535
Sun
No
0.160113
0.001793
20.506667
66.099980
Yes
0.187250
0.023757
24.120000
109.046044
Thur
No
0.160298
0.001503
17.113111
59.625081
Yes
0.163863
0.001551
19.190588
69.808518
grouped. agg( { 'tip' : np. max , 'size' : 'sum' } )
grouped. agg( { 'tip_pct' : [ 'min' , 'max' , 'mean' , 'std' ] ,
'size' : 'sum' } )
tip_pct
size
min
max
mean
std
sum
day
smoker
Fri
No
0.120385
0.187735
0.151650
0.028123
9
Yes
0.103555
0.263480
0.174783
0.051293
31
Sat
No
0.056797
0.291990
0.158048
0.039767
115
Yes
0.035638
0.325733
0.147906
0.061375
104
Sun
No
0.059447
0.252672
0.160113
0.042347
167
Yes
0.065660
0.710345
0.187250
0.154134
49
Thur
No
0.072961
0.266312
0.160298
0.038774
112
Yes
0.090014
0.241255
0.163863
0.039389
40
def top(df, n=5, column='tip_pct'):
    """Return the n rows of df with the largest values in `column`, in ascending order."""
    ranked = df.sort_values(by=column)
    # Negative slice keeps the original tail-of-sorted semantics.
    return ranked[-n:]
top( tips, n= 6 )
total_bill
tip
smoker
day
time
size
tip_pct
109
14.31
4.00
Yes
Sat
Dinner
2
0.279525
183
23.17
6.50
Yes
Sun
Dinner
4
0.280535
232
11.61
3.39
No
Sat
Dinner
2
0.291990
67
3.07
1.00
Yes
Sat
Dinner
1
0.325733
178
9.60
4.00
Yes
Sun
Dinner
2
0.416667
172
7.25
5.15
Yes
Sun
Dinner
2
0.710345
tips. groupby( 'smoker' ) . apply ( top)
total_bill
tip
smoker
day
time
size
tip_pct
smoker
No
88
24.71
5.85
No
Thur
Lunch
2
0.236746
185
20.69
5.00
No
Sun
Dinner
5
0.241663
51
10.29
2.60
No
Sun
Dinner
2
0.252672
149
7.51
2.00
No
Thur
Lunch
2
0.266312
232
11.61
3.39
No
Sat
Dinner
2
0.291990
Yes
109
14.31
4.00
Yes
Sat
Dinner
2
0.279525
183
23.17
6.50
Yes
Sun
Dinner
4
0.280535
67
3.07
1.00
Yes
Sat
Dinner
1
0.325733
178
9.60
4.00
Yes
Sun
Dinner
2
0.416667
172
7.25
5.15
Yes
Sun
Dinner
2
0.710345
tips. groupby( [ 'smoker' , 'day' ] ) . apply ( top, n= 1 , column= 'total_bill' )
total_bill
tip
smoker
day
time
size
tip_pct
smoker
day
No
Fri
94
22.75
3.25
No
Fri
Dinner
2
0.142857
Sat
212
48.33
9.00
No
Sat
Dinner
4
0.186220
Sun
156
48.17
5.00
No
Sun
Dinner
6
0.103799
Thur
142
41.19
5.00
No
Thur
Lunch
5
0.121389
Yes
Fri
95
40.17
4.73
Yes
Fri
Dinner
4
0.117750
Sat
170
50.81
10.00
Yes
Sat
Dinner
3
0.196812
Sun
182
45.35
3.50
Yes
Sun
Dinner
3
0.077178
Thur
197
43.11
5.00
Yes
Thur
Lunch
4
0.115982
result = tips. groupby( 'smoker' ) [ 'tip_pct' ] . describe( )
result
count
mean
std
min
25%
50%
75%
max
smoker
No
151.0
0.159328
0.039910
0.056797
0.136906
0.155625
0.185014
0.291990
Yes
93.0
0.163196
0.085119
0.035638
0.106771
0.153846
0.195059
0.710345
result. unstack( 'smoker' )
smoker
count No 151.000000
Yes 93.000000
mean No 0.159328
Yes 0.163196
std No 0.039910
Yes 0.085119
min No 0.056797
Yes 0.035638
25% No 0.136906
Yes 0.106771
50% No 0.155625
Yes 0.153846
75% No 0.185014
Yes 0.195059
max No 0.291990
Yes 0.710345
dtype: float64
tips. groupby( 'smoker' , group_keys= False ) . apply ( top)
total_bill
tip
smoker
day
time
size
tip_pct
88
24.71
5.85
No
Thur
Lunch
2
0.236746
185
20.69
5.00
No
Sun
Dinner
5
0.241663
51
10.29
2.60
No
Sun
Dinner
2
0.252672
149
7.51
2.00
No
Thur
Lunch
2
0.266312
232
11.61
3.39
No
Sat
Dinner
2
0.291990
109
14.31
4.00
Yes
Sat
Dinner
2
0.279525
183
23.17
6.50
Yes
Sun
Dinner
4
0.280535
67
3.07
1.00
Yes
Sat
Dinner
1
0.325733
178
9.60
4.00
Yes
Sun
Dinner
2
0.416667
172
7.25
5.15
Yes
Sun
Dinner
2
0.710345
frame = pd. DataFrame( { 'data1' : np. random. randn( 1000 ) ,
'data2' : np. random. randn( 1000 ) } )
quartiles = pd. cut( frame. data1, 4 )
quartiles[ : 10 ]
0 (-1.415, 0.195]
1 (0.195, 1.805]
2 (-1.415, 0.195]
3 (0.195, 1.805]
4 (-1.415, 0.195]
5 (-1.415, 0.195]
6 (0.195, 1.805]
7 (-1.415, 0.195]
8 (-1.415, 0.195]
9 (-1.415, 0.195]
Name: data1, dtype: category
Categories (4, interval[float64]): [(-3.032, -1.415] < (-1.415, 0.195] < (0.195, 1.805] < (1.805, 3.414]]
def get_stats(group):
    """Summarize a group as a dict of min, max, count, and mean."""
    return {
        'min': group.min(),
        'max': group.max(),
        'count': group.count(),
        'mean': group.mean(),
    }
grouped = frame. data2. groupby( quartiles)
grouped. apply ( get_stats) . unstack( )
count
max
mean
min
data1
(-3.032, -1.415]
75.0
2.055051
0.217488
-2.419102
(-1.415, 0.195]
511.0
3.134327
0.002096
-2.709974
(0.195, 1.805]
379.0
3.146306
-0.014438
-2.561375
(1.805, 3.414]
35.0
2.209609
0.293813
-1.865174
grouping = pd. qcut( frame. data1, 10 , labels= False )
grouped = frame. data2. groupby( grouping)
grouped. apply ( get_stats) . unstack( )
count
max
mean
min
data1
0
100.0
2.055051
0.128959
-2.419102
1
100.0
3.134327
-0.106525
-2.131247
2
100.0
2.178958
0.041848
-1.964788
3
100.0
2.443715
0.176331
-2.587572
4
100.0
2.231973
0.016023
-2.709974
5
100.0
2.432079
-0.082424
-2.189209
6
100.0
3.146306
0.072272
-2.282556
7
100.0
1.831667
-0.011231
-2.538154
8
100.0
2.060099
-0.060095
-2.561375
9
100.0
2.209609
0.046783
-2.049609
s = pd. Series( np. random. randn( 6 ) )
s[ : : 2 ] = np. nan
s
0 NaN
1 -0.162668
2 NaN
3 0.273627
4 NaN
5 0.345890
dtype: float64
s. fillna( s. mean( ) )
0 0.152283
1 -0.162668
2 0.152283
3 0.273627
4 0.152283
5 0.345890
dtype: float64
states = [ 'Ohio' , 'New York' , 'Vermont' , 'Florida' ,
'Oregon' , 'Nevada' , 'California' , 'Idaho' ]
group_key = [ 'East' ] * 4 + [ 'West' ] * 4
data = pd. Series( np. random. randn( 8 ) , index= states)
data
Ohio -1.141165
New York 0.077290
Vermont 1.178992
Florida 1.413521
Oregon -0.840100
Nevada -0.262705
California 0.410535
Idaho -2.736216
dtype: float64
data[ [ 'Vermont' , 'Nevada' , 'Idaho' ] ] = np. nan
data
Ohio -1.141165
New York 0.077290
Vermont NaN
Florida 1.413521
Oregon -0.840100
Nevada NaN
California 0.410535
Idaho NaN
dtype: float64
data. groupby( group_key) . mean( )
East 0.116549
West -0.214783
dtype: float64
fill_mean = lambda g: g. fillna( g. mean( ) )
data. groupby( group_key) . apply ( fill_mean)
Ohio -1.141165
New York 0.077290
Vermont 0.116549
Florida 1.413521
Oregon -0.840100
Nevada -0.214783
California 0.410535
Idaho -0.214783
dtype: float64
suits = [ 'H' , 'S' , 'C' , 'D' ]
card_val = ( list ( range ( 1 , 11 ) ) + [ 10 ] * 3 ) * 4
base_names = [ 'A' ] + list ( range ( 2 , 11 ) ) + [ 'J' , 'K' , 'Q' ]
cards = [ ]
for suit in [ 'H' , 'S' , 'C' , 'D' ] :
cards. extend( str ( num) + suit for num in base_names)
deck = pd. Series( card_val, index= cards)
deck[ : 13 ]
AH 1
2H 2
3H 3
4H 4
5H 5
6H 6
7H 7
8H 8
9H 9
10H 10
JH 10
KH 10
QH 10
dtype: int64
def draw(deck, n=5):
    """Draw n random entries (without replacement) from the Series `deck`."""
    return deck.sample(n=n)
draw( deck)
QH 10
AS 1
2S 2
4C 4
5C 5
dtype: int64
get_suit = lambda card: card[ - 1 ]
deck. groupby( get_suit) . apply ( draw, n= 2 )
C 9C 9
2C 2
D QD 10
10D 10
H 5H 5
KH 10
S 3S 3
2S 2
dtype: int64
df = pd. DataFrame( { 'category' : [ 'a' , 'a' , 'a' , 'a' ,
'b' , 'b' , 'b' , 'b' ] ,
'data' : np. random. randn( 8 ) ,
'weights' : np. random. rand( 8 ) } )
df
category
data
weights
0
a
-0.326019
0.107540
1
a
1.333957
0.220552
2
a
-0.055586
0.189389
3
a
-0.947162
0.598072
4
b
-0.764437
0.505189
5
b
1.204008
0.921015
6
b
-1.048168
0.660169
7
b
-1.495828
0.525462
grouped = df. groupby( 'category' )
get_wavg = lambda g: np. average( g[ 'data' ] , weights= g[ 'weights' ] )
grouped. apply ( get_wavg)
category
a -0.284928
b -0.289163
dtype: float64
close_px = pd. read_csv( 'examples/stock_px_2.csv' , parse_dates= True ,
index_col= 0 )
close_px. info( )
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14
Data columns (total 4 columns):
AAPL 2214 non-null float64
MSFT 2214 non-null float64
XOM 2214 non-null float64
SPX 2214 non-null float64
dtypes: float64(4)
memory usage: 86.5 KB
close_px[ - 4 : ]
AAPL
MSFT
XOM
SPX
2011-10-11
400.29
27.00
76.27
1195.54
2011-10-12
402.19
26.96
77.16
1207.25
2011-10-13
408.43
27.18
76.37
1203.66
2011-10-14
422.00
27.27
78.11
1224.58
spx_corr = lambda x: x. corrwith( x[ 'SPX' ] )
rets = close_px. pct_change( ) . dropna( )
get_year = lambda x: x. year
by_year = rets. groupby( get_year)
by_year. apply ( spx_corr)
AAPL
MSFT
XOM
SPX
2003
0.541124
0.745174
0.661265
1.0
2004
0.374283
0.588531
0.557742
1.0
2005
0.467540
0.562374
0.631010
1.0
2006
0.428267
0.406126
0.518514
1.0
2007
0.508118
0.658770
0.786264
1.0
2008
0.681434
0.804626
0.828303
1.0
2009
0.707103
0.654902
0.797921
1.0
2010
0.710105
0.730118
0.839057
1.0
2011
0.691931
0.800996
0.859975
1.0
tips. pivot_table( index= [ 'day' , 'smoker' ] )
size
tip
tip_pct
total_bill
day
smoker
Fri
No
2.250000
2.812500
0.151650
18.420000
Yes
2.066667
2.714000
0.174783
16.813333
Sat
No
2.555556
3.102889
0.158048
19.661778
Yes
2.476190
2.875476
0.147906
21.276667
Sun
No
2.929825
3.167895
0.160113
20.506667
Yes
2.578947
3.516842
0.187250
24.120000
Thur
No
2.488889
2.673778
0.160298
17.113111
Yes
2.352941
3.030000
0.163863
19.190588
tips. pivot_table( [ 'tip_pct' , 'size' ] , index= [ 'time' , 'day' ] ,
columns= 'smoker' )
size
tip_pct
smoker
No
Yes
No
Yes
time
day
Dinner
Fri
2.000000
2.222222
0.139622
0.165347
Sat
2.555556
2.476190
0.158048
0.147906
Sun
2.929825
2.578947
0.160113
0.187250
Thur
2.000000
NaN
0.159744
NaN
Lunch
Fri
3.000000
1.833333
0.187735
0.188937
Thur
2.500000
2.352941
0.160311
0.163863
tips. pivot_table( [ 'tip_pct' , 'size' ] , index= [ 'time' , 'day' ] ,
columns= 'smoker' , margins= True )
size
tip_pct
smoker
No
Yes
All
No
Yes
All
time
day
Dinner
Fri
2.000000
2.222222
2.166667
0.139622
0.165347
0.158916
Sat
2.555556
2.476190
2.517241
0.158048
0.147906
0.153152
Sun
2.929825
2.578947
2.842105
0.160113
0.187250
0.166897
Thur
2.000000
NaN
2.000000
0.159744
NaN
0.159744
Lunch
Fri
3.000000
1.833333
2.000000
0.187735
0.188937
0.188765
Thur
2.500000
2.352941
2.459016
0.160311
0.163863
0.161301
All
2.668874
2.408602
2.569672
0.159328
0.163196
0.160803
tips. pivot_table( 'tip_pct' , index= [ 'time' , 'smoker' ] , columns= 'day' ,
aggfunc= len , margins= True )
day
Fri
Sat
Sun
Thur
All
time
smoker
Dinner
No
3.0
45.0
57.0
1.0
106.0
Yes
9.0
42.0
19.0
NaN
70.0
Lunch
No
1.0
NaN
NaN
44.0
45.0
Yes
6.0
NaN
NaN
17.0
23.0
All
19.0
87.0
76.0
62.0
244.0
tips. pivot_table( 'tip_pct' , index= [ 'time' , 'size' , 'smoker' ] ,
columns= 'day' , aggfunc= 'mean' , fill_value= 0 )
day
Fri
Sat
Sun
Thur
time
size
smoker
Dinner
1
No
0.000000
0.137931
0.000000
0.000000
Yes
0.000000
0.325733
0.000000
0.000000
2
No
0.139622
0.162705
0.168859
0.159744
Yes
0.171297
0.148668
0.207893
0.000000
3
No
0.000000
0.154661
0.152663
0.000000
Yes
0.000000
0.144995
0.152660
0.000000
4
No
0.000000
0.150096
0.148143
0.000000
Yes
0.117750
0.124515
0.193370
0.000000
5
No
0.000000
0.000000
0.206928
0.000000
Yes
0.000000
0.106572
0.065660
0.000000
6
No
0.000000
0.000000
0.103799
0.000000
Lunch
1
No
0.000000
0.000000
0.000000
0.181728
Yes
0.223776
0.000000
0.000000
0.000000
2
No
0.000000
0.000000
0.000000
0.166005
Yes
0.181969
0.000000
0.000000
0.158843
3
No
0.187735
0.000000
0.000000
0.084246
Yes
0.000000
0.000000
0.000000
0.204952
4
No
0.000000
0.000000
0.000000
0.138919
Yes
0.000000
0.000000
0.000000
0.155410
5
No
0.000000
0.000000
0.000000
0.121389
6
No
0.000000
0.000000
0.000000
0.173706