# Environment: Jupyter Notebook, TensorFlow 2.1.0, Python 3.7.5
import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib. pyplot as plt
from tensorflow import keras as keras
from tensorflow. keras import layers as layers
# Fetch the Auto MPG data file through the Keras download helper and show
# the local path it returns.
dataset_path = keras.utils.get_file(
    fname="auto-mpg.data",
    origin="http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)
print(dataset_path)
/Users/xxxx/.keras/datasets/auto-mpg.data
# Names assigned to the columns of the data file, in file order.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# '?' is read as a missing value; a tab starts a comment, so the rest of
# each line after the first tab is ignored by the parser.
read_options = dict(
    names=column_names,
    na_values='?',
    comment='\t',
    sep=' ',
    skipinitialspace=True,
)
raw_dataset = pd.read_csv(dataset_path, **read_options)

# Keep the raw frame untouched and do all further processing on a copy.
dataset = raw_dataset.copy()
dataset.tail()
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  Origin
393  27.0          4         140.0        86.0  2790.0          15.6          82       1
394  44.0          4          97.0        52.0  2130.0          24.6          82       2
395  32.0          4         135.0        84.0  2295.0          11.6          82       1
396  28.0          4         120.0        79.0  2625.0          18.6          82       1
397  31.0          4         119.0        82.0  2720.0          19.4          82       1
# Count the missing entries in each column.
missing_per_column = dataset.isna().sum()
missing_per_column
MPG 0
Cylinders 0
Displacement 0
Horsepower 6
Weight 0
Acceleration 0
Model Year 0
Origin 0
dtype: int64
# Discard every row that contains a missing value.
dataset = dataset.dropna()

# 'Origin' holds a numeric code; replace it with one 0.0/1.0 indicator
# column per code so it is not treated as an ordered quantity.
origin = dataset.pop('Origin')
for code, region in ((1, 'USA'), (2, 'Europe'), (3, 'Japan')):
    dataset[region] = (origin == code) * 1.0
dataset.tail()
      MPG  Cylinders  Displacement  Horsepower  Weight  Acceleration  Model Year  USA  Europe  Japan
393  27.0          4         140.0        86.0  2790.0          15.6          82  1.0     0.0    0.0
394  44.0          4          97.0        52.0  2130.0          24.6          82  0.0     1.0    0.0
395  32.0          4         135.0        84.0  2295.0          11.6          82  1.0     0.0    0.0
396  28.0          4         120.0        79.0  2625.0          18.6          82  1.0     0.0    0.0
397  31.0          4         119.0        82.0  2720.0          19.4          82  1.0     0.0    0.0
# Sample 80% of the rows for training (fixed seed, so the split is
# repeatable); whatever was not sampled becomes the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

# Pairwise scatter plots of a few columns, with KDE curves on the diagonal.
pairplot_columns = ["MPG", "Cylinders", "Displacement", "Weight"]
sns.pairplot(train_dataset[pairplot_columns], diag_kind='kde')
<seaborn.axisgrid.PairGrid at 0x10cd98110>
# Summary statistics of the training set; the MPG column is taken out of
# the table (and shown) because MPG is the prediction target.
train_stats = train_dataset.describe()
mpg_stats = train_stats.pop("MPG")
mpg_stats
count    314.000000
mean      23.310510
std        7.728652
min       10.000000
25%       17.000000
50%       22.000000
75%       28.950000
max       46.600000
Name: MPG, dtype: float64
# Flip the table so each feature is a row and each statistic a column.
train_stats = train_stats.T
train_stats
              count         mean         std     min      25%     50%      75%     max
Cylinders     314.0     5.477707    1.699788     3.0     4.00     4.0     8.00     8.0
Displacement  314.0   195.318471  104.331589    68.0   105.50   151.0   265.75   455.0
Horsepower    314.0   104.869427   38.096214    46.0    76.25    94.5   128.00   225.0
Weight        314.0  2990.251592  843.898596  1649.0  2256.50  2822.5  3608.00  5140.0
Acceleration  314.0    15.559236    2.789230     8.0    13.80    15.5    17.20    24.8
Model Year    314.0    75.898089    3.675642    70.0    73.00    76.0    79.00    82.0
USA           314.0     0.624204    0.485101     0.0     0.00     1.0     1.00     1.0
Europe        314.0     0.178344    0.383413     0.0     0.00     0.0     0.00     1.0
Japan         314.0     0.197452    0.398712     0.0     0.00     0.0     0.00     1.0
# Split the target column off both frames; what remains in each frame are
# the model's input features.
train_labels, test_labels = (
    frame.pop('MPG') for frame in (train_dataset, test_dataset)
)
def norm(x, stats=None):
    """Standardize *x* as ``(x - mean) / std`` using per-feature statistics.

    Args:
        x: Data to normalize. Assumed to be a DataFrame whose column labels
            match the row labels of the statistics table -- confirm at the
            call site.
        stats: Table with ``'mean'`` and ``'std'`` entries per feature.
            When omitted, the module-level ``train_stats`` is used, so
            existing ``norm(x)`` calls behave exactly as before.

    Returns:
        The element-wise result of ``(x - stats['mean']) / stats['std']``.
    """
    if stats is None:
        # Default to the training-set statistics so that test data is
        # scaled with the same constants as the training data.
        stats = train_stats
    return (x - stats['mean']) / stats['std']
# Both splits go through norm(), which scales with the training statistics.
normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)
# Two 64-unit ReLU hidden layers followed by a single linear output unit;
# the input width equals the number of feature columns.
n_features = len(train_dataset.keys())
model = keras.models.Sequential([
    layers.Dense(64, activation='relu', input_shape=[n_features]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])

# Optimize mean squared error with RMSprop; report MAE and MSE as metrics.
model.compile(
    loss='mse',
    optimizer=keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense (Dense)                (None, 64)                640
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65
=================================================================
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
# Smoke test: run the still-untrained model on the first ten training rows.
example_batch = normed_train_data.iloc[:10]
example_result = model.predict(example_batch)
print(example_result)
[[ 0.06187941]
[ 0.16284567]
[ 0.19416149]
[ 0.3226478 ]
[ 0.09883147]
[ 0.00343724]
[ 0.13330291]
[ 0.62984717]
[-0.05348695]
[ 0.44078857]]
class PrintDot(keras.callbacks.Callback):
    """Compact progress display: one dot per epoch, a line break every 100."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for this epoch; start a new line first when epoch % 100 == 0."""
        starts_new_row = epoch % 100 == 0
        if starts_new_row:
            print(' ')
        print('.', end=' ')
# Train for 1000 epochs with 20% of the training data held out for
# validation. verbose=0 silences the built-in log; PrintDot prints a dot
# per epoch instead.
fit_options = dict(
    epochs=1000,
    validation_split=0.2,
    verbose=0,
    callbacks=[PrintDot()],
)
history = model.fit(normed_train_data, train_labels, **fit_options)
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Tabulate the per-epoch metrics and append the epoch number as a column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()
         loss       mae       mse  val_loss   val_mae   val_mse  epoch
995  2.448885  0.975710  2.448885  9.030066  2.268713  9.030066    995
996  2.376843  0.999163  2.376843  9.096817  2.273271  9.096817    996
997  2.383884  0.992754  2.383883  9.657296  2.356696  9.657296    997
998  2.504148  1.021134  2.504148  9.152325  2.318949  9.152325    998
999  2.421463  0.947287  2.421463  9.146635  2.284075  9.146635    999
def plot_history(history):
    """Plot training and validation error per epoch, one figure per metric.

    Args:
        history: Object exposing ``.history`` (per-epoch values under the
            keys 'mae', 'val_mae', 'mse' and 'val_mse') and ``.epoch``;
            the callers in this file pass the return value of ``model.fit``.
    """
    records = pd.DataFrame(history.history)
    records['epoch'] = history.epoch

    # (metric key, y-axis label, upper y-limit) for each figure, in draw order.
    panels = (
        ('mae', 'Mean Abs Error [MPG]', 5),
        ('mse', 'Mean Square Error [$MPG^2$]', 20),
    )
    for metric, y_label, y_max in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.plot(records['epoch'], records[metric], label='Train Error')
        plt.plot(records['epoch'], records['val_' + metric], label='Val Error')
        plt.ylim([0, y_max])
        plt.legend()
    plt.show()
# Learning curves of the training run, then the feature names fed to the model.
plot_history(history)
train_dataset.columns
Index(['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration',
       'Model Year', 'USA', 'Europe', 'Japan'],
      dtype='object')
# Build a fresh model with the same architecture and compile settings, so
# training starts again from newly initialized weights.
n_features = len(train_dataset.keys())
model = keras.models.Sequential([
    layers.Dense(64, activation='relu', input_shape=[n_features]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])
model.compile(
    loss='mse',
    optimizer=keras.optimizers.RMSprop(0.001),
    metrics=['mae', 'mse'],
)

# Stop training once val_loss has not improved for 10 consecutive epochs.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=1000,
    validation_split=0.2,
    verbose=0,
    callbacks=[early_stop, PrintDot()],
)
. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
# Learning curves of the early-stopped run.
plot_history(history)

# Predict on the held-out test set and flatten the output to one dimension.
raw_predictions = model.predict(normed_test_data)
test_predictions = raw_predictions.flatten()

# Predicted vs. true MPG on a square plot with both axes starting at 0.
plt.scatter(test_labels, test_predictions)
plt.xlabel('True values [MPG]')
plt.ylabel("Predictions [MPG]")
plt.axis('equal')
plt.axis('square')
_, x_upper = plt.xlim()
plt.xlim([0, x_upper])
_, y_upper = plt.ylim()
plt.ylim([0, y_upper])
# Diagonal y = x as a reference for perfect predictions.
diagonal = [-100, 100]
_ = plt.plot(diagonal, diagonal)
# Histogram of the prediction errors (prediction minus true value).
error = test_predictions - test_labels
n_bins = 25
plt.hist(error, bins=n_bins)
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel("Count")