博主环境搭配
超参数搜索:神经网络层数与每层节点数
sys.version_info(major=3, minor=7, micro=2, releaselevel='final', serial=0)
3.7.2 (v3.7.2:9a3ffc0492, Dec 24 2018, 02:44:43)
[Clang 6.0 (clang-600.0.57)]
matplotlib 3.0.3
numpy 1.17.2
pandas 0.25.1
sklearn 0.21.3
tensorflow 2.0.0-beta0
tensorflow.python.keras.api._v2.keras 2.2.4-tf
import warnings
warnings.filterwarnings('ignore')
import matplotlib as mpl
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
# Print interpreter and library versions for reproducibility
print(sys.version_info)
print(sys.version)
for module in mpl,np,pd,sklearn,tf,keras:
    print(module.__name__,module.__version__)
from sklearn.model_selection import train_test_split
# Split all the data into three parts: train / validation / test.
# boston_housing ships pre-split into (train, test); the validation
# set is carved out of the training portion.
boston_housing= keras.datasets.boston_housing.load_data()
(x_train,y_train),(x_test,y_test)=boston_housing
# random_state pins the split so runs are reproducible
x_train,x_valid,y_train,y_valid=train_test_split(x_train,y_train,random_state=7)
# print(housing_data)
# Inspect the shapes of each split
print(x_train.shape,y_train.shape)
print(x_valid.shape,y_valid.shape)
print(x_test.shape,y_test.shape)
# Standardization: fit the scaler on the training set only, then
# apply it to valid/test to avoid leaking held-out statistics.
from sklearn.preprocessing import StandardScaler
scaler= StandardScaler()
x_train_scaled=scaler.fit_transform(x_train)
x_valid_scaled=scaler.transform(x_valid)
x_test_scaled=scaler.transform(x_test)
# Number of input features (bare expression: notebook cell output)
x_train.shape[1]
# RandomizedSearchCV
# 1. 转化为 sklearn 的model
# 2. 定义参数集合
# 3. 开始搜索参数
def build_model(hidden_layers=1,
                layer_size=30,
                learning_rate=3e-3,
                input_shape=None):
    """Build and compile a regression MLP.

    Parameters
    ----------
    hidden_layers : int
        Number of hidden Dense layers added after the input layer.
    layer_size : int
        Units in the input layer and in every hidden layer.
    learning_rate : float
        Learning rate for the Adam optimizer.
    input_shape : tuple, optional
        Shape of one input sample.  Defaults to the module-level
        training data's feature shape, which keeps the original
        behavior while removing the mandatory global dependency.

    Returns
    -------
    A compiled keras ``Sequential`` model with a single linear output
    unit and MSE loss (scalar regression).
    """
    if input_shape is None:
        # Backward-compatible fallback to the notebook's global data.
        input_shape = x_train.shape[1:]
    model = keras.models.Sequential([
        Dense(layer_size, input_shape=input_shape, activation='selu'),
    ])
    for _ in range(hidden_layers):
        model.add(Dense(layer_size, activation='selu'))
    # Single linear unit: scalar regression output
    model.add(Dense(1))
    # Explicit optimizer so the learning rate is a searchable knob
    optimizer = keras.optimizers.Adam(learning_rate)
    model.compile(loss='mse', optimizer=optimizer)
    return model
# NOTE: pass build_model itself here WITHOUT parentheses -- the
# wrapper needs the factory function, not an already-built model.
# Wrap the tf.keras model factory as a scikit-learn regressor so it
# can participate in RandomizedSearchCV.
sklearn_model=keras.wrappers.scikit_learn.KerasRegressor(
    build_model)
### Define training callbacks.
# os.makedirs(..., exist_ok=True) replaces the original
# os.path.exists + os.mkdir pair: it is race-free (no TOCTOU window
# between the check and the create) and also creates parents.
log_dir = './search_housing_logs'
os.makedirs(log_dir, exist_ok=True)
save_model_dir = './search_housing_model'
os.makedirs(save_model_dir, exist_ok=True)
save_model_file = os.path.join(save_model_dir, 'search_housing.h5')
callback1 = [
    # TensorBoard event logging
    keras.callbacks.TensorBoard(log_dir),
    # Persist only the best model (lowest validation loss) to disk
    keras.callbacks.ModelCheckpoint(save_model_file, save_best_only=True),
    # Stop once val loss improves by < 1e-2 for 5 consecutive epochs
    keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)
]
from scipy.stats import reciprocal

# Hyperparameter search space for RandomizedSearchCV.
# reciprocal(a, b) is a log-uniform distribution over [a, b] -- the
# natural prior for a learning rate, which varies across magnitudes.
param_distribution = dict(
    hidden_layers=[1, 2, 3, 4, 5],          # depth of the network
    layer_size=np.arange(1, 100),           # units per hidden layer
    learning_rate=reciprocal(1e-4, 1e-2),   # sampled log-uniformly
)
from sklearn.model_selection import RandomizedSearchCV
# Cross-validation: the training data is split into cv folds; each
# sampled candidate trains on cv-1 folds and is scored on the last.
# n_iter = number of parameter settings sampled from the space;
# n_jobs = max parallel workers.
random_search_cv= RandomizedSearchCV(
    sklearn_model,param_distribution,
    n_iter=10,n_jobs=1,
    cv=3)
# Extra fit kwargs (epochs, validation_data, callbacks) are forwarded
# to the wrapped keras model's fit().
# NOTE(review): this fits on the RAW features even though scaled
# versions (x_train_scaled etc.) were computed above -- looks
# unintended; confirm whether the scaled arrays should be used here.
random_search_cv.fit(x_train, y_train, epochs=20,
                     validation_data=(x_valid,y_valid),
                     callbacks=callback1)
# Inspect the best hyperparameters found by the search
print(random_search_cv.best_params_)
# best_score_ is negative MSE: sklearn always maximizes scores
print(random_search_cv.best_score_)
print(random_search_cv.best_estimator_)
# Pull the underlying keras model out of the best wrapper
model=random_search_cv.best_estimator_.model
# Final evaluation on the untouched test set
model.evaluate(x_test,y_test)
{'hidden_layers': 3, 'layer_size': 32, 'learning_rate': 0.0038406176766716676}
-61.8116564797883
所以在本次搜索中,层数为3 每层32 初始化学习率在 0.00384左右时,
神经网络针对本测试集,效果最好