模型调参
网格搜索
from sklearn.model_selection import GridSearchCV
n_fold = 5  # number of cross-validation folds, passed as `cv` to every GridSearchCV below
scoring = 'accuracy'  # evaluation metric (accuracy), passed as `scoring` to every GridSearchCV below
- 逻辑回归
# Grid-search the logistic-regression estimator `ltc` over the penalty type
# and the regularization parameter C, using n_fold-fold cross-validation.
# NOTE(review): `ltc` is constructed outside this cell. The 'l1' penalty is
# only supported by some LogisticRegression solvers (e.g. liblinear, saga);
# confirm the solver, otherwise the 'l1' candidates may fail to fit.
ltc_param = {
    'penalty': ['l1', 'l2'],
    'C': [0.0001, 0.001, 0.01, 0.1, 1.0],
}
# n_jobs=-1 matches the other grid searches in this file; it only affects
# how the fits are scheduled, not the scores.
ltc_grid = GridSearchCV(ltc, ltc_param, cv=n_fold, scoring=scoring, n_jobs=-1)
ltc_grid.fit(X_train, y_train)
print(ltc_grid.best_score_)   # best score
print(ltc_grid.best_params_)  # best parameters
# Full cross-validation results, transposed for display.
display(pd.DataFrame(ltc_grid.cv_results_).T)
- 决策树
# Decision tree: search max_depth over 1..9 with n_fold-fold cross-validation.
dtc_param = dict(max_depth=range(1, 10))
dtc_grid = GridSearchCV(
    estimator=dtc,
    param_grid=dtc_param,
    scoring=scoring,
    cv=n_fold,
    n_jobs=-1,
)
dtc_grid.fit(X_train, y_train)
# Best score first, then the parameter setting that achieved it.
print(dtc_grid.best_score_, dtc_grid.best_params_, sep='\n')
# Full cross-validation results, transposed for display.
display(pd.DataFrame(dtc_grid.cv_results_).transpose())
- SVM
# SVM: search three values of C with n_fold-fold cross-validation.
svc_param = dict(C=[0.001, 0.01, 0.1])
svc_grid = GridSearchCV(
    estimator=svc,
    param_grid=svc_param,
    scoring=scoring,
    cv=n_fold,
    n_jobs=-1,
)
svc_grid.fit(X_train, y_train)
# Best score first, then the parameter setting that achieved it.
print(svc_grid.best_score_, svc_grid.best_params_, sep='\n')
# Full cross-validation results, transposed for display.
display(pd.DataFrame(svc_grid.cv_results_).transpose())
- 随机森林
# Random forest: search every combination of ensemble size and split
# criterion with n_fold-fold cross-validation.
rfc_param = dict(
    n_estimators=[5, 15, 30, 50],
    criterion=['gini', 'entropy'],
)
rfc_grid = GridSearchCV(
    estimator=rfc,
    param_grid=rfc_param,
    scoring=scoring,
    cv=n_fold,
    n_jobs=-1,
)
rfc_grid.fit(X_train, y_train)
# Best score first, then the parameter setting that achieved it.
print(rfc_grid.best_score_, rfc_grid.best_params_, sep='\n')
# Full cross-validation results, transposed for display.
display(pd.DataFrame(rfc_grid.cv_results_).transpose())
- XGBoost
# XGBoost: search every combination of booster type and max_depth (3..5)
# with n_fold-fold cross-validation.
# NOTE(review): `xgb` is used here as the estimator and is defined outside
# this cell. max_depth is presumably a tree-booster setting, so the
# 'gblinear' candidates likely ignore it; confirm against the XGBoost docs.
xgb_param = dict(
    booster=['gbtree', 'gblinear'],
    max_depth=range(3, 6),
)
xgb_grid = GridSearchCV(
    estimator=xgb,
    param_grid=xgb_param,
    scoring=scoring,
    cv=n_fold,
    n_jobs=-1,
)
xgb_grid.fit(X_train, y_train)
# Best score first, then the parameter setting that achieved it.
print(xgb_grid.best_score_, xgb_grid.best_params_, sep='\n')
# Full cross-validation results, transposed for display.
display(pd.DataFrame(xgb_grid.cv_results_).transpose())