LightGBM 回归模型代码
K 折交叉验证通过对 K 次训练/验证结果取平均,来评价模型或某组参数的效果好坏;借助 K 折交叉验证选出最优的模型和参数之后,最终预测时仍需用全部训练数据重新训练一次再做预测。
LightGBM K 折交叉验证效果
from sklearn.model_selection import KFold
# BUG FIX: mean_squared_error was used below but never imported (NameError).
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import numpy as np
import pandas as pd

# Load the steam-prediction train/test sets (tab-separated text files).
train_data2 = pd.read_csv('./data/zhengqi_train.txt', sep='\t')
test_data2 = pd.read_csv('./data/zhengqi_test.txt', sep='\t')
# Features are the columns shared with the test file; target is the 'target' column.
train_data2_f = train_data2[test_data2.columns].values
train_data2_target = train_data2['target'].values

# 5-fold cross-validation
Folds = 5
kf = KFold(n_splits=Folds)  # use the constant instead of a duplicated literal 5
# Record train / validation MSE for every fold
MSE_DICT = {
    'train_mse': [], 'test_mse': []}

# Offline training and evaluation: one fresh model per fold
for i, (train_index, test_index) in enumerate(kf.split(train_data2_f)):
    # LightGBM regressor; n_estimators is large because early stopping trims it.
    lgb_reg = lgb.LGBMRegressor(
        learning_rate=0.01,
        max_depth=-1,
        n_estimators=5000,
        boosting_type='gbdt',
        random_state=2019,
        objective='regression'
    )
    # Split this fold's train and validation subsets
    X_train_KFold, X_test_KFold = train_data2_f[train_index], train_data2_f[test_index]
    y_train_KFold, y_test_KFold = train_data2_target[train_index], train_data2_target[test_index]
    # Fit with early stopping monitored on the validation fold.
    # NOTE(review): `early_stopping_rounds`/`verbose` as fit() kwargs require
    # lightgbm < 4.0; newer versions expect callbacks — confirm installed version.
    lgb_reg.fit(
        X=X_train_KFold,
        y=y_train_KFold,
        eval_set=[(X_train_KFold, y_train_KFold), (X_test_KFold, y_test_KFold)],
        eval_names=['Train', 'Test'],
        early_stopping_rounds=200,
        eval_metric='MSE',
        verbose=50
    )
    # Predict train and validation folds at the best early-stopped iteration
    y_train_KFold_predict = lgb_reg.predict(X_train_KFold, num_iteration=lgb_reg.best_iteration_)
    y_test_KFold_predict = lgb_reg.predict(X_test_KFold, num_iteration=lgb_reg.best_iteration_)
    # BUG FIX: the original f-string was broken across a physical newline inside
    # the {...} expression, which is a SyntaxError; rejoined onto one line.
    print(f"第{i + 1}折 训练和预测 训练MSE 预测MSE")
    train_mse = mean_squared_error(y_train_KFold_predict, y_train_KFold)
    print('------\n', '训练MSE\n', train_mse, '\n------')
    test_mse = mean_squared_error(y_test_KFold_predict, y_test_KFold)
    print('------\n', '预测MSE\n', test_mse, '\n------')
    MSE_DICT['train_mse'].append(train_mse)
    MSE_DICT['test_mse'].append(test_mse)

# Per-fold lists and their means, as an overall CV score
print('------\n', '训练MSE\n', MSE_DICT['train_mse'], '\n', np.mean(MSE_DICT['train_mse']), '\n------')
print('------\n', '预测MSE\n', MSE_DICT['test_mse'], '\n', np.mean(MSE_DICT['test_mse']), '\n------')
模型保存与调用
import lightgbm as lgb
import joblib

# Train a small demo model.
# NOTE(review): train_data / train_target / test_data / test_target are assumed
# to be defined earlier in the notebook — confirm they are in scope here.
lgb_reg = lgb.LGBMRegressor(objective='regression', num_leaves=31, learning_rate=0.05, n_estimators=20)
# BUG FIX: eval_set must be a list of (X, y) tuples — the original passed
# [test_data, test_target], which LightGBM rejects at fit time.
lgb_reg.fit(train_data, train_target, eval_set=[(test_data, test_target)], eval_metric='l1', early_stopping_rounds=5)
# Persist the fitted model to disk
joblib.dump(lgb_reg, 'model.pkl')
# Reload the persisted model
lgb_reg = joblib.load('model.pkl')
# Predict using the best iteration found by early stopping
test_predict = lgb_reg.predict(test_data, num_iteration=lgb_reg.best_iteration_)