LightGBM modeling
import lightgbm as lgbm
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection

np.random.seed(42)

model = lgbm.LGBMRegressor(
    objective='regression',
    max_depth=5,
    num_leaves=25,
    learning_rate=0.007,
    n_estimators=1000,
    min_child_samples=80,
    subsample=0.8,
    colsample_bytree=1,
    reg_alpha=0,
    reg_lambda=0,
    random_state=np.random.randint(int(10e6))
)

n_splits = 6
cv = model_selection.KFold(n_splits=n_splits, shuffle=True, random_state=42)
val_scores = [0] * n_splits

# Test predictions are accumulated here and averaged across the folds
sub = submission['id'].to_frame()
sub['visitors'] = 0

feature_importances = pd.DataFrame(index=X_train.columns)

for i, (fit_idx, val_idx) in enumerate(cv.split(X_train, y_train)):
    X_fit = X_train.iloc[fit_idx]
    y_fit = y_train.iloc[fit_idx]
    X_val = X_train.iloc[val_idx]
    y_val = y_train.iloc[val_idx]
    model.fit(
        X_fit,
        y_fit,
        eval_set=[(X_fit, y_fit), (X_val, y_val)],
        eval_names=('fit', 'val'),
        eval_metric='l2',
        early_stopping_rounds=200,  # lightgbm < 4.0; in >= 4.0 pass callbacks=[lgbm.early_stopping(200)]
        feature_name=X_fit.columns.tolist(),
        verbose=False  # lightgbm < 4.0; in >= 4.0 pass callbacks=[lgbm.log_evaluation(0)]
    )
    # The target is log1p-transformed, so the square root of the l2 (MSE)
    # validation score is the RMSLE
    val_scores[i] = np.sqrt(model.best_score_['val']['l2'])
    sub['visitors'] += model.predict(X_test, num_iteration=model.best_iteration_)
    feature_importances[i] = model.feature_importances_
    print('Fold {} RMSLE: {:.5f}'.format(i + 1, val_scores[i]))

# Average the per-fold test predictions, then undo the log1p transform
sub['visitors'] /= n_splits
sub['visitors'] = np.expm1(sub['visitors'])

val_mean = np.mean(val_scores)
val_std = np.std(val_scores)
print('Local RMSLE: {:.5f} (±{:.5f})'.format(val_mean, val_std))
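The loop above stores each fold's importances in feature_importances but never inspects them. As a minimal follow-up sketch (plain pandas, using only names the code already defines), the per-fold values can be averaged and sorted to see which features the model leaned on:

# Mean importance across the 6 folds, strongest features first
mean_importance = feature_importances.mean(axis=1).sort_values(ascending=False)
print(mean_importance.head(10))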
Below, I'll demonstrate with LightGBM's built-in cv function:
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'learning_rate': 0.1,
    'num_leaves': 50,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
}

data_train = lgbm.Dataset(df_train, y_train, silent=True)
cv_results = lgbm.cv(
    params,
    data_train,
    num_boost_round=1000,
    nfold=5,
    stratified=False,  # regression, so no stratified folds
    shuffle=True,
    metrics='rmse',
    early_stopping_rounds=50,  # lightgbm < 4.0; in >= 4.0 pass callbacks=[lgbm.early_stopping(50)]
    verbose_eval=50,  # lightgbm < 4.0; in >= 4.0 pass callbacks=[lgbm.log_evaluation(50)]
    show_stdv=True,
    seed=0
)

# cv_results['rmse-mean'] holds the mean CV RMSE after each boosting round,
# so its length is the round count at which early stopping found the best score
print('best n_estimators:', len(cv_results['rmse-mean']))
print('best cv score:', cv_results['rmse-mean'][-1])
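Once cv has settled on the number of rounds, the natural next step is to retrain on the full training set with exactly that many trees. A minimal sketch of that step, reusing params and data_train from above (the original stops at printing the score, so this continuation is my own):

# Retrain a final booster with the CV-selected number of rounds
best_rounds = len(cv_results['rmse-mean'])
final_model = lgbm.train(params, data_train, num_boost_round=best_rounds)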