模型的评价
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score, roc_curve, precision_score
from matplotlib import pyplot as plt
# 定义评估函数
def model_metrics(clf, X_train, X_test, y_train, y_test):
    """Print train/test metrics for a fitted classifier and plot its ROC curves.

    Reports accuracy, precision, recall, F1, ROC AUC and the KS statistic
    (maximum absolute gap between the ROC curve's FPR and TPR) for both the
    training and the test split, then draws both ROC curves in one figure.

    Args:
        clf: A fitted estimator exposing ``predict`` and ``predict_proba``.
        X_train: Training features.
        X_test: Test features.
        y_train: True training labels.
        y_test: True test labels.

    Returns:
        None. Results are printed and the figure is shown via ``plt.show()``.
    """
    # Hard class predictions feed the threshold-based metrics.
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    # Column 1 of predict_proba is used as the ranking score for ROC/AUC/KS
    # (for scikit-learn estimators this is the second entry of clf.classes_).
    y_train_pred_proba = clf.predict_proba(X_train)[:, 1]
    y_test_pred_proba = clf.predict_proba(X_test)[:, 1]

    # Accuracy
    print('准确性:')
    print('Train:{:.4f}'.format(accuracy_score(y_train, y_train_pred)))
    print('Test:{:.4f}'.format(accuracy_score(y_test, y_test_pred)))

    # Precision -- the test line must score the test split; it previously
    # re-used the training labels/predictions and so repeated the train value.
    print('精确性:')
    print("Train:{:.4f}".format(precision_score(y_train, y_train_pred)))
    print("Test: {:.4f}".format(precision_score(y_test, y_test_pred)))

    # Recall
    print('召回率:')
    print('Train:{:.4f}'.format(recall_score(y_train, y_train_pred)))
    print('Test:{:.4f}'.format(recall_score(y_test, y_test_pred)))

    # F1 score
    print('f1_score:')
    print('Train:{:.4f}'.format(f1_score(y_train, y_train_pred)))
    print('Test:{:.4f}'.format(f1_score(y_test, y_test_pred)))

    # ROC AUC -- computed once per split and reused in the plot legend.
    auc_tr = roc_auc_score(y_train, y_train_pred_proba)
    auc_te = roc_auc_score(y_test, y_test_pred_proba)
    print('roc_auc:')
    print('Train:{:.4f}'.format(auc_tr))
    print('Test:{:.4f}'.format(auc_te))

    # ROC curve points for each split.
    fpr_tr, tpr_tr, _ = roc_curve(y_train, y_train_pred_proba)
    fpr_te, tpr_te, _ = roc_curve(y_test, y_test_pred_proba)

    # KS statistic: largest absolute distance between FPR and TPR.
    ks_tr = max(abs(fpr_tr - tpr_tr))
    ks_te = max(abs(fpr_te - tpr_te))
    print('KS:')
    print('Train:{:.4f}'.format(ks_tr))
    print('Test:{:.4f}'.format(ks_te))

    # Plot both curves; the test legend now shows the test KS (it previously
    # showed the train KS).
    plt.plot(fpr_tr, tpr_tr, 'r-',
             label="Train:AUC: {:.3f} KS:{:.3f}".format(auc_tr, ks_tr))
    plt.plot(fpr_te, tpr_te, 'g-',
             label="Test:AUC: {:.3f} KS:{:.3f}".format(auc_te, ks_te))
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'd--')
    plt.legend(loc='best')
    plt.title("ROC curve")
    plt.show()
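- 每个模型的准确率,精确率,召回率,F1-score,AUC值,ROC曲线
- 注意:下表中精确率的 test 值与 train 值完全相同,这是因为生成该表时评估函数在输出测试集精确率时误用了训练集的标签和预测结果(`precision_score(y_train, y_train_pred)`),这些 test 精确率需要重新计算。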
模型 | 准确率 | 精确率 | 召回率 | F1-score | AUC值 | ROC曲线 |
---|---|---|---|---|---|---|
逻辑回归 | train 0.7493 test 0.7477 | train 0.0000 test 0.0000 | train 0.0000 test 0.0000 | train 0.0000 test 0.0000 | train 0.5954 test 0.5838 | |
决策树 | train 1.0000 test 0.6847 | train 1.0000 test 1.000 | train 1.0000 test 0.4.95(原始数值有误,待核实) | train 1.0000 test 0.3952 | train 1.0000 test 0.5933 | |
SVM | train 1.0000 test 0.7477 | train 0.000 test 0.000 | train 0.000 test 0.000 | train 0.000 test 0.000 | train 0.5954 test 0.5838 | |
随机森林 | train 0.9862 test 0.7694 | train 0.9937 test 0.9937 | train 0.9508 test 0.2841 | train 0.9718 test 0.3827 | train 0.9988 test 0.7374 | |
XGBoost | train 0.8602 test 0.7933 | train 0.8625 test 0.8625 | train 0.5264 test 0.3928 | train 0.6538 test 0.4887 | train 0.9222 test 0.7788 | |