客户贷款逾期预测[4]-记录评分、绘制roc曲线

任务

       记录五个模型(逻辑回归、SVM、决策树、XGBoost、LightGBM)关于 precision、recall、f1 score、ROC 曲线、AUC 的评分表格。

实现

# -*- coding: utf-8 -*-
"""
Created on Thu Nov 15 13:02:11 2018

@author: keepi
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Load the raw data set; the CSV file is read with the GB18030 encoding.
data = pd.read_csv('data.csv', encoding='gb18030')
print("data.shape:", data.shape)

# --- Preprocessing ---
# Fraction of missing values per column (kept for ad-hoc inspection, e.g.
# miss_rate.sort_values(ascending=False)).
miss_rate = data.isnull().sum() / len(data)

# Numeric features: fill missing values with the column mean.
# NOTE(review): the means are computed on the full data set, i.e. before the
# train/test split below, so test rows influence the imputed values.
X_num = data.select_dtypes('number').copy()
X_num.fillna(X_num.mean(), inplace=True)
print("数值型特征的shape:", X_num.shape)
print(X_num.columns)
# Remove the target ('status') from the features, together with
# 'Unnamed: 0' (presumably the row index saved into the CSV - confirm).
X_num.drop(['Unnamed: 0', 'status'], axis=1, inplace=True)

# Non-numeric features: missing values are replaced by the integer 0.
X_str = data.select_dtypes(exclude='number').copy()
X_str.fillna(0, inplace=True)
print("非数值型特征:", X_str.columns)
print(X_str.head())

# Only 'reg_preference_for_trad' is used from the non-numeric columns; it is
# one-hot encoded and appended to the numeric features.
X_dummy = pd.get_dummies(X_str['reg_preference_for_trad'])
X = pd.concat([X_num, X_dummy], axis=1, sort=False)
# The fillna(0) above can produce a dummy column labelled with the integer 0
# next to string-labelled columns. Recent scikit-learn versions refuse
# DataFrames whose column labels mix str and non-str types, so make every
# label a string before the frame reaches the scaler/estimators.
X.columns = X.columns.astype(str)
y = data['status']

# Split into training (70%) and test (30%) sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1117)

# Standardize the features; the scaler is fitted on the training set only
# and then applied unchanged to the test set.
ss = StandardScaler()
X_train_std = ss.fit_transform(X_train)
X_test_std = ss.transform(X_test)

# Estimator classes for the five models compared in this script.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost.sklearn import XGBClassifier
from lightgbm.sklearn import LGBMClassifier

# Logistic regression (default hyper-parameters).
lr = LogisticRegression().fit(X_train_std, y_train)

# SVM with a linear kernel; probability=True enables predict_proba, which
# the evaluation code relies on.
svm_linear = SVC(kernel='linear', probability=True)
svm_linear.fit(X_train_std, y_train)

# Decision tree limited to a depth of 8.
dtc = DecisionTreeClassifier(max_depth=8).fit(X_train_std, y_train)

# XGBoost through its scikit-learn wrapper (default hyper-parameters).
xgbc = XGBClassifier().fit(X_train_std, y_train)

# LightGBM through its scikit-learn wrapper (default hyper-parameters).
lgb = LGBMClassifier().fit(X_train_std, y_train)

print('all done!')


#模型评估
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt

def draw_metrics(clf, X_train, X_test, y_train, y_test):
    """Print train/test scores of a fitted classifier and plot its ROC curves.

    Accuracy, precision, recall and F1 are computed from ``clf.predict``;
    AUC and the ROC curves are computed from the second column of
    ``clf.predict_proba`` with label 1 treated as the positive class.

    Args:
        clf: Fitted classifier providing ``predict`` and ``predict_proba``.
        X_train: Feature matrix of the training set.
        X_test: Feature matrix of the test set.
        y_train: True labels of the training set.
        y_test: True labels of the test set.

    Returns:
        None. Scores are written to stdout and the ROC curves are drawn on a
        new matplotlib figure, which is then shown.
    """
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)

    y_train_prob = clf.predict_proba(X_train)[:, 1]
    y_test_prob = clf.predict_proba(X_test)[:, 1]

    # Metrics based on hard predictions: (header, scoring function).
    label_scorers = (
        ('准确率:\n', accuracy_score),
        ('精准率:\n', precision_score),
        ('召回率:\n', recall_score),
        ('f1-score:\n', f1_score),
    )
    for header, scorer in label_scorers:
        print(header)
        print('训练集: ', '%.4f' % scorer(y_train, y_train_pred), end=' ')
        # End the line after the test score so that the next metric header
        # starts on its own line.
        print('测试集: ', '%.4f' % scorer(y_test, y_test_pred))

    # AUC is computed from the predicted probabilities, not the hard labels.
    print('auc:\n')
    print('训练集: ', '%.4f' % roc_auc_score(y_train, y_train_prob), end=' ')
    print('测试集: ', '%.4f' % roc_auc_score(y_test, y_test_prob))

    # ROC curves; the thresholds returned by roc_curve are not needed.
    fpr_train, tpr_train, _ = roc_curve(y_train, y_train_prob, pos_label=1)
    fpr_test, tpr_test, _ = roc_curve(y_test, y_test_prob, pos_label=1)

    label = ['Train - AUC:{:.4f}'.format(auc(fpr_train, tpr_train)),
             'Test - AUC:{:.4f}'.format(auc(fpr_test, tpr_test))]

    # Start a fresh figure so that successive calls (one per model) do not
    # draw their curves on top of each other.
    plt.figure()
    plt.plot(fpr_train, tpr_train)
    plt.plot(fpr_test, tpr_test)
    # Diagonal reference line: dashed, with diamond markers at its ends.
    plt.plot([0, 1], [0, 1], 'd--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # The two labels are matched to the first two plotted lines (train, test).
    plt.legend(label, loc=4)
    plt.title('ROC Curve')
    plt.show()
    
    
# Evaluate every trained model on the same standardized train/test split,
# in the order: logistic regression, linear SVM, decision tree, XGBoost,
# LightGBM.
for _model in (lr, svm_linear, dtc, xgbc, lgb):
    draw_metrics(_model, X_train_std, X_test_std, y_train, y_test)

评估结果

| 模型 | accuracy | precision | recall | f1 score | roc-auc score | ROC曲线 |
| --- | --- | --- | --- | --- | --- | --- |
| 逻辑回归 | 训练集:0.8019 测试集:0.7772 | 训练集:0.7181 测试集:0.6384 | 训练集:0.3604 测试集:0.3763 | 训练集:0.4799 测试集:0.4753 | 训练集:0.8157 测试集:0.7775 | |
| svm线性核 | 训练集:0.8019 测试集:0.7680 | 训练集:0.7790 测试集:0.6690 | 训练集:0.2645 测试集:0.2553 | 训练集:0.3949 测试集:0.3695 | 训练集:0.8131 测试集:0.7825 | |
| 决策树 | 训练集:0.8954 测试集:0.7337 | 训练集:0.8756 测试集:0.5000 | 训练集:0.6667 测试集:0.3816 | 训练集:0.7570 测试集:0.4328 | 训练集:0.9060 测试集:0.6481 | |
| xgboost | 训练集:0.8584 测试集:0.7842 | 训练集:0.8800 测试集:0.6651 | 训练集:0.4871 测试集:0.3816 | 训练集:0.6271 测试集:0.4849 | 训练集:0.9156 测试集:0.7855 | |
| lightgbm | 训练集:0.9976 测试集:0.7730 | 训练集:1.0000 测试集:0.6120 | 训练集:0.9902 测试集:0.4026 | 训练集:0.9951 测试集:0.4857 | 训练集:1.0000 测试集:0.7764 | |

 

                

猜你喜欢

转载自blog.csdn.net/truffle528/article/details/84310726