与上一篇聚类算法的文章类似,本文分享一段封装好的代码,用于对比 sklearn 中常用分类器的分类结果,方便在初步比较模型时使用。程序运行结果如下图所示。先给出代码,后续再做详细分析。
可执行Python3代码:
# 分类问题可运行代码
from collections import Counter
from sklearn.datasets import load_iris
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import threading
import warnings
import numpy as np
import time
# Suppress all warnings so the per-model reports printed below stay readable.
warnings.filterwarnings("ignore")

# (short name, estimator) pairs to benchmark. The short name is printed while
# the model runs and is reused as the x-axis tick label in the summary plots.
model_list = [
    ("LR", LogisticRegression()),
    ("DT", DecisionTreeClassifier()),
    ("NB", GaussianNB()),
    ("RF", RandomForestClassifier()),
    ("GBDT", GradientBoostingClassifier()),
]

iris = load_iris()
X, y = iris.data, iris.target

# Oversample with SMOTE. `fit_resample` is the supported API; the older
# `fit_sample` alias was removed in imbalanced-learn 0.8 and raises
# AttributeError on current releases.
x_smo, y_smo = SMOTE(random_state=0).fit_resample(X, y)

# Per-model accumulators, filled in the same order as `model_list`:
# elapsed seconds and overall accuracy.
time_, acc_ = [], []
# Benchmark every classifier with the same preprocessing pipeline and record
# its cross-validation wall time and overall accuracy.
for model in model_list:
    print("Now ", model[0], end=" ")

    # Scale features to [0, 1], keep the principal components explaining 99%
    # of the variance, then fit the classifier under test.
    pipe = Pipeline([
        ('st', MinMaxScaler()),
        ('pca', PCA(n_components=0.99)),
        ('clf', model[1]),
    ])

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    y_pred = cross_val_predict(pipe, x_smo, y_smo, cv=5)
    elapsed = time.perf_counter() - t0

    report = classification_report(y_smo, y_pred)
    print("({:.2f} s)".format(elapsed))
    print(report)

    time_.append(elapsed)
    # Compute accuracy directly from the predictions. Picking a token out of
    # the formatted report by position (previously index 20) only works for
    # one specific number of classes and one report layout.
    acc_.append(float(np.mean(np.asarray(y_pred) == np.asarray(y_smo))))
# Summarize the benchmark as two side-by-side bar charts: elapsed time on the
# left, accuracy on the right, one bar per classifier.
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 7)

# Both charts share the same bar positions and classifier labels.
positions = np.arange(len(time_))
labels = [model[0] for model in model_list]

ax1.bar(positions, time_)
ax1.set_title("Time costed for each classifier (s)")
ax1.set_xticks(positions)
ax1.set_xticklabels(labels)
ax1.set_ylabel("Time(s)")

ax2.bar(positions, acc_)
# Accuracy is a unitless ratio; the "(s)" suffix previously in this title was
# copied over from the timing chart.
ax2.set_title("Accuracy for each classifier")
ax2.set_xticks(positions)
ax2.set_xticklabels(labels)
ax2.set_ylabel("Accuracy")

plt.suptitle("Results on different classifiers", fontweight="bold", fontsize=15)
plt.show()
(完)