作业题目如下:
代码如下:
1.库的引用
# Library imports.
# NOTE(review): sklearn.cross_validation was deprecated in 0.18 and removed in
# scikit-learn 0.20; KFold now lives in sklearn.model_selection.
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
2.交叉检验划分
# 2. Cross-validation split.
# NOTE(review): the original called cross_validation.KFold(len(target),
# n_folds=10, shuffle=True) — an API removed in scikit-learn 0.20. The modern
# model_selection.KFold takes n_splits and yields (train_index, test_index)
# pairs from .split(X). Imported here so this block stands on its own.
from sklearn.model_selection import KFold

# Synthetic binary classification problem: 2000 samples, 15 features.
dataset = datasets.make_classification(n_samples=2000, n_features=15)
data, target = dataset[0], dataset[1]

# Materialize the fold index pairs so downstream code can iterate `kf`
# directly, exactly like the old iterable cross_validation.KFold object.
kf = list(KFold(n_splits=10, shuffle=True).split(data))
3.机器学习部分代码
# 3. Train and evaluate each classifier on every fold.
# The original repeated an identical fit/predict/report stanza three times
# (GaussianNB, SVC, RandomForestClassifier); the three runs share one loop
# here, with the printed labels and metric lines preserved byte-for-byte.
num = 1
for train_index, test_index in kf:
    data_train, target_train = data[train_index], target[train_index]
    data_test, target_test = data[test_index], target[test_index]
    print("Test:", num)
    num = num + 1
    # Fresh estimators each fold so no state leaks between folds.
    models = [
        ("GaussianNB:", GaussianNB()),
        ("SVC:", SVC(C=1e-01, kernel='rbf', gamma=0.1)),
        ("RandomForestClassifier:", RandomForestClassifier(n_estimators=100)),
    ]
    for label, clf in models:
        print(label)
        clf.fit(data_train, target_train)
        pred = clf.predict(data_test)
        print("Accuracy:", metrics.accuracy_score(target_test, pred))
        print("F1-score:", metrics.f1_score(target_test, pred))
        # NOTE(review): roc_auc_score is fed hard 0/1 predictions, not
        # probabilities/decision scores, so this "AUC" degenerates to
        # balanced accuracy. Kept as-is to preserve the original output;
        # use predict_proba / decision_function for a true ROC AUC.
        print("AUC ROC:", metrics.roc_auc_score(target_test, pred))
    print()
输出结果:
通过实验可知,在二分类问题中,随机森林算法的效果比朴素贝叶斯和支持向量机都更好