"""Train an entropy-criterion decision tree from a CSV file and export it.

The script reads ``F:\\tree.csv``, one-hot encodes the feature columns with
``DictVectorizer``, binarizes the label column, fits a
``DecisionTreeClassifier``, writes the fitted tree to a Graphviz ``.dot``
file, and finally predicts the label of a hand-modified copy of the first
training row.

CSV layout as consumed by the code below: the first row holds the column
headers, the first column of every data row is skipped (presumably a row
id -- confirm against the data file), the last column is the class label,
and everything in between is treated as a categorical feature.
"""
import csv

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

try:
    # Unused by this script, but the name is kept available in case other
    # code relies on it. Newer scikit-learn releases no longer ship
    # ``sklearn.externals.six``, so fall back to the standard library.
    from sklearn.externals.six import StringIO  # noqa: F401
except ImportError:
    from io import StringIO  # noqa: F401

featureList = []
labelList = []

# Use a context manager so the CSV handle is closed even if parsing fails;
# ``newline=""`` is what the csv module expects for files it reads.
with open("F:\\tree.csv", "r", newline="") as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    headers = next(reader)  # the first row holds the column headers
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they carry no sample.
            continue
        labelList.append(row[-1])
        # Map header -> value for every column except the first and the
        # last (the label); indices line up with ``headers``.
        featureList.append(
            {headers[i]: value for i, value in enumerate(row[1:-1], start=1)}
        )

# One-hot encode the categorical feature dictionaries.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX:\n" + str(dummyX))

# Binarize the class labels.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY:\n" + str(dummyY))

# Train the classifier, splitting on information gain (entropy).
clf = tree.DecisionTreeClassifier(criterion="entropy")
clf = clf.fit(dummyX, dummyY)
print("clf:\n" + str(clf))

# ``get_feature_names`` was replaced by ``get_feature_names_out`` in newer
# scikit-learn releases; use whichever this installation provides.
if hasattr(vec, "get_feature_names_out"):
    feature_names = list(vec.get_feature_names_out())
else:
    feature_names = vec.get_feature_names()

with open("allElectronicsInformationGainOri.dot", "w") as f:
    # The return value is not needed when ``out_file`` is given, so the
    # file handle is no longer rebound inside its own ``with`` block.
    tree.export_graphviz(clf, feature_names=feature_names, out_file=f)

oneRowX = dummyX[0, :]
print("oneRowX:\n" + str(oneRowX))

# Copy before editing: ``dummyX[0, :]`` is a view, so modifying an alias
# would silently change both ``oneRowX`` and the training matrix.
newRow = oneRowX.copy()
newRow[0] = 1
newRow[2] = 0
# Label changed from "oneRowX:" because this prints the edited row.
print("newRow:\n" + str(newRow))

# ``predict`` expects a 2-D array of shape (n_samples, n_features).
predictedY = clf.predict(newRow.reshape(1, -1))
print("predictedY:" + str(predictedY))
机器学习----决策树(二)
猜你喜欢
转载自blog.csdn.net/zhourunan123/article/details/80064542
今日推荐
周排行