"""Predict Titanic passenger survival with a decision tree.

The script downloads the Titanic passenger table, keeps the ``age``, ``sex``
and ``pclass`` columns as features, fills missing ages with the mean age,
vectorizes the feature rows, trains a ``DecisionTreeClassifier`` on 75% of the
rows and prints a classification report plus the accuracy on the remaining
25%.

NOTE(review): running this requires network access to the dataset URL below.
"""
import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

tantic = pd.read_csv(
    "http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt"
)

# Take an explicit copy so the imputation below modifies an independent frame
# rather than a slice of `tantic` (avoids SettingWithCopyWarning and keeps
# working under pandas copy-on-write).
X = tantic[['age', 'sex', 'pclass']].copy()
Y = tantic['survived']

# Fill missing ages with the mean age of the whole data set. Assigning the
# result back (instead of `inplace=True` on a column selection) guarantees
# the fill is actually stored in `X`.
X['age'] = X['age'].fillna(X['age'].mean())

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_text, Y_train, Y_text = train_test_split(
    X, Y, test_size=0.25, random_state=33
)

# Convert each row to a {column: value} dict and vectorize it. The encoding is
# learned from the training rows only and then re-applied to the held-out rows.
# `orient='records'` is the full spelling; the abbreviation 'record' is
# rejected by pandas >= 2.0.
vec = DictVectorizer(sparse=False)
X_train = vec.fit_transform(X_train.to_dict(orient='records'))
X_text = vec.transform(X_text.to_dict(orient='records'))

# Train the decision tree and predict on the held-out rows.
jueceshu = DecisionTreeClassifier()
jueceshu.fit(X_train, Y_train)
Y_predict = jueceshu.predict(X_text)

# Report per-class precision/recall/F1 and the overall accuracy.
# NOTE(review): target_names assumes label 0 means "died" and 1 means
# "survived" in the `survived` column — confirm against the data set.
print(classification_report(Y_text, Y_predict, target_names=['died', 'survived']))
print(jueceshu.score(X_text, Y_text))
利用决策树预测泰坦尼克号乘客是否生还
猜你喜欢
转载自www.cnblogs.com/yiduobaozhiblog1/p/8948065.html
今日推荐
周排行