来自《Python机器学习基础教程》（[德] Andreas C. Müller、[美] Sarah Guido 著，张亮（hysic）译）第一章。
from sklearn.datasets import load_iris
# Load the iris dataset shipped with scikit-learn. The rest of the script
# indexes the returned object by key: 'data', 'target', 'feature_names',
# and 'target_names'.
iris_dataset = load_iris()
In [9]:
# Show which entries the dataset object exposes, then its 'target' entry
# (the array that is later split into y_train / y_test).
print(iris_dataset.keys())
print(iris_dataset['target'])
In [26]:
# Names of the feature columns stored in the dataset.
print(iris_dataset['feature_names'])
In [23]:
# Dimensions of the feature array.
print(iris_dataset['data'].shape)
In [33]:
from sklearn.model_selection import train_test_split

# Split features and labels into a training part and a held-out test part.
# random_state=0 fixes the seed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'],
    iris_dataset['target'],
    random_state=0,
)
In [46]:
import pandas as pd
import mglearn

# Wrap the training features in a DataFrame whose columns carry the feature
# names, so the plot axes are labelled.
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)

# Pair plot of every feature against every other, coloured by y_train.
# The top-level pd.scatter_matrix alias was deprecated and later removed from
# pandas; the function is available as pandas.plotting.scatter_matrix.
grr = pd.plotting.scatter_matrix(
    iris_dataframe,
    c=y_train,
    figsize=(15, 15),
    marker='o',
    hist_kwds={'bins': 20},
    s=60,
    alpha=.8,
    cmap=mglearn.cm3,
)
In [47]:
from sklearn.neighbors import KNeighborsClassifier
# Build a k-nearest-neighbours classifier configured with n_neighbors=1,
# i.e. each prediction is based on the single closest training sample.
knn = KNeighborsClassifier(n_neighbors=1)
In [48]:
# Fit the classifier on the training split produced by train_test_split.
knn.fit(X_train , y_train)
Out[48]:
In [49]:
# numpy is used below but is not imported anywhere else in this script;
# without this import the cell fails with NameError on `np`.
import numpy as np

# A single new sample, written as a 2-D array with one row of four values.
xnew = np.array([[5, 2.9, 1, 0.2]])
pred = knn.predict(xnew)
print(pred)
# Use the predicted value(s) to index the 'target_names' entry, turning the
# numeric prediction into its class name.
print(iris_dataset["target_names"][pred])

# Predict the held-out test samples and report the fraction of predictions
# that match the true labels.
y_pred = knn.predict(X_test)
print(np.mean(y_pred == y_test), ' is the successful rate')