# 1. Load the iris dataset
from sklearn.datasets import load_iris
# Load the bundled iris data; the returned object is indexed like a dict below.
iris_dataset = load_iris()

# Show which entries the dataset object exposes.
dataset_keys = iris_dataset.keys()
print("key of iris_dataset: \n{}".format(dataset_keys))
'''
key of iris_dataset:
dict_keys(['feature_names', 'target', 'data', 'target_names', 'DESCR', 'filename'])
'''

# Show the names of the classes.
class_names = iris_dataset['target_names']
print(class_names)
'''
array(['setosa', 'versicolor', 'virginica'],
dtype='<U10')
'''
# 2. Split the dataset
from sklearn.model_selection import train_test_split
# Name the feature matrix and label vector before splitting them.
features = iris_dataset['data']
labels = iris_dataset['target']

# A fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=0)

# Print the train/test shapes, first for the features, then for the labels.
# Recorded output:
# (112, 4) (38, 4)
# (112,) (38,)
for train_part, test_part in ((X_train, X_test), (y_train, y_test)):
    print(train_part.shape, test_part.shape)
# 3. Inspect the dataset
import pandas as pd
# Wrap the training features in a DataFrame labelled with the feature names
# so the rows can be previewed with readable column headers.
column_labels = iris_dataset.feature_names
iris_dataframe = pd.DataFrame(data=X_train, columns=column_labels)

# Preview the first rows of the training data.
first_rows = iris_dataframe.head()
print(first_rows)
'''
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.9 3.0 4.2 1.5
1 5.8 2.6 4.0 1.2
2 6.8 3.0 5.5 2.1
3 4.7 3.2 1.3 0.2
4 6.9 3.1 5.1 2.3
'''

# Preview the last rows of the training data.
last_rows = iris_dataframe.tail()
print(last_rows)
'''
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
107 4.9 3.1 1.5 0.1
108 6.3 2.9 5.6 1.8
109 5.8 2.7 4.1 1.0
110 7.7 3.8 6.7 2.2
111 4.6 3.2 1.4 0.2
'''
# 4. Build the KNN prediction model
from sklearn.neighbors import KNeighborsClassifier
# Create a k-nearest-neighbours classifier with default settings and fit it
# on the training split; fit() returns the estimator itself, so the call
# can be chained onto the constructor.
knn = KNeighborsClassifier().fit(X_train, y_train)
'''
Out[38]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=5, p=2,
weights='uniform')
'''

# Predict an integer class label for every held-out sample.
prediction = knn.predict(X_test)
print(prediction)
#[2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0 2]

# Map the integer labels back to their class names.
predicted_names = iris_dataset['target_names'][prediction]
print(predicted_names)
'''
['virginica' 'versicolor' 'setosa' 'virginica' 'setosa' 'virginica'
'setosa' 'versicolor' 'versicolor' 'versicolor' 'virginica' 'versicolor'
'versicolor' 'versicolor' 'versicolor' 'setosa' 'versicolor' 'versicolor'
'setosa' 'setosa' 'virginica' 'versicolor' 'setosa' 'setosa' 'virginica'
'setosa' 'setosa' 'versicolor' 'versicolor' 'setosa' 'virginica'
'versicolor' 'setosa' 'virginica' 'virginica' 'versicolor' 'setosa'
'virginica']
'''
# Report accuracy on the held-out split. The score must be computed against
# the true labels (y_test): comparing the model with its own predictions
# always yields 1.00 and says nothing about how well it generalises.
print("test set score:{:.2f}".format(knn.score(X_test, y_test)))
#test set score:0.97
import numpy as np
# The same accuracy computed by hand: the fraction of predictions that match
# the true labels. A single mean over the boolean array is sufficient.
print("test set score:{:.2f}".format(np.mean(prediction == y_test)))
#test set score:0.97