1、KNN prediction on the iris dataset

 1、导入iris数据集

from sklearn.datasets import load_iris

# Load the iris dataset; the returned object exposes its parts both by key
# and by attribute.
iris_dataset = load_iris()

# Show which entries the dataset object carries.
print(
    "key of iris_dataset: \n{}".format(iris_dataset.keys())
)
# Recorded output:
#   key of iris_dataset:
#     dict_keys(['feature_names', 'target', 'data', 'target_names', 'DESCR', 'filename'])

# The class labels that the integer targets map to.
print(iris_dataset.target_names)
# Recorded value (interactive repr form; print() shows only the bracketed
# list without the array(...) wrapper and dtype):
#   array(['setosa', 'versicolor', 'virginica'],
#         dtype='<U10')

2、划分数据集 

from sklearn.model_selection import train_test_split

# Split features and labels into a training part and a held-out test part.
# random_state=0 fixes the split so the numbers recorded below are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset.data,
    iris_dataset.target,
    random_state=0,
)

# Feature matrices -- recorded output: (112, 4) (38, 4)
print(X_train.shape, X_test.shape)

# Label vectors -- recorded output: (112,) (38,)
print(y_train.shape, y_test.shape)

3、观察数据集

import pandas as pd

# Wrap the training features in a DataFrame whose columns carry the dataset's
# feature names, so the rows can be inspected in a readable form.
iris_dataframe = pd.DataFrame(
    data=X_train,
    columns=iris_dataset['feature_names'],
)

# First five training rows.
print(iris_dataframe.head())
# Recorded output:
#    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
# 0                5.9               3.0                4.2               1.5
# 1                5.8               2.6                4.0               1.2
# 2                6.8               3.0                5.5               2.1
# 3                4.7               3.2                1.3               0.2
# 4                6.9               3.1                5.1               2.3

# Last five training rows.
print(iris_dataframe.tail())
# Recorded output:
#      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
# 107                4.9               3.1                1.5               0.1
# 108                6.3               2.9                5.6               1.8
# 109                5.8               2.7                4.1               1.0
# 110                7.7               3.8                6.7               2.2
# 111                4.6               3.2                1.4               0.2

 4、构建KNN预测模型

from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier with default settings and train it
# on the training split. fit() hands back the estimator itself, so the
# construction and training can be chained.
# Recorded repr of the fitted estimator:
#   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#              metric_params=None, n_jobs=None, n_neighbors=5, p=2,
#              weights='uniform')
knn = KNeighborsClassifier().fit(X_train, y_train)

# Predict an integer class label for every held-out sample.
prediction = knn.predict(X_test)

print(prediction)
# Recorded output:
#   [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0 2]

# Index the label names with the predicted integers to get species names.
print(iris_dataset.target_names[prediction])
# Recorded output:
#   ['virginica' 'versicolor' 'setosa' 'virginica' 'setosa' 'virginica'
#    'setosa' 'versicolor' 'versicolor' 'versicolor' 'virginica' 'versicolor'
#    'versicolor' 'versicolor' 'versicolor' 'setosa' 'versicolor' 'versicolor'
#    'setosa' 'setosa' 'virginica' 'versicolor' 'setosa' 'setosa' 'virginica'
#    'setosa' 'setosa' 'versicolor' 'versicolor' 'setosa' 'virginica'
#    'versicolor' 'setosa' 'virginica' 'virginica' 'versicolor' 'setosa'
#    'virginica']
import numpy as np

# Evaluate on the held-out split. score() must be given the TRUE labels
# (y_test): passing the model's own predictions compares the predictions
# with themselves and therefore always reports 1.00, whatever the model does.
print("test set score:{:.2f}".format(knn.score(X_test, y_test)))
# Expected: test set score:0.97 (same accuracy as the manual check below)

# Cross-check by hand: the fraction of predictions that equal the true label.
# A single mean over the boolean comparison is enough; the result is already
# a scalar, so averaging it a second time changes nothing.
print("test set score:{:.2f}".format(np.mean(prediction == y_test)))
# Recorded output: test set score:0.97

猜你喜欢

转载自blog.csdn.net/TU_JCN/article/details/86512166