用 Python 手写实现 KNN 算法

KNN 算法主要有两个步骤:计算样本之间的距离,以及根据距离最近的 k 个邻居投票判定样本的类别。计算距离之前,先对特征进行 min-max 归一化处理,使各特征的取值范围一致。

from sklearn.datasets import load_iris
from sklearn.model_selection  import train_test_split
import sklearn
import numpy as np
import pandas as pd

# Load the iris dataset that ships with scikit-learn.
dataset = load_iris()
data, target = dataset['data'], dataset['target']

# Wrap the features in a DataFrame and min-max scale each column:
# (value - column minimum) / (column maximum - column minimum).
data = pd.DataFrame(data)
data = (data - data.min()) / (data.max() - data.min())

# Split into training and test sets; test_size is the fraction of samples
# held out for testing.
traindata, testdata, traintarget, testtarget = train_test_split(
    data,
    target,
    test_size=0.2,
)

#KNN算法

##################### Compute distances
# Euclidean distance between every training sample and every test sample.
# dist[i, j] is the distance from training row i to test row j, so dist has
# shape (n_train, n_test).
#
# NumPy broadcasting replaces the original per-pair loop, which built two
# pandas Series through .iloc for every (train, test) combination.
train_arr = np.asarray(traindata, dtype=float)
test_arr = np.asarray(testdata, dtype=float)
# (n_train, 1, n_features) - (1, n_test, n_features)
#   -> (n_train, n_test, n_features)
diff = train_arr[:, np.newaxis, :] - test_arr[np.newaxis, :, :]
dist = np.sqrt((diff ** 2).sum(axis=2))


##################### Find the neighbours of every test sample
k = 6    # number of neighbours

# max_index[j] holds the row positions (into the training set) of the k
# training samples closest to test sample j, ordered nearest first.
# Despite its name, it stores the indices of the *smallest* distances.
#
# A stable argsort followed by [:k] returns exactly k indices (fewer only if
# there are fewer than k training samples). The previous value-membership
# test (`distance in k_smallest_values`) also picked up every sample that
# tied with one of the k smallest distances, so a test sample could end up
# with more than k neighbours.
max_index = [
    np.argsort(dist[:, j], kind='stable')[:k].tolist()
    for j in range(dist.shape[1])
]

# Classify: each test sample gets the most frequent label among its
# neighbours. pd.Series(...).value_counts() is used because the top-level
# pd.value_counts function is deprecated in recent pandas releases.
pre = [
    pd.Series(traintarget[neighbours]).value_counts().idxmax()
    for neighbours in max_index
]

# Show the result: element-wise comparison of predictions and true labels
# (True where the prediction is correct). The list is converted explicitly
# so the element-wise comparison does not rely on list/ndarray coercion.
print(np.asarray(pre) == testtarget)

猜你喜欢

转载自blog.csdn.net/spartanfuk/article/details/82151397