#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/8/16 15:21
# @Author : limingyu
# @Site :
# @File : Test_KNN.py
# @Software: PyCharm
"""Minimal k-nearest-neighbours (KNN) classifier demo on a four-point toy data set."""
import operator

import numpy as np


def createDataSet():
    """Build the toy training data and its labels.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array of
        sample points and ``labels`` is the list of class labels
        (``'A'`` / ``'B'``), one per row of ``group`` and in the same order.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean.

    Args:
        inX: The point to classify; a sequence or array holding one value
            per feature.
        dataSet: Training samples, one row per sample.
        labels: Class label of each row of ``dataSet``, in the same order.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one that was counted first (i.e. the one owning the nearer
        neighbour) wins, given an insertion-ordered dict (Python 3.7+).

    Raises:
        IndexError: If ``k`` is not between 1 and the number of samples.
            The exception type matches what an out-of-range ``k`` raised
            before, so existing callers keep working.
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]  # number of samples

    # Fail early with a readable message instead of an index error raised
    # from the middle of the voting loop.
    if not 1 <= k <= dataSetSize:
        raise IndexError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (dataSetSize, k)
        )

    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = np.asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distance = sqDistances ** 0.5
    # e.g. for inX=[3, 3]: [2.75862284 2.82842712 4.24264069 4.17252921]

    # Sample indices ordered from nearest to farthest, e.g. [0 1 3 2].
    sortedDistIndicies = distance.argsort()

    # Count how many of the k nearest samples carry each label.
    classCount = {}
    for sampleIndex in sortedDistIndicies[:k]:
        voteLabel = labels[sampleIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() returns the first entry with the highest count, which is the
    # same winner a stable descending sort of the counts would put first.
    return max(classCount.items(), key=operator.itemgetter(1))[0]


def main():
    """Run the demo: classify the point [3, 3] with k=3 and print the label."""
    group, labels = createDataSet()
    test = classify0([3, 3], group, labels, 3)
    print(test)  # A


if __name__ == '__main__':
    main()
机器学习之KNN
猜你喜欢
转载自blog.csdn.net/mingyuli/article/details/81747483
今日推荐
周排行