# 机器学习之KNN (Machine Learning: k-Nearest Neighbours)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/8/16 15:21
# @Author  : limingyu
# @Site    : 
# @File    : Test_KNN.py
# @Software: PyCharm
import numpy as np
import operator

def createDataSet():
    """Build the toy training set used by the KNN demo.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 NumPy array of
        sample coordinates and ``labels`` is the list of class names, one
        per row of ``group``.
    """
    # Two samples near (1, 1) belong to class 'A', two near (0, 0) to 'B'.
    class_a_points = [[1.0,1.1],[1.0,1.0]]
    class_b_points = [[0,0],[0,0.1]]
    group = np.array(class_a_points + class_b_points)
    labels = ['A'] * len(class_a_points) + ['B'] * len(class_b_points)
    return group,labels
# inX: the point to classify; k: number of nearest neighbours that vote
def classify0(inX,dataSet,labels,k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label whose first vote came from the closest neighbour wins
    (dicts keep insertion order and ``max`` returns the first maximal item).

    Args:
        inX: Coordinates of the point to classify (one value per feature).
        dataSet: Training samples, one row per sample; a NumPy array or
            anything ``np.asarray`` can convert to a 2-D array.
        labels: Class label of each row of ``dataSet``, indexable by row
            number.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples (which includes the case of an empty ``dataSet``).
    """
    samples = np.asarray(dataSet)
    sample_count = samples.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (sample_count, k)
        )

    # Broadcasting subtracts every sample row from inX without building a
    # tiled copy of the query point.
    diffs = np.asarray(inX) - samples
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equally distant samples in their original row
    # order, so the result is deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    # Tally the votes in order of increasing distance.
    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # Only the winner is needed, so a full sort of the tally is unnecessary.
    return max(votes.items(), key=operator.itemgetter(1))[0]


if __name__ == '__main__':
    # Demo: classify the point (3, 3) against the toy data set using the
    # three nearest neighbours.
    samples, sample_labels = createDataSet()
    query_point = [3,3]
    predicted = classify0(query_point, samples, sample_labels, 3)
    print(predicted)  # A
# 猜你喜欢 (site navigation text: "You may also like")