本文章目的主要是在《机器学习实战》这本书与其实践代码的基础上,对其原理和特殊函数进行解释,并给出对应超链接。
第二章 第一个 感兴趣男人分类器(实现k近邻算法)
# NOTE: the star import is kept only for backward compatibility with code that
# may rely on it; it can shadow builtins such as ``sum``. The functions below
# use the explicit ``np.`` namespace instead.
from numpy import *
import operator
import numpy as np


def createDataSet():
    """Return a toy data set: four 2-D points and their class labels.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array and
        ``labels`` is a list of four strings (``'A'`` or ``'B'``), one per row.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inx, dataSet, labels, k):
    """Classify ``inx`` by majority vote among its k nearest rows of ``dataSet``.

    Distances are Euclidean. On a tie in the vote, the label that was counted
    first (i.e. belonging to the nearer sample) wins, because ``sorted`` is
    stable.

    Args:
        inx: The point to classify (sequence or array of feature values).
        dataSet: NumPy array with one training sample per row.
        labels: Sequence of labels; ``labels[i]`` belongs to ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest neighbours.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Broadcasting subtracts inx from every row; the original built the same
    # matrix with tile(inx, (dataSetSize, 1)).
    # numpy shape usage: https://jingyan.baidu.com/article/a24b33cd5c90b319fe002b9e.html
    # numpy tile function: http://blog.csdn.net/ksearch/article/details/21388985
    diffMat = np.asarray(inx) - dataSet
    # ** is the power operator (2**4 == 16); on an array it squares each element.
    sqDiffMat = diffMat ** 2
    # sum(axis=1) adds up each row.
    # http://www.cnblogs.com/yyxayz/p/4033736.html
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # argsort() returns the indices that would sort the distances ascending.
    # http://www.cnblogs.com/yyxf1413/p/6253995.html
    sortedDistIndices = distances.argsort()

    # Braces create a dict (braces vs. brackets vs. parentheses:
    # https://zhidao.baidu.com/question/484920124.html).
    classCount = {}
    # range() usage: http://www.cnblogs.com/buro79xxd/archive/2011/05/23/2054493.html
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]
        # dict.get() with a default of 0 handles the first vote for a label.
        # http://www.runoob.com/python/att-dictionary-get.html
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Sort (label, votes) pairs by vote count, highest first.
    # operator.itemgetter: http://www.cnblogs.com/zhoufankui/p/6274172.html
    # dict items vs. iteritems: http://www.iplaypy.com/jinjie/items-iteritems.html
    # sorted(): http://www.cnblogs.com/sysu-blackbear/p/3283993.html
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Each line must hold at least three numeric feature columns followed by an
    integer class label in the last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        ``(n_lines, 3)`` float array holding the first three columns and
        ``classLabelVector`` is a list with the integer label of every line.

    Raises:
        ValueError: If a feature or label field cannot be converted to a number.
    """
    # The context manager guarantees the file is closed, even on errors.
    with open(filename) as fr:
        arrayOLines = fr.readlines()  # read all lines

    numberOfLines = len(arrayOLines)
    returnMat = np.zeros((numberOfLines, 3))
    classLabelVector = []
    for index, line in enumerate(arrayOLines):
        # strip() removes the trailing newline before splitting on tabs.
        listFromLine = line.strip().split('\t')
        # The first three columns form the feature row.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        # The last column is the class label.
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


def autoNorm(dataset):
    """Rescale every column of ``dataset`` linearly to the range [0, 1].

    Args:
        dataset: 2-D NumPy array with one sample per row.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``normDataSet`` is
        ``(dataset - minVals) / ranges``, ``ranges`` is the per-column
        ``max - min`` and ``minVals`` is the per-column minimum.

    Note:
        A column whose values are all equal has a range of 0, so the division
        yields nan for that column.
    """
    # min(0) / max(0) take the minimum / maximum down each column.
    # http://blog.csdn.net/qq_18433441/article/details/54743271
    minVals = dataset.min(0)
    maxVals = dataset.max(0)
    ranges = maxVals - minVals
    # Broadcasting applies the per-column offset and scale to every row.
    normDataSet = (dataset - minVals) / ranges
    return normDataSet, ranges, minVals


def classifyPerson():
    """Interactively classify a person using the dating data set.

    Reads three numeric features from standard input, loads and normalizes
    the training data from a hard-coded file path, classifies the entered
    person with ``classify0`` (k=3) and prints the verdict followed by the
    first ten rows of the normalized training matrix.
    """
    resultList = ['not in all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent fliter miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))
    # Raw string: same path as before, without invalid escape sequences.
    datingDataMat, datingLabels = file2matrix(
        r'C:\Users\卢雨辰\Desktop\course resource·\machinelearninginaction\Ch02\datingTestSet2.txt'
    )
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = np.array([ffMiles, percentTats, iceCream])
    # The query point is normalized with the training set's offset and scale.
    result = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    # NOTE(review): assumes the labels in the data file are 1, 2 or 3 - confirm.
    print("You will probably like this person:" + resultList[result - 1])
    print(normMat[0:10])