# Adapted from the book "Machine Learning in Action", with some changes of my own.
from numpy import*
import operator
import matplotlib
import matplotlib.pyplot as plt
def createDataset():
    """Return a tiny hand-made sample set for trying out the classifier.

    Returns:
        A ``(group, labels)`` pair. ``group`` is a 4x2 array holding four
        2-D points and ``labels`` is a list with the class label ('A' or
        'B') of each row, in the same order.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify, one value per feature.
        dataSet: Training points, one row per sample (array or nested list).
        labels: Class label of each row of ``dataSet``, in the same order.
        k: Number of nearest neighbours that vote. A value larger than the
            number of rows is capped so that every row votes.

    Returns:
        The label with the most votes. On a tie, the label that received
        its first vote from the nearer neighbour wins (the sort is stable
        and the vote dict keeps insertion order).

    Raises:
        ValueError: If ``k`` is less than 1 or ``dataSet`` has no rows.
    """
    dataSet = asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be a positive integer")
    if k > dataSetSize:
        # Cannot consult more neighbours than there are samples.
        k = dataSetSize

    # Broadcasting subtracts every training row from inX in one step.
    diffMat = asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)  # one squared distance per row
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()  # row indices, nearest first

    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Highest vote count first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
def file2matrix(filename, n):
    """Load a tab-separated data file into a feature matrix and label list.

    The first ``n - 1`` tab-separated fields of each line are stored as
    features, and the last field of the line is parsed as the numeric class
    label. Lines that are empty or contain only whitespace are skipped.

    Args:
        filename: Path of the text file to read.
        n: Number of columns to consider; ``n - 1`` of them become features.

    Returns:
        A ``(returnMat, classLabelVector)`` pair. ``returnMat`` is a float
        array of shape ``(number of data lines, n - 1)`` and
        ``classLabelVector`` is a list with one float label per data line.

    Raises:
        ValueError: If a field cannot be parsed as a float or a line has
            fewer than ``n - 1`` fields.
    """
    # The context manager guarantees the file is closed even on a parse error.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows), n - 1))
    classLabelVector = []
    for index, listFromLine in enumerate(rows):
        returnMat[index, :] = [float(value) for value in listFromLine[0:n - 1]]
        classLabelVector.append(float(listFromLine[-1]))
    return returnMat, classLabelVector
def autoNorm(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Each value is mapped to ``(value - column_min) / column_range``. A
    column whose values are all equal has range 0; such a column is mapped
    to 0 everywhere instead of producing NaN from a division by zero.

    Args:
        dataSet: 2-D array (or nested list) with one row per sample.

    Returns:
        A ``(normDataSet, ranges, minVals)`` triple: the scaled data, the
        per-column ``max - min`` (left at 0 for constant columns) and the
        per-column minimum.
    """
    dataSet = asarray(dataSet)
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide constant columns by 1 rather than 0; their numerator is already 0.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column min and range to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals
def datingClassTest(filename='dataset.txt', numColumns=3, hoRatio=0.10, k=3):
    """Run a hold-out evaluation of the kNN classifier and print the results.

    The data file is loaded with ``file2matrix`` and scaled with
    ``autoNorm``. The first ``int(m * hoRatio)`` rows are used as test
    samples and the remaining rows as training data. Each prediction is
    printed, followed by the error rate.

    Args:
        filename: Path of the tab-separated data file.
        numColumns: Column count passed to ``file2matrix``.
        hoRatio: Fraction of the rows held out for testing.
        k: Number of neighbours passed to ``classify``.
    """
    datingDataMat, datingLabels = file2matrix(filename, numColumns)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    n = normMat.shape[1]
    print("n:", n)
    numTestVecs = int(m * hoRatio)
    print(numTestVecs)
    print("normMat", normMat)

    if numTestVecs == 0:
        # Too few rows for the requested ratio: no error rate can be computed.
        print("no test samples: the data set is too small for hoRatio=%s" % hoRatio)
        return

    errorCount = 0.0
    trainingMat = normMat[numTestVecs:m]
    trainingLabels = datingLabels[numTestVecs:m]
    for i in range(numTestVecs):
        classifierResult = classify(normMat[i], trainingMat, trainingLabels, k)
        if classifierResult != datingLabels[i]:
            errorCount += 1
        print(classifierResult)
    print(errorCount / numTestVecs)
if __name__ == '__main__':
    # Run the hold-out evaluation only when the file is executed as a script.
    datingClassTest()
#inx,labels=createDataset()
#y=classify([0.5,1],inx,labels,3)
#print(y)
# x,y=file2matrix("dataset.txt")
# print(autoNorm(x))
# fig=plt.figure()#build
# ax=fig.add_subplot(111)#the rate of heigh width and depth
# ax.scatter(x[:,1],x[:,2])#the x and y axis
# plt.show()
# print(x)
# KNN study notes 1
# Reposted from blog.csdn.net/qq_34902939/article/details/85345991