"""Minimal k-nearest-neighbours (KNN) demo on a small 2-D toy data set.

Run as a script to plot the training points (class 0 in red, class 1 in
green), the query point (blue), and print the predicted class of the query.
"""
import collections
import math
import time

import numpy as np

# Toy training set: ten 2-D points, the first five labelled 0, the rest 1.
raw_data_x = [
    [3.39, 2.33],
    [3.11, 1.78],
    [1.34, 3.36],
    [3.58, 4.67],
    [2.28, 2.86],
    [7.442, 4.69],
    [5.74, 3.53],
    [9.17, 2.51],
    [7.79, 3.42],
    [7.93, 0.79],
]
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

x_train = np.array(raw_data_x)
y_train = np.array(raw_data_y)
x_test = np.array([8.0, 3.36])  # the point to classify
k = 6  # number of neighbours that take part in the vote


def knn_classify(samples, labels, query, k):
    """Return the majority label among the ``k`` samples nearest to ``query``.

    Distance is Euclidean.  When several labels share the highest vote count,
    the one met first while walking the neighbours from nearest to farthest
    wins (``Counter.most_common`` keeps first-encountered order for ties).

    Args:
        samples: 2-D array-like with one training point per row.
        labels: 1-D array-like with one label per row of ``samples``.
        query: 1-D array-like, the point to classify.
        k: Number of nearest neighbours to poll; ``1 <= k <= len(samples)``.

    Returns:
        The winning element of ``labels``.

    Raises:
        ValueError: If ``samples`` and ``labels`` differ in length, or ``k``
            is outside ``1..len(samples)``.
    """
    samples = np.asarray(samples, dtype=float)
    labels = np.asarray(labels)
    query = np.asarray(query, dtype=float)

    n_samples = len(samples)
    if len(labels) != n_samples:
        raise ValueError(
            "samples and labels must have the same length "
            "({} != {})".format(n_samples, len(labels))
        )
    if not 1 <= k <= n_samples:
        raise ValueError(
            "k must be between 1 and {}, got {}".format(n_samples, k)
        )

    # One vectorized pass: distance from every training row to the query.
    distances = np.sqrt(((samples - query) ** 2).sum(axis=1))
    # A stable sort keeps equidistant samples in training order, which makes
    # the neighbour list (and therefore tie-breaking) deterministic.
    nearest = np.argsort(distances, kind="stable")[:k]

    votes = collections.Counter(labels[i] for i in nearest)
    return votes.most_common(1)[0][0]


def plot_points(samples, labels, query):
    """Scatter-plot class 0 (red), class 1 (green) and the query point (blue)."""
    # Imported lazily so the classifier stays importable where matplotlib
    # (or a display) is unavailable.
    import matplotlib.pyplot as plt

    plt.scatter(samples[labels == 0, 0], samples[labels == 0, 1], color='r')
    plt.scatter(samples[labels == 1, 0], samples[labels == 1, 1], color='g')
    plt.scatter(query[0], query[1], color='b')
    plt.show()


def main():
    """Plot the toy data, then print the predicted class of ``x_test``."""
    plot_points(x_train, y_train, x_test)
    print("the class is {}".format(knn_classify(x_train, y_train, x_test, k)))


if __name__ == "__main__":
    main()
简单实现KNN(处理连续型数据)
猜你喜欢
转载自www.cnblogs.com/zijidefengge/p/11763148.html
今日推荐
周排行