# 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_28306361/article/details/87889630
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from collections import Counter
from math import sqrt
class kNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all distance computation happens
    in ``predict``. Inputs may be NumPy arrays or any array-like (e.g. nested
    lists), since they are converted with ``np.asarray``.

    Validation is done with ``assert`` statements, so the checks raise
    ``AssertionError`` and are skipped when Python runs with ``-O``.
    """

    def __init__(self, k):
        """Create an unfitted classifier that votes among ``k`` neighbours.

        Raises:
            AssertionError: if ``k`` is smaller than 1.
        """
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Args:
            X_train: 2-D array-like; one row per sample.
            y_train: array-like of labels; one entry per row of ``X_train``.

        Returns:
            This classifier, so calls can be chained.

        Raises:
            AssertionError: if the number of samples and labels differ, or
                if there are fewer samples than ``k``.
        """
        # Coerce up front so plain Python lists work as well as ndarrays.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k"
        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Predict a label for every row of ``X_predict``.

        Args:
            X_predict: 2-D array-like with the same number of columns as the
                training data.

        Returns:
            A NumPy array holding one predicted label per input row.

        Raises:
            AssertionError: if called before ``fit`` or if the number of
                features does not match the training data.
        """
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict"
        X_predict = np.asarray(X_predict)
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``.

        Args:
            x: 1-D array-like with one value per training feature.

        Raises:
            AssertionError: if the length of ``x`` differs from the number
                of training features.
        """
        x = np.asarray(x)
        assert x.shape[0] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train "
        # One vectorized pass over all training rows instead of a Python loop.
        distances = np.sqrt(np.sum((self._X_train - x) ** 2, axis=1))
        # A stable sort keeps equidistant samples in training order, which
        # makes tie-breaking deterministic.
        nearest = np.argsort(distances, kind="stable")[:self.k]
        votes = Counter(self._y_train[nearest])
        # On a tied vote, most_common returns the label encountered first,
        # i.e. the one belonging to the closest neighbour.
        return votes.most_common(1)[0][0]
# Typical kNN usage with scikit-learn.
# X_train, y_train and x are not defined in this snippet; they are expected
# to exist already when it runs.
# fit() returns the estimator itself, so construction and training are chained.
kNN_classifier = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)
# predict() expects a 2-D input, so the single sample x becomes one row.
X_predict = x.reshape(1, -1)
y_predict = kNN_classifier.predict(X_predict)
# Bare expression: shows the predicted label in an interactive session only.
y_predict[0]