import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
def _majority_label(neighbor_labels):
    """Return the most frequent label in *neighbor_labels*.

    Ties go to the label that appears first, i.e. the one belonging to the
    closest neighbour when the labels are supplied nearest-first.
    """
    counts = {}
    for label in neighbor_labels:
        counts[label] = counts.get(label, 0) + 1
    return max(counts, key=counts.get)


def _knn_predict(queries, reference, reference_labels, k, exclude_self=False):
    """Classify each row of *queries* by a majority vote of its k nearest
    rows of *reference* (Euclidean distance).

    Args:
        queries: 2-D array-like of samples to classify.
        reference: 2-D array-like of samples that act as neighbours.
        reference_labels: label of each reference row, indexable by position.
        k: number of neighbours that vote.
        exclude_self: set to True when *queries* and *reference* are the same
            samples in the same order; row ``i`` is then not allowed to vote
            for itself (leave-one-out).

    Returns:
        A float array holding one predicted label per query row.

    Raises:
        ValueError: if a query has no candidate neighbours to vote.
    """
    queries = np.asarray(queries, dtype=float)
    reference = np.asarray(reference, dtype=float)
    predictions = np.zeros(len(queries))
    for i, point in enumerate(queries):
        distances = np.sqrt(((reference - point) ** 2).sum(axis=1))
        # A stable sort keeps equally distant neighbours in index order.
        order = np.argsort(distances, kind='stable')
        if exclude_self:
            # Exclude by position, not by "distance == 0": duplicates of the
            # query are legitimate neighbours and must keep their vote.
            order = order[order != i]
        nearest = order[:k]
        if nearest.size == 0:
            raise ValueError('no neighbours available for query %d' % i)
        predictions[i] = _majority_label(reference_labels[j] for j in nearest)
    return predictions


def _accuracy(predicted, actual):
    """Return the fraction of predictions equal to the true label, rounded
    to two decimals.

    Equality is used rather than the sign of ``predicted * actual`` so the
    result is also correct for labels that are not coded as +1/-1.
    """
    matches = np.asarray(predicted) == np.asarray(actual)
    return round(np.count_nonzero(matches) / len(actual), 2)


# Load the data; the CSV file has no header row.
data = pd.read_csv('data.csv', encoding='utf8', header=None)
arr = data.values

# Preprocessing: replace the values of column 1 with integer codes and print
# the code --> original value mapping.
encoder = preprocessing.LabelEncoder()
labels = encoder.fit_transform(arr[:, 1])
for code, original in enumerate(encoder.classes_):
    print(code, '-->', original)
arr[:, 1] = labels

# Every column but the last is a feature; the last column is the target.
x, y = arr[:, 0:-1], arr[:, -1]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1)

k = 3

# Training accuracy: leave-one-out over the training set, so a sample never
# votes for itself.
predict_train = _knn_predict(x_train, x_train, y_train, k, exclude_self=True)
train_acc = _accuracy(predict_train, y_train)
print('k = ', k, ' Train acc = ', train_acc)

# Test accuracy: the neighbours come from the training set only. Test labels
# must never take part in the vote, otherwise the score is not a held-out
# estimate.
predict_test = _knn_predict(x_test, x_train, y_train, k)
test_acc = _accuracy(predict_test, y_test)
print('k = ', k, ' Test acc = ', test_acc)