# K-means: simple to implement and converges quickly,
# but the number of clusters must be specified in advance.
# 2d类别划分
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load the dataset; every column except 'labels' is used as a feature.
data = pd.read_csv('data.csv')
x = data.drop(columns=['labels'])
y = data['labels']  # ground-truth labels from the raw data
# Number of samples per ground-truth label. `pd.value_counts` is deprecated
# (pandas >= 2.1); the Series method is the supported spelling. The result is
# bound to a name so it can be inspected instead of being discarded.
label_counts = y.value_counts()
# Build the model
# n_clusters=3 splits the samples into three clusters; random_state=0 fixes
# the seed passed to the estimator. `fit` returns the fitted estimator itself,
# so construction and training are chained.
KM = KMeans(random_state=0, n_clusters=3).fit(x)
centers = KM.cluster_centers_  # cluster centre points of the fitted model
# Predict
# Predict the cluster of a single sample. `predict` needs a 2-D input of shape
# (n_samples, n_features) whose feature count matches the data used in `fit`;
# the previous `[[]]` had zero features and raised ValueError. The first
# training row is used as a stand-in sample (the double brackets keep it a
# one-row DataFrame with the original column names).
# TODO: replace with the actual point to classify, e.g. [[f1, f2]].
y_predict_text = KM.predict(x.iloc[[0]])
# Compute accuracy
# Cluster ids the fitted model assigns to the training samples.
y_predict = KM.predict(X=x)
# NOTE(review): this compares raw cluster ids with the ground-truth labels;
# the ids are remapped in the correction step that follows in this file, so
# this score is presumably only a pre-correction baseline.
accuracy = accuracy_score(y_true=y, y_pred=y_predict)
# Correction
# Remap the predicted cluster ids onto the ids used by the ground-truth
# labels. The mapping is hard-coded (presumably chosen by inspecting this
# dataset with random_state=0): cluster 0 -> 1, cluster 1 -> 2, and any other
# cluster id -> 0.
_CLUSTER_TO_LABEL = {0: 1, 1: 2}
y_corrected = [
    _CLUSTER_TO_LABEL.get(cluster_id, 0)  # 0 is the fallback, as in the else branch
    for cluster_id in y_predict
]