一、原理:k均值聚类算法(k-means clustering algorithm)是一种迭代求解的聚类分析算法,其步骤是:先随机选取K个对象作为初始的聚类中心,然后计算每个对象与各个种子聚类中心之间的距离,把每个对象分配给距离它最近的聚类中心。聚类中心以及分配给它们的对象就代表一个聚类。每完成一轮分配,各聚类的聚类中心会根据该聚类中现有的对象被重新计算。这个过程将不断重复,直到满足某个终止条件。终止条件可以是:没有(或只有最小数目的)对象被重新分配给不同的聚类,没有(或只有最小数目的)聚类中心再发生变化,或者误差平方和达到局部最小。
二、程序代码
# -*- coding:utf-8 -*-
import random
import math
def formula_diatance(a=None, b=None):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Args:
        a: Sequence of numeric coordinates.
        b: Sequence of numeric coordinates.

    Returns:
        The straight-line distance between the two points. Coordinates are
        paired positionally, so points of any dimension are supported; if
        the sequences differ in length, only the leading coordinates that
        both provide are compared.
    """
    # Sum the squared per-axis differences instead of hard-coding axes 0
    # and 1, so the same helper works for 2-D, 3-D, ... points.
    return math.sqrt(sum(pow(p - q, 2) for p, q in zip(a, b)))
def GetNewPoint(list=None):
    """Return the centroid (per-axis arithmetic mean) of a group of points.

    Args:
        list: Non-empty sequence of points, each a sequence of numeric
            coordinates. The parameter name shadows the builtin but is
            kept so existing keyword callers continue to work.

    Returns:
        A new list holding the mean of every coordinate axis.

    Raises:
        ValueError: If the group contains no points, since the mean of an
            empty group is undefined.
    """
    points = list
    count = len(points)
    if count == 0:
        raise ValueError('cannot compute the mean point of an empty cluster')
    # zip(*points) yields one tuple per coordinate axis, so the mean is
    # computed for however many dimensions the points have.
    return [sum(axis) / count for axis in zip(*points)]
def row2column(list=None):
    """Transpose a row-major matrix given as a list of lists.

    Args:
        list: Sequence of rows. The column count is taken from the first
            row. The parameter name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        A new list of columns, where ``result[m][n] == list[n][m]``.
        An empty input yields an empty list.
    """
    rows = list
    if len(rows) == 0:
        # Nothing to transpose; avoids indexing rows[0] on empty input.
        return []
    width = len(rows[0])
    return [[row[col] for row in rows] for col in range(width)]
def which_kind(list=None, data=None):
    """Group data items by the column holding the smallest value in their row.

    Args:
        list: Matrix with one row per data item and one column per group;
            ``list[n][m]`` is the score (e.g. distance) of item ``n``
            against group ``m``. The group count is taken from the first
            row. The parameter name shadows the builtin but is kept so
            existing keyword callers continue to work.
        data: Sequence of items, indexed in parallel with the rows.

    Returns:
        A list with one sub-list per group, in column order, containing
        the items assigned to that group. Groups that receive no item are
        returned as empty lists. An empty matrix yields an empty list.
    """
    rows = list
    if len(rows) == 0:
        return []
    groups = [[] for _ in range(len(rows[0]))]
    for n, row in enumerate(rows):
        # index() returns the first position of the minimum, so ties are
        # resolved in favour of the lowest-numbered group.
        nearest = row.index(min(row))
        groups[nearest].append(data[n])
    return groups
def k_means(x=None, k=0, k_list=None, class_list=None, round=0):
    """Cluster the points in ``x`` with the k-means algorithm.

    Each round assigns every point to its nearest center, prints the
    resulting clusters, and then replaces each center with the mean of its
    cluster. Rounds repeat until every cluster of the current round also
    appears in the previous round's assignment.

    Args:
        x: Non-empty sequence of points (sequences of numeric coordinates).
        k: Number of clusters. Only used when ``k_list`` is None, in which
            case ``k`` distinct points of ``x`` are drawn at random as the
            initial centers.
        k_list: Optional explicit list of initial centers. When given, the
            number of clusters is ``len(k_list)`` and ``k`` is ignored.
        class_list: Optional assignment from a previous round, used for the
            first convergence check when ``k_list`` is given. Ignored when
            the centers are drawn at random.
        round: Number of rounds already performed; only affects the round
            numbers printed. The name shadows the builtin but is kept so
            existing keyword callers continue to work.

    Returns:
        A list of clusters (one list of points per center, in center
        order) from the final round.

    Raises:
        ValueError: If ``x`` is empty, or if centers must be drawn at
            random and ``k`` is not between 1 and ``len(x)``.
    """
    if len(x) == 0:
        raise ValueError('x must contain at least one point')

    if k_list is None:
        if not 0 < k <= len(x):
            # The original retry loop could never finish for k > len(x).
            raise ValueError('k must be between 1 and len(x)')
        seed_indices = random.sample(range(len(x)), k)
        # Print every seed, not just the first two, so any k is reported.
        print('初始随机点:' + ' , '.join(str(x[i]) for i in seed_indices))
        centers = [x[i] for i in seed_indices]
        previous = None
    else:
        centers = k_list
        previous = class_list

    iteration = round
    # Iterate instead of recursing so the round count is not limited by
    # the interpreter's recursion depth.
    while True:
        # One row of distances per center, transposed to one row per point.
        distances = row2column(
            [[formula_diatance(center, point) for point in x]
             for center in centers]
        )
        clusters = which_kind(distances, x)
        iteration += 1
        print('第' + str(iteration) + '轮:' + str(clusters))

        # Converged when every current cluster already existed last round.
        if previous is not None and all(c in previous for c in clusters):
            return clusters

        # An empty cluster has no mean; keep its previous center rather
        # than failing on a division by zero.
        centers = [
            GetNewPoint(cluster) if cluster else old_center
            for cluster, old_center in zip(clusters, centers)
        ]
        print('新均值点:' + str(centers))
        previous = clusters
if __name__ == '__main__':
    # Demo run: split seven sample 2-D points into two clusters and print
    # the input and the final clustering.
    samples = [
        [1, 1], [4, 5], [3, 5], [4, 4],
        [2, 2], [3, 4], [0, 0],
    ]
    print('输入数据:' + str(samples))
    clusters = k_means(x=samples, k=2)
    print('最后结果:' + str(clusters))
作者:WangB