from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits import mplot3d
def loadDataSet(fileName, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Every non-blank line is stripped, split on ``delim`` and each resulting
    field is converted with ``float``.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used on every line (a tab by default).

    Returns:
        A ``numpy.matrix`` holding one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    # The context manager closes the handle even when parsing raises.
    with open(fileName) as fr:
        rows = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()  # blank lines would otherwise hit float('')
        ]
    # np.asmatrix builds the same numpy.matrix type as the np.mat alias,
    # which was removed in NumPy 2.0.
    return np.asmatrix(rows)
def calDist(x, y):
    """Compute the distance between every pair of sample points.

    Thin wrapper that forwards both arguments to
    ``sklearn.metrics.pairwise_distances`` with its default settings and
    returns the result unchanged.

    Args:
        x: First collection of samples, passed through as given.
        y: Second collection of samples, passed through as given.

    Returns:
        Whatever ``pairwise_distances(x, y)`` returns (the matrix of
        distances between samples of ``x`` and samples of ``y``).
    """
    return metrics.pairwise_distances(x, y)
def cal_B(d):
    """Build the MDS inner-product matrix B from a distance matrix.

    Each entry is
    ``b[i, j] = -(dist(i,j)^2 - dist(i,.)^2 - dist(.,j)^2 + dist(.,.)^2) / 2``
    where the dotted terms are the row mean, column mean and overall mean of
    the squared distances.

    Args:
        d: Square (m x m) array-like of pairwise distances.

    Returns:
        An (m x m) ``numpy.ndarray`` containing B.

    Raises:
        ValueError: If ``d`` is not a square two-dimensional matrix.
    """
    dist = np.asarray(d, dtype=float)
    if dist.ndim != 2 or dist.shape[0] != dist.shape[1]:
        raise ValueError(
            "cal_B expects a square distance matrix, got shape %s" % (dist.shape,)
        )
    m = dist.shape[0]
    dij2 = np.square(dist)  # dist(ij)^2
    di = np.sum(dij2, axis=1) / m  # dist(i.)^2, mean of row i
    dj = np.sum(dij2, axis=0) / m  # dist(.j)^2, mean of column j
    dij = np.sum(dij2) / (m ** 2)  # dist(..)^2, overall mean
    # Broadcasting applies the per-element formula to the whole matrix at
    # once, replacing the explicit Python double loop.
    return (dij2 - di[:, np.newaxis] - dj[np.newaxis, :] + dij) / (-2)
def MDS(data, n=2):
    """Reduce ``data`` to ``n`` dimensions with classical MDS.

    The pairwise distance matrix of the samples is turned into the
    inner-product matrix B, and the samples are projected onto the
    eigenvectors of B that belong to its ``n`` largest eigenvalues, each
    scaled by the square root of its eigenvalue.

    Args:
        data: Samples, one per row (array-like or ``numpy.matrix``).
        n: Number of output dimensions.

    Returns:
        A ``numpy.ndarray`` with one row per sample and ``n`` columns. The
        sign of each column is arbitrary, as with any eigen-decomposition.
    """
    # Work on a plain ndarray: recent scikit-learn releases reject np.matrix.
    points = np.asarray(data)
    d = calDist(points, points)
    b = cal_B(d)
    # B is real and symmetric, so eigh is the right solver: it always returns
    # real eigenpairs, whereas eig may produce complex ones from rounding.
    bVals, bVects = np.linalg.eigh(b)
    bValInd = np.argsort(bVals)[::-1][:n]  # indices of the n largest eigenvalues
    # Rounding can push near-zero eigenvalues slightly negative; clip them so
    # the square root does not yield NaN.
    topVals = np.clip(bVals[bValInd], 0, None)
    # Scaling column k by sqrt(eigenvalue k) equals V @ diag(sqrt(lambda)).
    z = bVects[:, bValInd] * np.sqrt(topVals)
    return z
if __name__ == "__main__":
    # Demo: load the sample file, embed it in 2-D with MDS and plot both the
    # original points and the embedding on one set of 3-D axes.
    samples = loadDataSet("testSet3.txt")
    embedding = MDS(samples)

    axes3d = plt.axes(projection="3d")
    # Original samples, drawn from their first three columns with red edges.
    axes3d.scatter3D(samples[:, 0], samples[:, 1], samples[:, 2], edgecolors='r')
    # The two MDS coordinates, drawn on the same axes.
    axes3d.scatter(embedding[:, 0], embedding[:, 1])
    plt.show()
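# References:
#   "Machine Learning - Dimensionality Reduction (the MDS Algorithm)"
#   (机器学习-降维算法(MDS算法))
#   Zhou Zhihua, "Machine Learning" (《机器学习》周志华著)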