5、Python 代码(PCA 案例)
#-*- coding:utf-8-*-
from numpy import *
import numpy as np
import cv2
import matplotlib.pyplot as plt
def zeroMean(dataMat):
    """Center every column of ``dataMat`` on zero.

    Args:
        dataMat: 2-D numeric data with one sample per row and one feature
            per column (``np.ndarray`` or ``np.matrix``).

    Returns:
        A tuple ``(newData, meanVal)`` where ``meanVal`` holds the mean of
        each column and ``newData`` is ``dataMat`` with those means
        subtracted.
    """
    meanVal = np.mean(dataMat, axis=0)  # axis=0: one mean per column
    newData = dataMat - meanVal  # broadcasting subtracts the means from every row
    return newData, meanVal
# Source: "Machine Learning in Action"
def loadDataSet(filename, delim="\t"):
    """Read a delimited text file of numbers into a float matrix.

    Args:
        filename: Path of the text file; each non-blank line is one row.
        delim: Field separator used within a line (tab by default).

    Returns:
        An ``np.matrix`` of floats with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        # Build real lists of floats: on Python 3 ``map`` is lazy and would
        # leave map objects (not numbers) in the matrix. Blank lines (e.g. a
        # trailing newline) are skipped instead of crashing ``float('')``.
        datArr = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()
        ]
    # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` works on old and new.
    return np.asmatrix(datArr)
def PCA_SVD(dataMat, k=2):
    """Project ``dataMat`` onto its top ``k`` principal components using SVD.

    Args:
        dataMat: 2-D numeric data, one sample per row and one feature per
            column. Accepts ``np.matrix`` or anything ``np.asmatrix`` can
            convert (e.g. ``np.ndarray``).
        k: Number of principal components to keep.

    Returns:
        A tuple ``(lowDataMat, reconMat)`` of ``np.matrix`` objects:
        ``lowDataMat`` is the centered data expressed in the ``k``-dimensional
        component space, and ``reconMat`` is the data reconstructed from those
        ``k`` components in the original feature space (means added back).
    """
    # The products below rely on ``*`` meaning matrix multiplication, which
    # only ``np.matrix`` guarantees; coerce so plain ndarrays also work.
    dataMat = np.asmatrix(dataMat)
    newData, meanVals = zeroMean(dataMat)
    # rowvar=0: columns are variables, rows are observations. np.cov returns
    # a 0-d value for single-feature data, so force 2-D before the SVD.
    covMat = np.atleast_2d(np.cov(newData, rowvar=0))
    # Singular values come back in descending order, so the leading columns
    # of U are the directions of greatest variance.
    U, _, _ = np.linalg.svd(covMat)
    n_eigVect = U[:, :k]  # basis of the k-dimensional subspace
    lowDataMat = newData * n_eigVect  # coordinates in the reduced space
    reconMat = (lowDataMat * n_eigVect.T) + meanVals  # back to feature space
    return lowDataMat, reconMat
# Demo: load the sample points, reduce them with PCA, and plot the original
# and the projected data in two separate figures.
data = loadDataSet(
    "F:/DEEP LEARNING PAPERS/machinelearninginaction/Ch13/testSet.txt"
)
lowdataMat, reconMat = PCA_SVD(data, k=2)

# One figure (with a single axes) per point cloud.
fig1 = plt.figure()
fig2 = plt.figure()
ax = fig1.add_subplot(1, 1, 1)
ay = fig2.add_subplot(1, 1, 1)

# ``.A1`` flattens a matrix column into the 1-D array that scatter() expects.
ax.scatter(data[:, 0].A1, data[:, 1].A1, s=10, marker='^')
ay.scatter(lowdataMat[:, 0].A1, lowdataMat[:, 1].A1, s=20, c='red', marker='o')

plt.show()
参考资料:
[1] https://en.wikipedia.org/wiki/Singular_value_decomposition
[2] https://www.cnblogs.com/pinard/p/6251584.html (这个博主良心)