# Copyright notice: reposting is welcome; please credit the source and author. Thank you! https://blog.csdn.net/qq_42442369/article/details/86730033
import pandas as pd
from sklearn.decomposition import PCA
# Example 1: run scikit-learn PCA on a spreadsheet and save the reduced data.
# Path of the spreadsheet to read.
inputfile = './data.xls'
# Path of the spreadsheet the reduced data is written to.
outputfile = './reduced_data.xls'
# Load the sheet; header=None is passed so no row is used as column names.
data = pd.read_excel(inputfile,header=None)
print(data)
# PCA object with default settings (no component count given).
pca = PCA()
# Fit the model to the data.
pca.fit(data)
# Inspect the fitted model.
print(pca.components_) # eigenvectors (principal axes)
print(pca.explained_variance_) # eigenvalues (variance of each component)
print(pca.explained_variance_ratio_)# share of the total variance per component
# Dimensionality reduction: refit with 3 components and project the data.
pca = PCA(3)
pca.fit(data)
low_d = pca.transform(data)
pd.DataFrame(low_d).to_excel(outputfile)
# Map the reduced data back to the original feature space.
# NOTE: this rebinds `data`, so the loaded frame is no longer referenced after this line.
data = pca.inverse_transform(low_d)
print(data)
# Example 2: PCA on synthetic 3-D blobs, visualised with matplotlib.
import numpy as np
import matplotlib.pyplot as plt
# Kept for its import side effect: older matplotlib releases only register the
# '3d' projection once mpl_toolkits.mplot3d has been imported.
from mpl_toolkits.mplot3d import Axes3D
# make_blobs is exported by sklearn.datasets; the old
# sklearn.datasets.samples_generator module no longer exists in current releases.
from sklearn.datasets import make_blobs
from sklearn.decomposition import PCA

# Generate 10000 three-feature samples around four fixed centres.
X, y = make_blobs(
    n_samples=10000,
    n_features=3,
    centers=[[3, 3, 3], [0, 0, 0], [1, 1, 1], [2, 2, 2]],
    cluster_std=[0.2, 0.1, 0.2, 0.2],
    random_state=9,
)

# Show the raw data in 3-D.
fig = plt.figure()
# Create the 3-D axes through the figure so it is attached to it on every
# matplotlib version (a bare Axes3D(fig) is not auto-added on recent releases).
ax = fig.add_axes([0, 0, 1, 1], projection='3d', elev=30, azim=10)
# Draw on the 3-D axes: plt.scatter(x, y, z) would treat the third array as the
# marker size `s`, not as the z coordinate.
ax.scatter(X[:, 0], X[:, 1], X[:, 2], marker='o')
plt.show()

# Principal component analysis keeping all three components, to inspect how
# the variance is distributed.
pca = PCA(n_components=3)
pca.fit(X)
print(pca.explained_variance_ratio_)
print(pca.explained_variance_)

# Dimensionality reduction: keep two components and plot the projection.
pca = PCA(n_components=2)
pca.fit(X)
X_new = pca.transform(X)
plt.scatter(X_new[:, 0], X_new[:, 1], marker='o')
plt.show()
from numpy import *
def eigValPct(eigVals,percentage):
    """Return how many of the largest eigenvalues reach a share of the total.

    The eigenvalues are sorted in descending order and accumulated until the
    running sum is at least ``percentage`` times the sum of all eigenvalues.

    Args:
        eigVals: Sequence or array of eigenvalues (variances).
        percentage: Fraction of the total to reach, e.g. ``0.9``.

    Returns:
        The number of leading (largest) eigenvalues needed. If the threshold
        is never reached (``percentage`` above 1, floating-point rounding, or
        empty input) the number of eigenvalues is returned, so the result is
        always an int and never ``None``.
    """
    descending = np.sort(eigVals)[::-1]  # largest eigenvalue first
    target = np.sum(descending) * percentage
    running = 0
    for count, value in enumerate(descending, start=1):
        running += value
        if running >= target:
            return count
    # Threshold not reachable: keep every component rather than returning None.
    return len(descending)
def pca(dataMat,percentage=0.9):
    """Reduce ``dataMat`` with PCA, keeping enough components for ``percentage``.

    Rows are treated as samples and columns as features (the mean is taken
    over axis 0 and the covariance is computed with ``rowvar=0``).

    Args:
        dataMat: 2-D data, one sample per row.
        percentage: Fraction of the total variance the kept components must
            reach; passed to ``eigValPct``.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``: the centred data projected onto
        the kept eigenvectors, and the data reconstructed from that projection
        with the column means added back. Both are ``numpy.matrix`` objects.
        The sign of each projected component is arbitrary.
    """
    meanVals = np.mean(dataMat, axis=0)  # per-column mean
    meanRemoved = dataMat - meanVals
    covMat = np.cov(meanRemoved, rowvar=0)  # columns are the variables
    # The covariance matrix is symmetric, so eigh is the appropriate solver:
    # it always returns real eigenvalues. asmatrix replaces the `mat` alias,
    # which is not available in NumPy 2.x, and keeps the matrix-typed results.
    eigVals, eigVects = np.linalg.eigh(np.asmatrix(covMat))
    k = eigValPct(eigVals, percentage)  # number of components to keep
    if k is None:
        # Threshold was never reached: keep every component.
        k = len(eigVals)
    # Indices of the k largest eigenvalues, largest first.
    topIndices = np.argsort(eigVals)[:-(k + 1):-1]
    redEigVects = eigVects[:, topIndices]
    # `*` is a matrix product here because redEigVects is a numpy.matrix.
    lowDDataMat = meanRemoved * redEigVects
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat