#python
from numpy import *
def loadData(filename):
    """Read a whitespace-delimited numeric text file into a matrix.

    Each non-blank line becomes one row; every whitespace-separated token
    on the line is converted with ``float``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a token cannot be parsed as a float.
    """
    data = []
    # The context manager closes the handle even if a row fails to parse.
    with open(filename) as handle:
        for line in handle:
            # split() with no argument accepts tabs and runs of spaces and
            # yields an empty list for blank lines.
            fields = line.split()
            if not fields:
                continue
            # A real list (not a lazy map object) so the rows can be turned
            # into a numeric matrix on Python 3 as well as Python 2.
            data.append([float(field) for field in fields])
    # asmatrix is the function the older `mat` alias pointed to.
    return asmatrix(data)
def pca(datamat, k):
    """Project samples onto their top-k principal components.

    Args:
        datamat: 2-D data, one sample per row and one feature per column.
            A ``numpy.matrix``, an ndarray or a nested list is accepted.
        k: Number of principal components to keep.

    Returns:
        A ``numpy.matrix`` with one row per sample and one column per kept
        component, ordered by decreasing eigenvalue. The sign of each
        column is arbitrary, as it is for any eigenvector.

    Raises:
        ValueError: If fewer than two samples are given, because the
            covariance estimate divides by (number of samples - 1).
    """
    # asmatrix makes `*` below a true matrix product for every input type.
    samples = asmatrix(datamat)
    n_samples = samples.shape[0]
    if n_samples < 2:
        raise ValueError("pca needs at least two samples to estimate the covariance")
    center = samples - mean(samples, 0)
    covmat = (center.T * center) / (n_samples - 1)
    # The covariance matrix is symmetric, so use the symmetric solver: it
    # returns real eigenvalues and eigenvectors.
    eigval, eigvec = linalg.eigh(covmat)
    # Walk the ascending argsort backwards to pick the k largest eigenvalues.
    sortind = argsort(eigval)[:-(k + 1):-1]
    return center * eigvec[:, sortind]
# Demo run: load the samples from testSet.txt, reduce them to a single
# principal component and show the projected data.
datamat = loadData("testSet.txt")
lowmat = pca(datamat, 1)
# The parenthesized single-argument form prints the same on Python 2 and 3.
print(lowmat)
%matlab
% PCA of the samples in testSet.txt, reduced to one dimension.
data=vpa(load('testSet.txt'),10);
[m,n]=size(data);
meanval=data-repmat(mean(data),m,1); % centered data; repmat is the counterpart of numpy's tile
calccov=(meanval'*meanval)./(m-1); % covariance matrix
[eigvec,eigval]=eig(calccov); % eigenvectors (columns of eigvec) and eigenvalues of the covariance matrix
% eigval is a diagonal matrix, so sort its diagonal: sorting the matrix
% itself works column by column and ind(1) would not identify the largest
% eigenvalue.
[val,ind]=sort(diag(eigval),'descend'); % order the eigenvalues, largest first
eigvecmatrix=eigvec(:,ind(1)); % keep the leading eigenvector to reduce to one dimension
lowmatrix=meanval*eigvecmatrix; % sample matrix after dimensionality reduction
lowmatrix=vpa(lowmatrix,10);