#python
# The wildcard import is retained so that any code relying on the names it
# injects keeps working; the functions below use the explicit ``np`` alias.
from numpy import *  # noqa: F401,F403
import numpy as np


def loadData(filename):
    """Load a whitespace-delimited numeric text file into a matrix.

    Each non-blank line becomes one row; every field on the line is parsed
    with ``float``. Fields may be separated by any run of spaces or tabs.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    data = []
    # ``with`` closes the file even if parsing a line raises.
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Skip blank lines instead of failing on float('').
                continue
            # Build a real list: on Python 3 ``map`` is lazy and would leave
            # map objects (not numbers) in the matrix.
            data.append([float(field) for field in fields])
    # ``asmatrix`` replaces ``mat``, which was removed in NumPy 2.0.
    return np.asmatrix(data)


def pca(datamat, k):
    """Project samples onto their ``k`` leading principal components.

    The data is centred column-wise, the sample covariance matrix
    (normalised by ``rows - 1``) is eigen-decomposed, and the centred data is
    multiplied by the eigenvectors belonging to the ``k`` largest
    eigenvalues.

    Args:
        datamat: 2-D data, one sample per row and one feature per column.
            Anything accepted by ``numpy.asmatrix`` works.
        k: Number of principal components to keep.

    Returns:
        A ``numpy.matrix`` with one row per sample and one column per kept
        component, ordered by decreasing eigenvalue. The sign of each column
        is arbitrary, as with any eigenvector.

    Raises:
        ValueError: If fewer than two samples are given (the covariance
            normalisation would divide by zero).
    """
    # Coerce so that ``*`` below is always a matrix product, even when the
    # caller passes a plain ndarray or nested list.
    datamat = np.asmatrix(datamat, dtype=float)
    n_samples = datamat.shape[0]
    if n_samples < 2:
        raise ValueError("pca needs at least two samples (rows)")

    center = datamat - datamat.mean(axis=0)
    covmat = (center.T * center) / (n_samples - 1)

    # The covariance matrix is symmetric, so use ``eigh``: it guarantees real
    # eigenvalues, whereas ``eig`` can return complex values from round-off.
    eigval, eigvec = np.linalg.eigh(covmat)

    # Indices of the k largest eigenvalues, largest first.
    top = np.argsort(eigval)[::-1][:k]
    return center * eigvec[:, top]


if __name__ == "__main__":
    # Guarded so importing this module does not require testSet.txt.
    datamat = loadData("testSet.txt")
    lowmat = pca(datamat, 1)
    print(lowmat)
%matlab
% PCA of the samples in testSet.txt, reduced to one dimension.
% Rows are samples, columns are features.
data = vpa(load('testSet.txt'), 10);  % variable-precision copy, 10 significant digits
[m, n] = size(data);

% Centre every column on its mean. repmat is the counterpart of numpy's tile.
% Note: despite its name, meanval holds the centred data, not the mean.
meanval = data - repmat(mean(data), m, 1);

% Sample covariance matrix.
calccov = (meanval' * meanval) ./ (m - 1);

% Eigenvectors (columns of eigvec) and eigenvalues of the covariance matrix.
% eig returns the eigenvalues as a DIAGONAL MATRIX, not a vector.
[eigvec, eigval] = eig(calccov);

% Sort the eigenvalues in descending order so the leading components can be
% selected. diag() extracts the eigenvalue vector first; sorting the diagonal
% matrix directly would sort each column separately and ind(1) would not be
% the index of the largest eigenvalue.
[val, ind] = sort(diag(eigval), 'descend');

% Keep only the eigenvector of the largest eigenvalue (reduce to 1-D).
eigvecmatrix = eigvec(:, ind(1));

% Sample matrix after dimensionality reduction.
lowmatrix = meanval * eigvecmatrix;
lowmatrix = vpa(lowmatrix, 10);