zoukankan      html  css  js  c++  java
  • 《机器学习实战》PCA降维


    注:由于各方面原因,本文不介绍理论部分;网上相关资料很多,请自行查阅。


     pca.py

     1 import numpy as np
     2 import matplotlib.pyplot as plt
     3 import math
     4 
     def loadDataSet(filename, delin='\t'):
         """Load a delimited text file of numbers into a NumPy matrix.

         Each non-blank line becomes one row; fields are separated by
         ``delin`` and converted with ``float``.

         Args:
             filename: Path of the text file to read.
             delin: Field delimiter (a tab by default).

         Returns:
             A ``numpy.matrix`` with one row per non-blank line.

         Raises:
             OSError: If the file cannot be opened.
             ValueError: If a field cannot be parsed as a float.
         """
         # The context manager closes the file even if parsing fails.
         with open(filename) as fr:
             rows = [
                 [float(field) for field in line.strip().split(delin)]
                 for line in fr
                 # Skip blank lines (e.g. a trailing newline at end of file),
                 # which would otherwise fail on float('').
                 if line.strip()
             ]
         # np.mat was removed in NumPy 2.0; np.asmatrix builds the same type.
         return np.asmatrix(rows)
    def pca(dataMat, topNfeet=9999999):
        """Reduce ``dataMat`` to its top principal components.

        Args:
            dataMat: 2-D data, one sample per row and one feature per column
                (a ``numpy.matrix`` or anything ``np.asmatrix`` accepts).
            topNfeet: Number of principal components to keep. Values larger
                than the number of features keep all of them.

        Returns:
            A tuple ``(lowDDataMat, reconMat)`` of ``numpy.matrix`` objects:
            the mean-centred data projected onto the kept components, and
            the data reconstructed from that projection in the original
            feature space. The sign of each projected column is not unique,
            because an eigenvector may be returned with either orientation.
        """
        dataMat = np.asmatrix(dataMat)
        meanVals = np.mean(dataMat, axis=0)  # per-feature mean
        meanRemoved = dataMat - meanVals
        # Covariance between features (columns). atleast_2d keeps the
        # single-feature case, where np.cov returns a 0-d array, usable.
        covMat = np.atleast_2d(np.cov(meanRemoved, rowvar=False))
        # The covariance matrix is symmetric, so use eigh: unlike eig it
        # always returns real eigenvalues and eigenvectors.
        eigVals, eigVects = np.linalg.eigh(covMat)
        # Indices of the eigenvalues from largest to smallest, truncated to
        # the requested number of components.
        eigValInd = np.argsort(eigVals)[::-1][:topNfeet]
        redEigVects = np.asmatrix(eigVects[:, eigValInd])
        lowDDataMat = meanRemoved * redEigVects  # project onto the components
        reconMat = (lowDDataMat * redEigVects.T) + meanVals
        return lowDDataMat, reconMat

    main.py

     1 import PCA
     2 import matplotlib.pyplot as plt
     3 
     if __name__ == "__main__":
         # Reduce the sample data to one principal component, then plot the
         # original points against their reconstruction from that component.
         samples = PCA.loadDataSet('testSet.txt')
         _, rebuilt = PCA.pca(samples, 1)

         _, axes = plt.subplots()
         # .A1 flattens a matrix column into a 1-D array for scatter().
         axes.scatter(samples[:, 0].A1, samples[:, 1].A1, marker='^', s=90)
         axes.scatter(rebuilt[:, 0].A1, rebuilt[:, 1].A1, marker="o", s=50, c='red')
         plt.show()

    对丢失的值进行替代

    # Replace missing (NaN) entries with the mean of their column.
    def replaceNanWithMean(filename='secom.data', delim=' '):
        """Load a data file and fill NaN entries with their column's mean.

        Args:
            filename: Path of the data file, passed to ``loadDataSet``.
            delim: Field delimiter, passed to ``loadDataSet``.

        Returns:
            The matrix returned by ``loadDataSet`` with every NaN replaced by
            the mean of the non-NaN values in the same column. A column that
            is entirely NaN is left unchanged, since it has no values to
            average.
        """
        dataMat = loadDataSet(filename, delim)
        numFeat = dataMat.shape[1]
        for i in range(numFeat):
            # Boolean mask over rows: True where this column is missing.
            missing = np.isnan(np.asarray(dataMat[:, i]).ravel())
            # Nothing to fill, or nothing to average: skip the column.
            if not missing.any() or missing.all():
                continue
            dataMat[missing, i] = np.mean(dataMat[~missing, i])
        return dataMat
  • 相关阅读:
    ADL(C++参数依赖查找)
    Sublime Text3 + Golang搭建开发环境
    Zookeeper使用命令行(转载)
    软链接和硬链接(转载)
    kafka伪集群搭建
    使用librdkafka库实现kafka的生产和消费实例生产者
    vector和map使用erase删除元素
    jquery html函数的一个问题
    贪心类区间问题
    快速幂
  • 原文地址:https://www.cnblogs.com/wjy-lulu/p/8528014.html
Copyright © 2011-2022 走看看