  • Weka: PrincipalComponents Analysis

    package weka.filters.unsupervised.attribute;

    PrincipalComponents

    Fields:

      /** The data to analyse/transform. */
      protected Instances m_TrainInstances;
    
      /** Keep a copy for the class attribute (if set). */
      protected Instances m_TrainCopy;
    
      /** The header for the transformed data format. */
      protected Instances m_TransformedFormat;
    
      /** Data has a class set. */
      protected boolean m_HasClass;
    
      /** Class index. */
      protected int m_ClassIndex;
    
      /** Number of attributes. */
      protected int m_NumAttribs;
    
      /** Number of instances. */
      protected int m_NumInstances;
    
      /** Correlation matrix for the original data. */
      protected double[][] m_Correlation;
      
      /** 
       * If true, center (rather than standardize) the data and
       * compute PCA from covariance (rather than correlation)
       * matrix.
       */
      private boolean m_center = false;
    
      /** Will hold the unordered linear transformations of the (normalized)
          original data. */
      protected double[][] m_Eigenvectors;
    
      /** Eigenvalues for the corresponding eigenvectors. */
      protected double[] m_Eigenvalues = null;
    
      /** Sorted eigenvalues. */
      protected int[] m_SortedEigens;
    
      /** sum of the eigenvalues. */
      protected double m_SumOfEigenValues = 0.0;
    
      /** Filters for replacing missing values. */
      protected ReplaceMissingValues m_ReplaceMissingFilter;
      
      /** Filter for turning nominal values into numeric ones. */
      protected NominalToBinary m_NominalToBinaryFilter;
      
      /** Filter for removing class attribute, nominal attributes with 0 or 1 value. */
      protected Remove m_AttributeFilter;
      
      /** Filter for standardizing the data */
      protected Standardize m_standardizeFilter;
      
      /** Filter for centering the data */
      protected Center m_centerFilter;
    
      /** The number of attributes in the pc transformed data. */
      protected int m_OutputNumAtts = -1;  
    
      /** the amount of variance to cover in the original data when
          retaining the best n PCs. */
      protected double m_CoverVariance = 0.95;
    
      /** maximum number of attributes in the transformed attribute name. */
      protected int m_MaxAttrsInName = 5;
    
      /** maximum number of attributes in the transformed data (-1 for all). */
      protected int m_MaxAttributes = -1;
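
    The fields above are driven by the filter's user-facing options. Below is a minimal configuration sketch, not taken from the original post: it assumes the setters setVarianceCovered, setMaximumAttributes, setMaximumAttributeNames and setCenterData exposed by recent Weka releases (mapping to m_CoverVariance, m_MaxAttributes, m_MaxAttrsInName and m_center), and "data.arff" is only a placeholder path.

      import weka.core.Instances;
      import weka.core.converters.ConverterUtils.DataSource;
      import weka.filters.Filter;
      import weka.filters.unsupervised.attribute.PrincipalComponents;

      public class PcaFilterSketch {
        public static void main(String[] args) throws Exception {
          // "data.arff" is a placeholder; any ARFF file with a class attribute works
          Instances data = DataSource.read("data.arff");
          data.setClassIndex(data.numAttributes() - 1);

          PrincipalComponents pca = new PrincipalComponents();
          pca.setVarianceCovered(0.95);     // m_CoverVariance: keep PCs covering 95% of the variance
          pca.setMaximumAttributes(-1);     // m_MaxAttributes: -1 = no hard cap on retained PCs
          pca.setMaximumAttributeNames(5);  // m_MaxAttrsInName: original attributes listed per PC name
          pca.setCenterData(false);         // m_center: false = standardize, use the correlation matrix

          pca.setInputFormat(data);
          Instances reduced = Filter.useFilter(data, pca);  // first batch triggers setup()
          System.out.println(reduced.numAttributes() + " attributes after PCA");
        }
      }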

    Computing the covariance or correlation matrix

      /**
       * Fills the covariance matrix (when centering the data) or delegates to
       * fillCorrelation() (when standardizing the data).
       */
      protected void fillCovariance() throws Exception {

        if (!m_center) {
          fillCorrelation();
          return;
        }

        // center the data by subtracting the mean of each attribute
        m_centerFilter = new Center();
        m_centerFilter.setInputFormat(m_TrainInstances);
        m_TrainInstances = Filter.useFilter(m_TrainInstances, m_centerFilter);

        // now compute the covariance matrix of the centered data
        m_Correlation = new double[m_NumAttribs][m_NumAttribs];

        for (int i = 0; i < m_NumAttribs; i++) {
          for (int j = 0; j < m_NumAttribs; j++) {

            double cov = 0;
            for (int k = 0; k < m_NumInstances; k++) {
              cov += (m_TrainInstances.instance(k).value(i) *
                      m_TrainInstances.instance(k).value(j));
            }

            cov /= (double) (m_TrainInstances.numInstances() - 1);
            m_Correlation[i][j] = cov;
            m_Correlation[j][i] = cov;
          }
        }
      }
    
      /**
       * Fills the correlation matrix and then standardizes the training data.
       */
      protected void fillCorrelation() throws Exception {
        int        i;
        int        j;
        int        k;
        double[]     att1;
        double[]     att2;
        double     corr;
        
        m_Correlation = new double[m_NumAttribs][m_NumAttribs];
        att1          = new double [m_NumInstances];
        att2          = new double [m_NumInstances];
    
        for (i = 0; i < m_NumAttribs; i++) {
          for (j = 0; j < m_NumAttribs; j++) {
            for (k = 0; k < m_NumInstances; k++) {
              att1[k] = m_TrainInstances.instance(k).value(i);
              att2[k] = m_TrainInstances.instance(k).value(j);
            }
            if (i == j) {
              m_Correlation[i][j] = 1.0;
            } else {
              corr = Utils.correlation(att1, att2, m_NumInstances);
              m_Correlation[i][j] = corr;
              m_Correlation[j][i] = corr;
            }
          }
        }
        
        // now standardize the input data
        m_standardizeFilter = new Standardize();
        m_standardizeFilter.setInputFormat(m_TrainInstances);
        m_TrainInstances = Filter.useFilter(m_TrainInstances, m_standardizeFilter);
      }
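
    For intuition about what these two methods build, here is a small self-contained sketch (plain Java, not Weka code) that computes both matrices for a tiny hand-written data set: the covariance of the mean-centered columns, divided by n - 1 exactly as fillCovariance() does, and the correlation obtained by rescaling that covariance with the attribute standard deviations. All numbers are illustrative.

      public class CovCorrSketch {
        public static void main(String[] args) {
          // tiny illustrative data set: 4 instances, 2 attributes
          double[][] x = { {1, 2}, {2, 4}, {3, 5}, {4, 9} };
          int n = x.length, m = x[0].length;

          // column means
          double[] mean = new double[m];
          for (int j = 0; j < m; j++) {
            for (int k = 0; k < n; k++)
              mean[j] += x[k][j];
            mean[j] /= n;
          }

          // covariance of the centered data, divided by n - 1 as in fillCovariance()
          double[][] cov = new double[m][m];
          for (int i = 0; i < m; i++)
            for (int j = 0; j < m; j++) {
              double s = 0;
              for (int k = 0; k < n; k++)
                s += (x[k][i] - mean[i]) * (x[k][j] - mean[j]);
              cov[i][j] = s / (n - 1);
            }

          // correlation = covariance rescaled by the standard deviations
          double[][] corr = new double[m][m];
          for (int i = 0; i < m; i++)
            for (int j = 0; j < m; j++)
              corr[i][j] = cov[i][j] / Math.sqrt(cov[i][i] * cov[j][j]);

          System.out.println("cov[0][1]  = " + cov[0][1]);   // 11/3, about 3.67
          System.out.println("corr[0][1] = " + corr[0][1]);  // about 0.96
        }
      }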

    Processing the data

      /**
       * Transform an instance in original (unnormalized) format.
       * 
       * @param instance    an instance in the original (unnormalized) format
       * @return            a transformed instance
       * @throws Exception  if the instance can't be transformed
       */
      protected Instance convertInstance(Instance instance) throws Exception {
        Instance    result;
        double[]     newVals;
        Instance     tempInst;
        double     cumulative;
        int        i;
        int        j;
        double     tempval;
        int        numAttsLowerBound;
        
        newVals  = new double[m_OutputNumAtts];
        tempInst = (Instance) instance.copy();
    
        m_ReplaceMissingFilter.input(tempInst);
        m_ReplaceMissingFilter.batchFinished();
        tempInst = m_ReplaceMissingFilter.output();    
    
        m_NominalToBinaryFilter.input(tempInst);
        m_NominalToBinaryFilter.batchFinished();
        tempInst = m_NominalToBinaryFilter.output();
    
        if (m_AttributeFilter != null) {
          m_AttributeFilter.input(tempInst);
          m_AttributeFilter.batchFinished();
          tempInst = m_AttributeFilter.output();
        }
        
        if (!m_center) {
          m_standardizeFilter.input(tempInst);
          m_standardizeFilter.batchFinished();
          tempInst = m_standardizeFilter.output();
        } else {
          m_centerFilter.input(tempInst);
          m_centerFilter.batchFinished();
          tempInst = m_centerFilter.output();
        }
    
        if (m_HasClass)
          newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
    
        if (m_MaxAttributes > 0)
          numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
        else
          numAttsLowerBound = 0;
        if (numAttsLowerBound < 0)
          numAttsLowerBound = 0;
        
        cumulative = 0;
        for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
          tempval = 0.0;
          for (j = 0; j < m_NumAttribs; j++)
            tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);

          newVals[m_NumAttribs - i - 1] = tempval;
          cumulative += m_Eigenvalues[m_SortedEigens[i]];
          if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
            break;
        }
    
        // create instance
        if (instance instanceof SparseInstance)
          result = new SparseInstance(instance.weight(), newVals);
        else
          result = new DenseInstance(instance.weight(), newVals);
        
        return result;
      }
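
    Once the first batch has been processed (e.g. via Filter.useFilter), further instances can be pushed through the trained filter one at a time; under the standard Filter streaming contract they are then routed through convertInstance() above. A hedged sketch, where the trained filter pca and a compatible test set are assumed to come from the earlier example:

      import weka.core.Instance;
      import weka.core.Instances;
      import weka.filters.unsupervised.attribute.PrincipalComponents;

      public class StreamingPcaSketch {
        /** Projects every instance of 'test' through an already-trained PCA filter. */
        static void project(PrincipalComponents pca, Instances test) throws Exception {
          for (int k = 0; k < test.numInstances(); k++) {
            pca.input(test.instance(k));        // after the first batch this calls convertInstance()
            Instance projected = pca.output();  // instance expressed in the retained PCs
            System.out.println(projected);
          }
        }
      }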
    
      /**
       * Initializes the filter with the given input data.
       *
       * @param instances   the data to process
       * @throws Exception  in case the processing goes wrong
       * @see               #batchFinished()
       */
      protected void setup(Instances instances) throws Exception {
        int                i;
        int                j;
        Vector<Integer>         deleteCols;
        int[]             todelete;
        double[][]             v;
        Matrix             corr;
        EigenvalueDecomposition     eig;
        Matrix             V;
        
        m_TrainInstances = new Instances(instances);
    
        // make a copy of the training data so that we can get the class
        // column to append to the transformed data (if necessary)
        m_TrainCopy = new Instances(m_TrainInstances, 0);
    
        m_ReplaceMissingFilter = new ReplaceMissingValues();
        m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
        m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);
    
        m_NominalToBinaryFilter = new NominalToBinary();
        m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
        m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);
    
        // delete any attributes that have only one distinct value or whose values are all missing
        deleteCols = new Vector<Integer>();
        for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
          if (m_TrainInstances.numDistinctValues(i) <= 1)
            deleteCols.addElement(i);
        }
    
        if (m_TrainInstances.classIndex() >= 0) {
          // get rid of the class column
          m_HasClass = true;
          m_ClassIndex = m_TrainInstances.classIndex();
          deleteCols.addElement(m_ClassIndex);
        }

        // remove columns from the data if necessary
        if (deleteCols.size() > 0) {
          m_AttributeFilter = new Remove();
          todelete = new int[deleteCols.size()];
          for (i = 0; i < deleteCols.size(); i++)
            todelete[i] = deleteCols.elementAt(i).intValue();
          m_AttributeFilter.setAttributeIndicesArray(todelete);
          m_AttributeFilter.setInvertSelection(false);
          m_AttributeFilter.setInputFormat(m_TrainInstances);
          m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
        }
    
        // can the evaluator handle the processed data, e.g. are there enough attributes?
        getCapabilities().testWithFail(m_TrainInstances);
    
        m_NumInstances = m_TrainInstances.numInstances();
        m_NumAttribs   = m_TrainInstances.numAttributes();
    
        // build the correlation matrix (or the covariance matrix when centering the data)
        fillCovariance();
    
        // get the eigenvectors/eigenvalues
        corr = new Matrix(m_Correlation);
        eig  = corr.eig();
        V    = eig.getV();
        v    = new double[m_NumAttribs][m_NumAttribs];
        for (i = 0; i < v.length; i++) {
          for (j = 0; j < v[0].length; j++)
            v[i][j] = V.get(i, j);
        }
        m_Eigenvectors = (double[][]) v.clone();
        m_Eigenvalues  = (double[]) eig.getRealEigenvalues().clone();
    
        // any eigenvalues less than 0 are not worth anything --- change to 0
        for (i = 0; i < m_Eigenvalues.length; i++) {
          if (m_Eigenvalues[i] < 0)
            m_Eigenvalues[i] = 0.0;
        }
        m_SortedEigens     = Utils.sort(m_Eigenvalues);
        m_SumOfEigenValues = Utils.sum(m_Eigenvalues);
    
        m_TransformedFormat = determineOutputFormat(m_TrainInstances);
        setOutputFormat(m_TransformedFormat);
        
        m_TrainInstances = null;
      }
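
    The eigen-decomposition step above relies on weka.core.matrix.Matrix (a JAMA port), using exactly the calls visible in setup(): the Matrix(double[][]) constructor, eig(), getV() and getRealEigenvalues(). Below is a standalone sketch of just that step on a hand-written 2x2 correlation matrix; the numbers are illustrative only.

      import weka.core.matrix.EigenvalueDecomposition;
      import weka.core.matrix.Matrix;

      public class EigenSketch {
        public static void main(String[] args) {
          // illustrative 2x2 correlation matrix
          double[][] corr = { {1.0, 0.8}, {0.8, 1.0} };

          EigenvalueDecomposition eig = new Matrix(corr).eig();
          double[] values = eig.getRealEigenvalues();  // 0.2 and 1.8 (in some order)
          Matrix vectors = eig.getV();                 // columns are the eigenvectors

          // the larger eigenvalue covers 1.8 / (1.8 + 0.2) = 90% of the variance,
          // so with m_CoverVariance = 0.95 both components would be retained
          for (int i = 0; i < values.length; i++) {
            System.out.print("eigenvalue " + values[i] + ", eigenvector: ");
            for (int j = 0; j < values.length; j++)
              System.out.print(vectors.get(j, i) + " ");
            System.out.println();
          }
        }
      }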
  • Original article: https://www.cnblogs.com/549294286/p/3417810.html