zoukankan      html  css  js  c++  java
  • python统计分析-主成份分析

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    # <editable>
    
    
    def execute():
        # <editable>
        """Run a PCA dimensionality reduction on one MySQL table and print the results.

        Reads the columns named in ``params['columns']`` from the table named in
        ``inputs['table']``, keeps only the numeric columns, fits a scikit-learn
        ``PCA`` with ``params['n_components']`` components, and prints the input
        data, the explained-variance ratio, the explained variance, and the
        projected data rounded to 4 decimals.

        Raises:
            ValueError: if ``n_components`` is not within ``[1, n_features]``,
                where ``n_features`` is the number of numeric columns read.
        """
        # Load modules.
        from sklearn.decomposition import PCA
        import numpy as np
        import pandas as pd
        from sqlalchemy import create_engine

        # Connect to the database.
        # NOTE(review): the credentials are hard-coded in the URL; consider
        # loading them from configuration or the environment instead.
        engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

        # Select the target data.
        params = {
            "columns": "SUNACTIVITY",
            "n_components": 1,
        }
        inputs = {"table": '纯随机性检验'}
        n_components = int(params['n_components'])

        # The column list and table name are SQL identifiers, which cannot be
        # bound as query parameters; here they are the fixed values defined above.
        # NOTE(review): validate/quote them if they ever come from user input.
        data_sql = 'select ' + params['columns'] + ' from ' + inputs['table']
        try:
            data_in = pd.read_sql_query(data_sql, engine)
        finally:
            # The engine is only needed for this one read; release its pool.
            engine.dispose()
        print(data_in)

        # Principal component analysis.
        data_in = data_in.select_dtypes(include=['number'])  # keep numeric columns only
        n_features = data_in.shape[1]
        if not 1 <= n_components <= n_features:
            raise ValueError(
                "降维后的维数为%r,该值必须要在[1,%r]之间." % (n_components, n_features))

        pca_model = PCA(n_components=n_components)
        pca_model.fit(data_in)
        print(pca_model.explained_variance_ratio_)
        print(pca_model.explained_variance_)

        # Perform the dimensionality reduction.
        data_out = pca_model.transform(data_in)
        columns = ['comp_' + str(i) for i in range(1, n_components + 1)]
        data_out = pd.DataFrame(data_out, columns=columns)
        data_out = np.around(data_out, decimals=4)

        # Write out the result.
        print(data_out)

        # Sample data (console output of one run):
        #
        #     SUNACTIVITY
        # 0           5.0
        # 1          11.0
        # 2          16.0
        # 3          23.0
        # 4          36.0
        # 5          40.4
        # 6          29.8
        # 7          15.2
        # 8           7.5
        # 9           2.9
        # 10         83.4
        # 11         47.7
        # 12         47.8
        # 13         30.7
        # 14         12.2
        # 15         40.4
        # 16         29.8
        # 17         15.2
        # 18          7.5
        # 19          2.9
        # 20         12.6
        # [1.]
        # [394.82661905]
        #     comp_1
        # 0  -19.619
        # 1  -13.619
        # 2   -8.619
        # 3   -1.619
        # 4   11.381
        # 5   15.781
        # 6    5.181
        # 7   -9.419
        # 8  -17.119
        # 9  -21.719
        # 10  58.781
        # 11  23.081
        # 12  23.181
        # 13   6.081
        # 14 -12.419
        # 15  15.781
        # 16   5.181
        # 17  -9.419
        # 18 -17.119
        # 19 -21.719
        # 20 -12.019
    # </editable>
    
    
    # Script entry point: run the PCA example only when executed directly,
    # not when this module is imported.
    if __name__ == "__main__":
        execute()
    作者:沐禹辰
    出处:http://www.cnblogs.com/renfanzi/
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
  • 相关阅读:
    字符统计和滑动窗口
    字典树应用及用哈希表代替
    迷宫里的动态规划应用
    求所有排列中的第 i 个排列的问题
    最大子串和问题
    二分查找、变形及应用
    前 n 个数原址排序的问题
    LeetCode 32 括号匹配
    11.常用的API
    10.正则表达式
  • 原文地址:https://www.cnblogs.com/renfanzi/p/14688742.html
Copyright © 2011-2022 走看看