zoukankan      html  css  js  c++  java
  • 预处理算法_6_缺失值处理

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    
    def execute():
        """Handle missing values in the selected column(s) and print the result.

        Runs ``select <params['columns']> from <inputs['table']>`` against the
        configured MySQL database and applies the strategy named by
        ``params['method']``:

        * ``'drop'``                  -- drop every row that contains a missing value
        * ``'Median_interpolation'``  -- fill with each numeric column's median
        * ``'Mode_interpolation'``    -- fill with each column's (first) mode
        * ``'slinear'``/``'quadratic'`` -- ``DataFrame.interpolate`` with that method
        * ``'polynomial'``            -- fill with each numeric column's mean
        * anything else               -- fill with the constant ``params['value']``
          (converted to a float when it represents a number)

        Raises:
            ValueError: if the constant-fill branch is selected and
                ``params['value']`` is None.
        """
        params = {"method": '', "columns": "score", "value": 20}
        inputs = {"table": 'test'}
        # <editable>
        # Load modules
        import unicodedata

        import pandas as pd
        from sqlalchemy import create_engine

        # Select the target data
        # NOTE(review): the database credentials are hard-coded in this URL;
        # consider moving them to configuration or environment variables.
        engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
        # NOTE(review): column and table names are concatenated straight into the
        # statement. Identifiers cannot be bound as query parameters, so these
        # values must come from a trusted source (or be validated by the caller).
        sql = 'select ' + params['columns'] + ' from ' + inputs['table']
        try:
            data_in = pd.read_sql_query(sql, engine)
        finally:
            # Release the pooled connections even if the query fails.
            engine.dispose()

        def to_number(s):
            """Return ``s`` as a float, or None if it does not represent a number."""
            try:
                return float(s)
            except (TypeError, ValueError):
                # TypeError covers None and other non-string, non-numeric inputs.
                pass
            try:
                # Single Unicode numeric characters (e.g. a vulgar fraction) that
                # float() rejects; use the value unicodedata reports for them.
                return unicodedata.numeric(s)
            except (TypeError, ValueError):
                return None

        # Missing-value handling
        method = params['method']
        if method == 'drop':
            data_out = data_in.dropna()
        elif method == 'Median_interpolation':
            # numeric_only avoids a TypeError on non-numeric columns.
            data_out = data_in.fillna(data_in.median(numeric_only=True))
        elif method == 'Mode_interpolation':
            # mode() returns a DataFrame (one row per tied mode). Passing it to
            # fillna directly would align on the row index and fill only the rows
            # whose index appears in it, so take the first mode row as a Series
            # keyed by column name instead.
            modes = data_in.mode()
            if modes.empty:
                # No data to take a mode from; nothing can be filled.
                data_out = data_in.copy()
            else:
                data_out = data_in.fillna(modes.iloc[0])
        elif method in ('slinear', 'quadratic'):
            data_out = data_in.interpolate(method=method)
        elif method == 'polynomial':
            # NOTE(review): despite the key name, this branch fills with the
            # column mean rather than a polynomial interpolation; kept as-is
            # because callers may rely on it -- confirm the intended behavior.
            data_out = data_in.fillna(data_in.mean(numeric_only=True))
        else:
            fill_value = params['value']
            if fill_value is None:
                raise ValueError("params['value'] must be set for constant fill")
            number = to_number(fill_value)
            data_out = data_in.fillna(fill_value if number is None else number)

        # Write out the result
        print(data_out)
        # </editable>
    
    
    # Script entry point: run the missing-value handling when executed directly.
    if "__main__" == __name__:
        execute()
    作者:沐禹辰
    出处:http://www.cnblogs.com/renfanzi/
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
  • 相关阅读:
    openpose_caffe_to_rknn.py
    ncnn的完整编译过程
    We Need More Bosses CodeForces
    Yet Another Problem On a Subsequence CodeForces
    牛客 82E 无向图中的最短距离 (bitset,bfs)
    Largest Submatrix 3 CodeForces
    bzoj 4245 [ONTAK2015]OR-XOR (贪心)
    BZOJ 2836 魔法树 链剖裸题~~
    BZOJ 3083 遥远的国度 树链剖分+脑子
    Luogu P1471 方差 线段树
  • 原文地址:https://www.cnblogs.com/renfanzi/p/14476550.html
Copyright © 2011-2022 走看看