zoukankan      html  css  js  c++  java
  • oracle 数据库表字段的缺失值统计 基于python

      参考资料:python 连接oracle -- sqlalchemy及cx_Oracle的使用详解

      oracle指定表缺失值统计 -- 基于cx_Oracle

    import pandas as pd
    import cx_Oracle as orcl
    
    # 批量查询数据缺失率
    def missing_count(table_name, where_condition={}, **engine):
        #where 条件参数化, str或dict
        sql_tab_columns = "select column_name from user_tab_columns \
            where table_name = '{}'".format(table_name)
    
        db = ConnectOracle(**engine)
        #sql_select.encode('utf-8')
        columns = db.select_oracle(sql=sql_tab_columns)
        
        #生成select语句
        ss = ''
        for col in columns.COLUMN_NAME:
            ss += 'sum(decode({},null, 1, 0)) as {}, '.format(col, col)
        ss = ss[:-2]
        
        #生成where条件
        wh = ''
        if where_condition:
            wh += ' where '
            if type(where_condition)==str:
                wh += where_condition
            if type(where_condition)==dict:
                for key in where_condition.keys():
                    if type(where_condition[key])!=str:
                        wh += ('t.' + str(key) + ' = ' +
                               str(where_condition[key]) + ' and ')
                    else:
                        wh += ("t." + str(key) + " = '" +
                               str(where_condition[key]) + "' and ")
                wh = wh[:-4]
        
        #print(ss)
        sql_select =  '''select count(*) as counts, {}
                        from {} t {}
                        '''.format(ss, table_name, wh)
        
        #print(sql_select)
        res = db.select_oracle(sql=sql_select)
        return pd.Series(res.values.tolist()[0], index=res.columns)
    

      缺失值统计2 -- 基于sqlalchemy

    import pandas as pd
    #import cx_Oracle as orcl
    from sqlalchemy import create_engine
    
    # 批量查询数据缺失率
    def missing_count(table_name, where_condition={}, **config):
        #where 条件参数化, str或dict
        
        #定义数据库连接
        #'oracle://qmcbrt:qmcbrt@10.85.31.20:1521/tqmcbdb'
        engine = 'oracle://{username}:{passwd}@{host}:{port}/{sid}'.format(**config)  #dbname -- 各版本语法不同
        db = create_engine(engine)
        #pd.read_sql_query(sql_tab_columns, db)
        #db.execute('truncate table {}'.format(ttb))
        
        #查询列名 -- 用于生成select项
        sql_tab_columns = "select column_name from user_tab_columns where table_name = '{}'".format(table_name)
        columns = pd.read_sql_query(sql_tab_columns, db)
         
        #生成select项
        ss = ''
        for col in columns.column_name:
            ss += 'sum(decode({}, null, 1, 0)) as {}, '.format(col, col)
        ss = ss[:-2]
         
        #生成where条件
        wh = ''
        if where_condition:
            wh += ' where '
            if type(where_condition)==str:
                wh += where_condition
            if type(where_condition)==dict:
                for key in where_condition.keys():
                    if type(where_condition[key])!=str:
                        wh += ('t.' + str(key) + ' = ' +
                               str(where_condition[key]) + ' and ')
                    else:
                        wh += ("t." + str(key) + " = '" +
                               str(where_condition[key]) + "' and ")
                wh = wh[:-4]
         
        #select语句
        sql_select =  '''select count(*) as counts, {} from {} t {} '''.format(ss, table_name, wh)
         
        #pd.Series(res.values.tolist()[0], index=res.columns)
        res = pd.read_sql_query(sql_select, db)
        return res.iloc[0,:]
    

      示例

    config = {
            'username':'qmcb',
            'passwd':'qmcb',
            'host':'localhost',
            'port':'1521',
            'sid':'tqmcbdb'
            }
    where_condition = {
                     'is_normal': 1,
                     'is_below_16': 0,
                     'is_xs': 0,
                     'is_cj': 0,
                     'is_dead': 0,
                     'AAE138_is_not_null': 0,
                     'is_dc': 0,
                     'is_px': 0
                     }
    # 计算 QMCB_KM_2019_1_31_1 表的数据缺失数
    missing_count('QMCB_KM_2019_1_31_1', where_condition, **config)
    

      

      

  • 相关阅读:
    大数据存储技术_磁盘与阵列技术
    OpenMP Programming
    大数据存储技术_背景
    群ping
    Markdown使用说明
    随笔记录--清楚sqlserver r2 的连接记录
    ORACLE备份保留策略CONFIGURE RETENTION POLICY
    ORA-00257:archiver error.Connect internal only, until freed
    expdp数据泵导出日志信息不全的问题
    处理程序“ExtensionlessUrlHandler-Integrated-4.0”在其模块列表中有一个错误模块“ManagedPipelineHandler”
  • 原文地址:https://www.cnblogs.com/iupoint/p/10438212.html
Copyright © 2011-2022 走看看