zoukankan      html  css  js  c++  java
  • 预处理算法_4_表堆叠

    表堆叠主要是将两个DataFrame沿行方向或列方向拼接(pd.concat),从而完成数据合并

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    
    # <editable>
    
    
    def execute():
        """Stack two database tables with ``pandas.concat`` and print the result.

        Reads the configured columns of the left and right tables from the
        MySQL database into two DataFrames, concatenates them along the axis
        given by ``params['method']`` (0 = row-wise, 1 = column-wise) and
        prints the combined DataFrame. Takes no arguments and returns None.
        """
        # <editable>

        # Load modules.
        import pandas as pd
        from sqlalchemy import create_engine

        # Connect to the database.
        # NOTE(review): credentials are hard-coded in the URL; consider moving
        # them to configuration before using this outside of a demo.
        engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')

        # Select the target data.
        # NOTE: "left_on" / "right_on" are declared but not used by this step;
        # pd.concat stacks the frames and does not join on a key.
        params = {
            "left_columns": "id, score",
            "right_columns": "id, name",
            "left_on": "id",
            "right_on": "id",
            "method": 0,      # axis to concatenate along: 0 = rows, 1 = columns
        }
        inputs = {"table_left": 'test', "table_right": "class"}

        def read_table(table, columns):
            # An empty (or whitespace-only) column list means "all columns".
            # NOTE(review): identifiers are concatenated into the SQL text, so
            # table/column names must come from trusted configuration only.
            selected = columns.strip() or '*'
            return pd.read_sql_query('select ' + selected + ' from ' + table, engine)

        try:
            left = read_table(inputs['table_left'], params['left_columns'])
            right = read_table(inputs['table_right'], params['right_columns'])
        finally:
            # Release pooled connections even if one of the queries fails.
            engine.dispose()

        # Merge the data.
        data_out = pd.concat([left, right], axis=int(params['method']))

        # Write out the result.
        print(data_out)

        # Sample data
        #
        # Left table:
        #    id  score
        # 0   1   80.0
        # 1   2   20.0
        # 2   3    NaN
        # 3   4    5.0
        # 4   5    4.0
        #
        # Right table:
        #    id name
        # 0   1   张三
        # 1   2   李四
        # 2   3   王五
        # 3   4   赵六
        # 4   5   冯七
        # 5   6  朱重八
        #
        # Column-wise result (method = 1):
        #     id  score  id name
        # 0  1.0   80.0   1   张三
        # 1  2.0   20.0   2   李四
        # 2  3.0    NaN   3   王五
        # 3  4.0    5.0   4   赵六
        # 4  5.0    4.0   5   冯七
        # 5  NaN    NaN   6  朱重八
        #
        # ==========================
        # Row-wise result (method = 0); this sample appears to come from a run
        # where the left table had a sixth row (id 6, score 20.0):
        #    id  score name
        # 0   1   80.0  NaN
        # 1   2   20.0  NaN
        # 2   3    NaN  NaN
        # 3   4    5.0  NaN
        # 4   5    4.0  NaN
        # 5   6   20.0  NaN
        # 0   1    NaN   张三
        # 1   2    NaN   李四
        # 2   3    NaN   王五
        # 3   4    NaN   赵六
        # 4   5    NaN   冯七
        # 5   6    NaN  朱重八
    # </editable>
    
    
    # Run the table-stacking step only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        execute()
    作者:沐禹辰
    出处:http://www.cnblogs.com/renfanzi/
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接。
  • 相关阅读:
    关于上网内容
    lua 学习笔记1
    庖丁解牛Linux基本系统组成分析
    使用163.com的Centos6 yum源,更新RHEL6系统
    安装FreeBSD 8.2
    虚拟机安装FreeBSD 8.2
    也谈苹果
    2011年国庆老家记录
    Common lisp 学习笔记
    JDBC | 第八章: JDBC常用数据库连接池c3p0,dbcp,durid,hikariCP,tomcatjdbc性能及区别
  • 原文地址:https://www.cnblogs.com/renfanzi/p/14476441.html
Copyright © 2011-2022 走看看