zoukankan      html  css  js  c++  java
  • 2018.03.27 python pandas merge join 使用

    #2.16 合并 merge-join
    import numpy as np
    import pandas as pd
    df1 = pd.DataFrame({'key1':['k0','k1','k2','k3'],
                        'A':['A0','A1','A2','A3'],
                        'B':['B0','B1','B2','B3']})
    df2 = pd.DataFrame({'key1':['k0','k1','k2','k3'],
                        'C':['C0','C1','C2','C3'],
                        'D':['D0','D1','D2','D3']})
    df3 = pd.DataFrame({'key1':['k0','k1','k2','k3'],
                        'key2':['k0','k1','k0','k1'],
                        'A':['A0','A1','A2','A3'],
                        'B':['B0','B1','B2','B3']})
    df4 = pd.DataFrame({'key1':['k0','k1','k2','k3'],
                        'key2':['k0','k1','k0','k3'],
                        'C':['C0','C1','C2','C3'],
                        'D':['D0','D1','D2','D3']})
    print(df1)
    print(df2)
    print(pd.merge(df1,df2,on='key1'))
    print('------')
    #on 参考键
    
    print(df3)
    print(df4)
    print(pd.merge(df3,df4,on=['key1','key2']))#2个键值同时去匹配

    结果:

        A   B key1
    0  A0  B0   k0
    1  A1  B1   k1
    2  A2  B2   k2
    3  A3  B3   k3
        C   D key1
    0  C0  D0   k0
    1  C1  D1   k1
    2  C2  D2   k2
    3  C3  D3   k3
        A   B key1   C   D
    0  A0  B0   k0  C0  D0
    1  A1  B1   k1  C1  D1
    2  A2  B2   k2  C2  D2
    3  A3  B3   k3  C3  D3
    ------
        A   B key1 key2
    0  A0  B0   k0   k0
    1  A1  B1   k1   k1
    2  A2  B2   k2   k0
    3  A3  B3   k3   k1
        C   D key1 key2
    0  C0  D0   k0   k0
    1  C1  D1   k1   k1
    2  C2  D2   k2   k0
    3  C3  D3   k3   k3
        A   B key1 key2   C   D
    0  A0  B0   k0   k0  C0  D0
    1  A1  B1   k1   k1  C1  D1
    2  A2  B2   k2   k0  C2  D2
    #参数how 合并方式
    print(pd.merge(df3,df4,on=['key1','key2'],how='inner'))#取交集
    print(pd.merge(df3,df4,on=['key1','key2'],how='outer'))#并集  数据缺失用Nan
    print(pd.merge(df3,df4,on=['key1','key2'],how='left'))#按照df3为参考,数据缺失用Nan
    print(pd.merge(df3,df4,on=['key1','key2'],how='right'))#按照df4为参考,数据缺失用Nan

     结果:

        A   B key1 key2   C   D
    0  A0  B0   k0   k0  C0  D0
    1  A1  B1   k1   k1  C1  D1
    2  A2  B2   k2   k0  C2  D2
         A    B key1 key2    C    D
    0   A0   B0   k0   k0   C0   D0
    1   A1   B1   k1   k1   C1   D1
    2   A2   B2   k2   k0   C2   D2
    3   A3   B3   k3   k1  NaN  NaN
    4  NaN  NaN   k3   k3   C3   D3
        A   B key1 key2    C    D
    0  A0  B0   k0   k0   C0   D0
    1  A1  B1   k1   k1   C1   D1
    2  A2  B2   k2   k0   C2   D2
    3  A3  B3   k3   k1  NaN  NaN
         A    B key1 key2   C   D
    0   A0   B0   k0   k0  C0  D0
    1   A1   B1   k1   k1  C1  D1
    2   A2   B2   k2   k0  C2  D2
    3  NaN  NaN   k3   k3  C3  D3
    #参数left_on right_on left_index right_index -》当键不在一个列时,可以单独设置左键或右键
    df1 = pd.DataFrame({'key':list('abdcjeu'),
                        'data1':range(7)})
    df2 = pd.DataFrame({'rkey':list('abc'),
                        'data2':range(3)})
    print(df1,'
    ',df2)
    print(pd.merge(df1,df2,left_on = 'key',right_on = 'rkey'))#当2个dataframe中的主键名称不一致时,用left_on 和right_on 去指定键值

    结果:

       data1 key
    0      0   a
    1      1   b
    2      2   d
    3      3   c
    4      4   j
    5      5   e
    6      6   u 
        data2 rkey
    0      0    a
    1      1    b
    2      2    c
       data1 key  data2 rkey
    0      0   a      0    a
    1      1   b      1    b
    2      3   c      2    c
    df1 = pd.DataFrame({'key':list('abdcjeu'),
                        'data1':range(7)})
    df2 = pd.DataFrame({'data2':range(100,105)},index = list('abcde'))
    print(df1)
    print(df2)
    print(pd.merge(df1,df2,left_on='key',right_index=True,sort=True))#以index为键 作为左表 key对应的键值对  sort是否按照key排序
       data1 key
    0      0   a
    1      1   b
    2      2   d
    3      3   c
    4      4   j
    5      5   e
    6      6   u
       data2
    a    100
    b    101
    c    102
    d    103
    e    104
       data1 key  data2
    0      0   a    100
    1      1   b    101
    3      3   c    102
    2      2   d    103
    5      5   e    104
    #pd.join 直接通过索引链接
    left = pd.DataFrame({'A':['A0','A1','A2','A3'],
                        'B':['B0','B1','B2','B3']},
                       index = ['k0','k1','k2','k4'])
    right = pd.DataFrame({'C':['C0','C1','C2','C3'],
                        'D':['D0','D1','D2','D3']},
                       index = ['k0','k1','k2','k3'])
    print(left)
    print(right)
    print(left.join(right))
    print(left.join(right,how='outer'))#拓展

    结果:

         A   B
    k0  A0  B0
    k1  A1  B1
    k2  A2  B2
    k4  A3  B3
         C   D
    k0  C0  D0
    k1  C1  D1
    k2  C2  D2
    k3  C3  D3
         A   B    C    D
    k0  A0  B0   C0   D0
    k1  A1  B1   C1   D1
    k2  A2  B2   C2   D2
    k4  A3  B3  NaN  NaN
          A    B    C    D
    k0   A0   B0   C0   D0
    k1   A1   B1   C1   D1
    k2   A2   B2   C2   D2
    k3  NaN  NaN   C3   D3
    k4   A3   B3  NaN  NaN
    df1 = pd.DataFrame({'key':list('bbacaab'),
                        'data1':range(7)})
    df2 = pd.DataFrame({'key':list('abc'),
                        'data2':range(3)})
    print(pd.merge(df1,df2,left_index=True,right_index=True,suffixes=('_1','_2')))
    print(df1.join(df2['data2']))
    print('------')#当df1 df2的key相同时,使用suffixes 两个相同的key 成为 key_1 key_2

    结果:

       data1 key_1  data2 key_2
    0      0     b      0     a
    1      1     b      1     b
    2      2     a      2     c
       data1 key  data2
    0      0   b    0.0
    1      1   b    1.0
    2      2   a    2.0
    3      3   c    NaN
    4      4   a    NaN
    5      5   a    NaN
    6      6   b    NaN
    left = pd.DataFrame({'A':['A0','A1','A2','A3'],
                        'B':['B0','B1','B2','B3'],
                        'key':['k0','k1','k0','k3']})
    right = pd.DataFrame({'C':['C0','C1'],
                        'D':['D0','D1']},
                       index = ['k0','k1'])
    print(left)
    print(right)
    print(left.join(right,on='key'))#用left的key和 right的index 合并

    结果:

        A   B key
    0  A0  B0  k0
    1  A1  B1  k1
    2  A2  B2  k0
    3  A3  B3  k3
         C   D
    k0  C0  D0
    k1  C1  D1
        A   B key    C    D
    0  A0  B0  k0   C0   D0
    1  A1  B1  k1   C1   D1
    2  A2  B2  k0   C0   D0
    3  A3  B3  k3  NaN  NaN
    
  • 相关阅读:
    如何面试程序员?
    类似猪八戒网的网站
    存储过程
    一个不错的网站(博客制作参考)
    用触发器来实现级联更新级联删除
    用触发器进行级联删除
    数据库触发器详解
    浅谈数据库中的存储过程
    JDBC连接数据库
    Java递归函数
  • 原文地址:https://www.cnblogs.com/jxzhu/p/8658237.html
Copyright © 2011-2022 走看看