zoukankan      html  css  js  c++  java
  • 按照Key合并DataFrame

    import pandas as pd
    
    # Merge two DataFrames on a shared 'key' column.
    # Left-hand frame: 'key' plus columns A and B.
    left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                         'A': ['A0', 'A1', 'A2', 'A3'],
                         'B': ['B0', 'B1', 'B2', 'B3']})
    print('left\n', left)
    # Right-hand frame: the same 'key' values plus columns C and D.
    right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                          'C': ['C0', 'C1', 'C2', 'C3'],
                          'D': ['D0', 'D1', 'D2', 'D3']})
    print('right\n', right)
    # Join row-wise on 'key'; every key exists on both sides, so all four
    # rows survive and the result carries columns key, A, B, C, D.
    result = pd.merge(left, right, on='key')
    print('result\n', result)

    输出

    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/merge_key.py
    left
       key   A   B
    0  K0  A0  B0
    1  K1  A1  B1
    2  K2  A2  B2
    3  K3  A3  B3
    right
       key   C   D
    0  K0  C0  D0
    1  K1  C1  D1
    2  K2  C2  D2
    3  K3  C3  D3
    result
       key   A   B   C   D
    0  K0  A0  B0  C0  D0
    1  K1  A1  B1  C1  D1
    2  K2  A2  B2  C2  D2
    3  K3  A3  B3  C3  D3
    
    Process finished with exit code 0
    import pandas as pd
    
    # Concatenate DataFrames: stacked by rows under group keys, and side by
    # side by index.
    df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'C': ['C0', 'C1', 'C2', 'C3'],
                        'D': ['D0', 'D1', 'D2', 'D3']},
                       index=[0, 1, 2, 3])
    
    print('df 1\n', df1)
    
    df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                        'B': ['B4', 'B5', 'B6', 'B7'],
                        'C': ['C4', 'C5', 'C6', 'C7'],
                        'D': ['D4', 'D5', 'D6', 'D7']},
                       index=[4, 5, 6, 7])
    print('df2\n', df2)
    
    df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                        'B': ['B8', 'B9', 'B10', 'B11'],
                        'C': ['C8', 'C9', 'C10', 'C11'],
                        'D': ['D8', 'D9', 'D10', 'D11']},
                       index=[8, 9, 10, 11])
    print('df3', df3)
    frames = [df1, df2, df3]
    print('frame 123\n', frames)
    # keys= adds an outer index level ('x', 'y', 'z') labelling which input
    # frame each row came from.
    result = pd.concat(frames, keys=['x', 'y', 'z'])
    print('xyz\n', result)
    print('loc y\n\n')
    # Selecting one outer key returns that input frame's rows.
    print(result.loc['y'])
    
    # df4 shares only index labels 2 and 3 with df1.
    df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],
                        'D': ['D2', 'D3', 'D6', 'D7'],
                        'F': ['F2', 'F3', 'F6', 'F7']},
                       index=[2, 3, 6, 7])
    # axis=1 places the frames side by side, aligned on index; labels that
    # are missing from one side are filled with NaN.
    result_d1_d4_sort = pd.concat([df1, df4], axis=1, sort=False)
    print('result_d1_d4_sort\n\n', result_d1_d4_sort)
    
    # join='inner' keeps only the index labels present in both frames.
    result_d1_d4_join_inner = pd.concat([df1, df4], axis=1, join='inner')
    print('result_d1_d4_join\n\n', result_d1_d4_join_inner)
    输出
    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/combine_index.py
    df 1
         A   B   C   D
    0  A0  B0  C0  D0
    1  A1  B1  C1  D1
    2  A2  B2  C2  D2
    3  A3  B3  C3  D3
    df2
         A   B   C   D
    4  A4  B4  C4  D4
    5  A5  B5  C5  D5
    6  A6  B6  C6  D6
    7  A7  B7  C7  D7
    df3       A    B    C    D
    8    A8   B8   C8   D8
    9    A9   B9   C9   D9
    10  A10  B10  C10  D10
    11  A11  B11  C11  D11
    frame 123
     [    A   B   C   D
    0  A0  B0  C0  D0
    1  A1  B1  C1  D1
    2  A2  B2  C2  D2
    3  A3  B3  C3  D3,     A   B   C   D
    4  A4  B4  C4  D4
    5  A5  B5  C5  D5
    6  A6  B6  C6  D6
    7  A7  B7  C7  D7,       A    B    C    D
    8    A8   B8   C8   D8
    9    A9   B9   C9   D9
    10  A10  B10  C10  D10
    11  A11  B11  C11  D11]
    xyz
             A    B    C    D
    x 0    A0   B0   C0   D0
      1    A1   B1   C1   D1
      2    A2   B2   C2   D2
      3    A3   B3   C3   D3
    y 4    A4   B4   C4   D4
      5    A5   B5   C5   D5
      6    A6   B6   C6   D6
      7    A7   B7   C7   D7
    z 8    A8   B8   C8   D8
      9    A9   B9   C9   D9
      10  A10  B10  C10  D10
      11  A11  B11  C11  D11
    loc y
    
    
        A   B   C   D
    4  A4  B4  C4  D4
    5  A5  B5  C5  D5
    6  A6  B6  C6  D6
    7  A7  B7  C7  D7
    result_d1_d4_sort
    
          A    B    C    D    B    D    F
    0   A0   B0   C0   D0  NaN  NaN  NaN
    1   A1   B1   C1   D1  NaN  NaN  NaN
    2   A2   B2   C2   D2   B2   D2   F2
    3   A3   B3   C3   D3   B3   D3   F3
    6  NaN  NaN  NaN  NaN   B6   D6   F6
    7  NaN  NaN  NaN  NaN   B7   D7   F7
    result_d1_d4_join
    
         A   B   C   D   B   D   F
    2  A2  B2  C2  D2  B2  D2  F2
    3  A3  B3  C3  D3  B3  D3  F3
    
    Process finished with exit code 0

    lambda 连接
    import pandas as pd
    
    # Build a combined 'YearQuarter' column from two string columns using a
    # row-wise lambda.
    df = pd.DataFrame({'Year': ['2014', '2015'], 'Quarter': ['q1', 'q2']})
    print('fist\n', df)
    # With axis=1 the lambda receives one row as a Series indexed by column
    # name, so read the fields by label rather than by integer position.
    df['YearQuarter'] = df[['Year', 'Quarter']].apply(
        lambda row: '{}--{}'.format(row['Year'], row['Quarter']), axis=1)
    print('new df\n', df)

    输出

    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/format.py
    fist
        Year Quarter
    0  2014      q1
    1  2015      q2
    new df
        Year Quarter YearQuarter
    0  2014      q1    2014--q1
    1  2015      q2    2015--q2
    
    Process finished with exit code 0

    merge suffixes

    import pandas as pd
    import numpy as np
    
    # Merge on differently named key columns and disambiguate the shared
    # 'price' column with suffixes.
    # Prices are random, so the numbers differ from run to run.
    df1 = pd.DataFrame({'fruit': ['apple', 'banana', 'orange'] * 3,
                        'weight': ['high', 'medium', 'low'] * 3,
                        'price': np.random.randint(0, 15, 9)})
    print('df1', df1)
    df2 = pd.DataFrame({'pazham': ['apple', 'orange', 'pine'] * 2,
                        'kilo': ['high', 'low'] * 3,
                        'price': np.random.randint(0, 15, 6)})
    
    print('df2', df2)
    # Pair fruit<->pazham and weight<->kilo. The key columns are given as
    # lists so they are unambiguously several labels, not one tuple label.
    # Both frames have a 'price' column, so the suffixes rename them to
    # price_left / price_right in the result.
    out = df1.merge(df2, left_on=['fruit', 'weight'], right_on=['pazham', 'kilo'],
                    how='inner', suffixes=('_left', '_right')).head(10)
    
    print('out', out)
    输出
    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/PD/combine_data.py
    df1     fruit  weight  price
    0   apple    high      1
    1  banana  medium     12
    2  orange     low     11
    3   apple    high     13
    4  banana  medium      6
    5  orange     low     13
    6   apple    high      6
    7  banana  medium     13
    8  orange     low      6
    df2    pazham  kilo  price
    0   apple  high      9
    1  orange   low      8
    2    pine  high      7
    3   apple   low     11
    4  orange  high      3
    5    pine   low      9
    out     fruit weight  price_left  pazham  kilo  price_right
    0   apple   high           1   apple  high            9
    1   apple   high          13   apple  high            9
    2   apple   high           6   apple  high            9
    3  orange    low          11  orange   low            8
    4  orange    low          13  orange   low            8
    5  orange    low           6  orange   low            8
    
    Process finished with exit code 0

    initialising _dictionary
    # Demonstrates splitting a dictionary into a list of keys and a list
    # of values with an explicit loop.
    
    # Sample dictionary
    ini_dict = {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'}
    
    # Show the initial dictionary
    print("intial_dictionary", ini_dict)
    
    # Walk the (key, value) pairs once, collecting each side separately
    keys = []
    values = []
    for key, value in ini_dict.items():
        keys.append(key)
        values.append(value)
    
    # Print keys and values separately
    print("keys : ", keys)
    print("values : ", values)
    输出
    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/split_items.py
    intial_dictionary {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'}
    keys :  ['a', 'b', 'c']
    values :  ['akshat', 'bhuvan', 'chandan']
    
    Process finished with exit code 0

    zip(*ini_dict.items())
    # Demonstrates splitting a dictionary into its keys and its values in
    # one step by transposing the (key, value) pairs with zip.
    
    # Sample dictionary
    ini_dict = dict(a='akshat', b='bhuvan', c='chandan')
    
    # Show the initial dictionary
    print("intial_dictionary", ini_dict)
    
    # zip(*pairs) regroups (k1, v1), (k2, v2), ... into (k1, k2, ...) and
    # (v1, v2, ...); both results are tuples.
    pairs = ini_dict.items()
    keys, values = zip(*pairs)
    
    # Print keys and values separately
    print("keys : ", keys)
    print("values : ", values)

    输出

    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/split_zip_dict.py
    intial_dictionary {'a': 'akshat', 'b': 'bhuvan', 'c': 'chandan'}
    keys :  ('a', 'b', 'c')
    values :  ('akshat', 'bhuvan', 'chandan')
    
    Process finished with exit code 0

    拼接字典JSON合并LIST

    # Build a list of {'userId': <id>} dictionaries, one per id.
    # Sample of the target shape (first two ids); not used by the code below.
    test_list = [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}, {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}]
    ids = ['55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5', 'ac05eb4d-1e2f-4065-9f45-33f6f4579448', 'xxxxx-1e2f-4065-9f45-33f6f4579448']
    combine_list = []
    for user_id in ids:
        # A fresh dict per id, so list entries never share state.
        combine_list.append({'userId': user_id})
        # Printed on every iteration to show the list growing.
        print(combine_list)

    输出

    /Users/cloud/.conda/envs/auto/bin/python /Users/cloud/Downloads/project_static/debug/for_dict.py
    [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}]
    [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}, {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}]
    [{'userId': '55b6a1da-01d9-4ae6-9ba8-6ebd2a485ca5'}, {'userId': 'ac05eb4d-1e2f-4065-9f45-33f6f4579448'}, {'userId': 'xxxxx-1e2f-4065-9f45-33f6f4579448'}]
    
    Process finished with exit code 0
  • 相关阅读:
    如何在Ubuntu Server 18.04上安装Microsoft的Procmon
    如何在Ubuntu 20.04上安装Wine 5.0
    如何在Kali Linux 2020中启用SSH服务
    如何在Ubuntu 20.04 LTS Focal Fossa上安装Apache Groovy
    如何使用命令在Ubuntu 20.04 Linux上安装Vmware Tools
    在Ubuntu 20.04 LTS Focal Fossa上安装Zabbix Agent
    hdu 2089 不要62
    hdu 2093 成绩排名
    hdu 2104 hide handkerchief
    leetcode147对链表进行插入排序
  • 原文地址:https://www.cnblogs.com/a00ium/p/13877503.html
Copyright © 2011-2022 走看看