zoukankan      html  css  js  c++  java
  • DataFrame合并:轴向连接concat

    from pandas import DataFrame,Series
    import pandas as pd
    import numpy as np
    
    # Build a 3x4 integer matrix to demonstrate array-level concatenation in NumPy.
    arr = np.arange(12).reshape(3, 4)
    print(arr)
    # Output:
    # [[ 0  1  2  3]
    #  [ 4  5  6  7]
    #  [ 8  9 10 11]]

    # Joining two copies along axis=1 places them side by side (3 rows, 8 columns).
    print(np.concatenate((arr, arr), axis=1))
    # Output:
    # [[ 0  1  2  3  0  1  2  3]
    #  [ 4  5  6  7  4  5  6  7]
    #  [ 8  9 10 11  8  9 10 11]]
    
    # Three Series with pairwise-disjoint index labels.
    s1 = pd.Series([0, 1], index=list('ab'))
    s2 = pd.Series([2, 3, 4], index=list('cde'))
    s3 = pd.Series([5, 6], index=list('fg'))

    print(s1)
    # Output:
    # a    0
    # b    1
    # dtype: int64

    print(s2)
    # Output:
    # c    2
    # d    3
    # e    4
    # dtype: int64

    print(s3)
    # Output:
    # f    5
    # g    6
    # dtype: int64

    # With no axis argument the pieces are stacked end to end into one Series.
    print(pd.concat([s1, s2, s3]))
    # Output:
    # a    0
    # b    1
    # c    2
    # d    3
    # e    4
    # f    5
    # g    6
    # dtype: int64

    # concat works along axis=0 by default and yields a new Series; passing
    # axis=1 (the column axis) turns the result into a DataFrame instead.
    # Labels missing from a given input show up as NaN in that column.
    print(pd.concat([s1, s2, s3], axis=1, sort=True))
    # Output:
    #      0    1    2
    # a  0.0  NaN  NaN
    # b  1.0  NaN  NaN
    # c  NaN  2.0  NaN
    # d  NaN  3.0  NaN
    # e  NaN  4.0  NaN
    # f  NaN  NaN  5.0
    # g  NaN  NaN  6.0
    
    # Two Series whose indexes overlap on the labels 'a' and 'b'.
    s4 = Series([0, 1], index=['a', 'b'])
    s5 = Series([2, 3, 4], index=['a', 'b', 'c'])

    # Concatenation along axis=0: the pieces are stacked, so the overlapping
    # labels 'a' and 'b' appear twice in the result.
    print(pd.concat([s4, s5]))
    '''
    a    0
    b    1
    a    2
    b    3
    c    4
    dtype: int64
    '''
    # Outer join (the default): the row index is the union of both indexes.
    print(pd.concat([s4, s5], axis=1, sort=True))
    '''
         0  1
    a  0.0  2
    b  1.0  3
    c  NaN  4
    '''
    # Inner join: only the labels present in both inputs are kept.
    print(pd.concat([s4, s5], axis=1, join='inner'))
    '''
       0  1
    a  0  2
    b  1  3
    '''
    # Force a specific row index on the result. The `join_axes` keyword that
    # used to do this was deprecated in pandas 0.25 and removed in pandas 1.0
    # (passing it raises TypeError there), so reindex the concatenated frame
    # instead; labels not present in any input ('d') become all-NaN rows.
    print(pd.concat([s4, s5], axis=1).reindex(['a', 'b', 'c', 'd']))
    '''
         0    1
    a  0.0  2.0
    b  1.0  3.0
    c  NaN  4.0
    d  NaN  NaN
    '''
    # Passing keys labels each concatenated piece so the pieces can be told
    # apart in the result; here the keys show up as an outer index level.
    print(pd.concat([s1, s2, s3], keys=['A', 'B', 'C']))
    # Output:
    # A  a    0
    #    b    1
    # B  c    2
    #    d    3
    #    e    4
    # C  f    5
    #    g    6
    # dtype: int64

    # With axis=1 the keys become the column headers of the resulting DataFrame.
    print(pd.concat([s1, s2, s3], axis=1, keys=['A', 'B', 'C'], sort=True))
    # Output:
    #      A    B    C
    # a  0.0  NaN  NaN
    # b  1.0  NaN  NaN
    # c  NaN  2.0  NaN
    # d  NaN  3.0  NaN
    # e  NaN  4.0  NaN
    # f  NaN  NaN  5.0
    # g  NaN  NaN  6.0
    
    # Two small frames with different column names and partially overlapping
    # row labels ('a' and 'c' appear in both).
    df1 = pd.DataFrame(
        np.arange(6).reshape(3, 2),
        index=list('abc'),
        columns=['one', 'two'],
    )
    df2 = pd.DataFrame(
        np.arange(4).reshape(2, 2),
        index=list('ac'),
        columns=['three', 'four'],
    )

    print(df1)
    # Output:
    #    one  two
    # a    0    1
    # b    2    3
    # c    4    5

    print(df2)
    # Output:
    #    three  four
    # a      0     1
    # c      2     3

    # Row-wise concatenation: rows are stacked, the columns are the union of
    # both frames (ordered by sort=True), and missing cells are NaN.
    print(pd.concat([df1, df2], sort=True))
    # Output:
    #    four  one  three  two
    # a   NaN  0.0    NaN  1.0
    # b   NaN  2.0    NaN  3.0
    # c   NaN  4.0    NaN  5.0
    # a   1.0  NaN    0.0  NaN
    # c   3.0  NaN    2.0  NaN

    # Column-wise concatenation: rows are aligned on their labels, so 'b',
    # which exists only in df1, gets NaN in df2's columns.
    print(pd.concat([df1, df2], axis=1, sort=True))
    # Output:
    #    one  two  three  four
    # a    0    1    0.0   1.0
    # b    2    3    NaN   NaN
    # c    4    5    2.0   3.0

    # keys adds an outer column level naming each source frame, and names
    # labels the two column levels.
    print(
        pd.concat(
            [df1, df2],
            axis=1,
            keys=['level1', 'level2'],
            names=['upper', 'lower'],
            sort=True,
        )
    )
    # Output:
    # upper level1     level2
    # lower    one two  three four
    # a          0   1    0.0  1.0
    # b          2   3    NaN  NaN
    # c          4   5    2.0  3.0
  • 相关阅读:
    LeetCode 81 Search in Rotated Sorted Array II(循环有序数组中的查找问题)
    LeetCode 80 Remove Duplicates from Sorted Array II(移除数组中出现两次以上的元素)
    LeetCode 79 Word Search(单词查找)
    LeetCode 78 Subsets (所有子集)
    LeetCode 77 Combinations(排列组合)
    LeetCode 50 Pow(x, n) (实现幂运算)
    LeetCode 49 Group Anagrams(字符串分组)
    LeetCode 48 Rotate Image(2D图像旋转问题)
    LeetCode 47 Permutations II(全排列)
    LeetCode 46 Permutations(全排列问题)
  • 原文地址:https://www.cnblogs.com/nicole-zhang/p/14481538.html
Copyright © 2011-2022 走看看