zoukankan      html  css  js  c++  java
  • pandas之数据合并

    import numpy as np
    import pandas as pd
    
    # Demo of the main ways to combine DataFrames with pd.concat.
    # Three 3x4 frames filled with 0.0, 1.0 and 2.0; all share columns a-d
    # and the default 0..2 row index.
    df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
    df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
    df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

    # Vertical (row-wise) concatenation: each frame keeps its own row labels,
    # so the result index repeats 0, 1, 2 three times.
    res = pd.concat([df1, df2, df3], axis=0)
    print(res)
    # Same stacking, but discard the original row labels and renumber 0..8.
    res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
    print(res)

    # From here on df3/df4 are rebound to frames whose row labels and column
    # sets only partially overlap (rows 1-3 vs 2-4, columns a-d vs b-e).
    df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
    df4 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

    # Horizontal (column-wise) outer join: rows are aligned on the union of
    # both row indexes and missing cells become NaN. Because the
    # concatenation axis is the columns, ignore_index=True renumbers the
    # column labels 0..7 rather than the rows.
    res = pd.concat([df3, df4], axis=1, join='outer', ignore_index=True)
    print(res)

    # Vertical inner join: only the columns present in both frames (b, c, d)
    # are kept; rows are stacked and renumbered 0..5.
    res = pd.concat([df3, df4], axis=0, join='inner', ignore_index=True)
    print(res)

    # Align the side-by-side result on one specific index. The `join_axes`
    # keyword was removed from pd.concat in pandas 1.0; reindexing the
    # concatenated result on df1's row index is the supported equivalent.
    res = pd.concat([df1, df2], axis=1).reindex(df1.index)
    print(res)

    # Append df2's rows below df1. DataFrame.append was removed in pandas 2.0;
    # pd.concat with ignore_index=True yields the same frame.
    res = pd.concat([df1, df2], ignore_index=True)
    print(res)
    
    输出结果:
    
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    0  1.0  1.0  1.0  1.0
    1  1.0  1.0  1.0  1.0
    2  1.0  1.0  1.0  1.0
    0  2.0  2.0  2.0  2.0
    1  2.0  2.0  2.0  2.0
    2  2.0  2.0  2.0  2.0
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    3  1.0  1.0  1.0  1.0
    4  1.0  1.0  1.0  1.0
    5  1.0  1.0  1.0  1.0
    6  2.0  2.0  2.0  2.0
    7  2.0  2.0  2.0  2.0
    8  2.0  2.0  2.0  2.0
         0    1    2    3    4    5    6    7
    1  1.0  1.0  1.0  1.0  NaN  NaN  NaN  NaN
    2  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
    3  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
    4  NaN  NaN  NaN  NaN  2.0  2.0  2.0  2.0
         b    c    d
    0  1.0  1.0  1.0
    1  1.0  1.0  1.0
    2  1.0  1.0  1.0
    3  2.0  2.0  2.0
    4  2.0  2.0  2.0
    5  2.0  2.0  2.0
         a    b    c    d    a    b    c    d
    0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    3  1.0  1.0  1.0  1.0
    4  1.0  1.0  1.0  1.0
    5  1.0  1.0  1.0  1.0
    import numpy as np
    import pandas as pd
    
    # Two float Series over the same labels, each with gaps (NaN) in
    # different positions; label 'a' is missing in both.
    labels = ['f', 'e', 'd', 'c', 'b', 'a']
    a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan], index=labels)
    b = pd.Series([1, np.nan, 3, 4, 5, np.nan], index=labels)
    print(a, b, sep='\n')

    # combine_first keeps the caller's values and only falls back to the
    # argument where the caller holds NaN.
    b_filled_from_a = b.combine_first(a)  # b takes priority, gaps filled from a
    a_filled_from_b = a.combine_first(b)  # a takes priority, gaps filled from b
    print(b_filled_from_a, a_filled_from_b, sep='\n')
    
    输出结果:
    f    NaN
    e    2.5
    d    NaN
    c    3.5
    b    4.5
    a    NaN
    dtype: float64
    f    1.0
    e    NaN
    d    3.0
    c    4.0
    b    5.0
    a    NaN
    dtype: float64
    f    1.0
    e    2.5
    d    3.0
    c    4.0
    b    5.0
    a    NaN
    dtype: float64
    f    1.0
    e    2.5
    d    3.0
    c    3.5
    b    4.5
    a    NaN
    dtype: float64
  • 相关阅读:
    redis
    sqlalchemy ORM
    元类的理解
    python连接mysql
    ffmpeg去水印
    ffmpeg给视频加文字水印
    yt-seo-checklist
    ffmpeg下载直播流
    ffmpeg拼接mp4视频
    ffmpeg截取视频
  • 原文地址:https://www.cnblogs.com/yuxiangyang/p/11286394.html
Copyright © 2011-2022 走看看