zoukankan      html  css  js  c++  java
  • pandas之数据合并

    import numpy as np
    import pandas as pd
    
    # Three 3x4 float frames sharing columns a-d, filled with 0.0, 1.0 and 2.0.
    df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
    df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
    df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))

    # Stack the frames on top of each other; every frame keeps its own
    # 0..2 row labels, so the labels repeat in the result.
    res = pd.concat([df1, df2, df3], axis='index')
    print(res)
    # Same stacking, but discard the old row labels and renumber 0..8.
    res = pd.concat([df1, df2, df3], axis='index', ignore_index=True)
    print(res)
    
    # Outer join along axis=1: the frames are placed side by side, the rows
    # are the union of both indexes (1..4), cells with no source data become
    # NaN, and ignore_index renumbers the concatenation axis (columns 0..7).
    df3 = pd.DataFrame(np.ones((3, 4)), index=[1, 2, 3], columns=list('abcd'))
    df4 = pd.DataFrame(np.full((3, 4), 2.0), index=[2, 3, 4], columns=list('bcde'))
    res = pd.concat([df3, df4], axis='columns', join='outer', ignore_index=True)
    print(res)
    
    # Vertical inner join: rows are stacked, only the columns present in
    # both frames (b, c, d) survive, and the rows are renumbered 0..5.
    df3 = pd.DataFrame(np.ones((3, 4)), index=[1, 2, 3], columns=list('abcd'))
    df4 = pd.DataFrame(np.full((3, 4), 2.0), index=[2, 3, 4], columns=list('bcde'))
    res = pd.concat([df3, df4], axis='index', join='inner', ignore_index=True)
    print(res)
    
    # Align the side-by-side result to a chosen index.  The join_axes
    # keyword was removed from pd.concat in pandas 1.0 (passing it raises
    # TypeError); reindexing the concatenated frame on df1.index keeps the
    # rows labelled by df1's index, which is what join_axes=[df1.index] did.
    res = pd.concat([df1, df2], axis=1).reindex(df1.index)
    print(res)
    
    # Append df2's rows below df1.  DataFrame.append was removed in
    # pandas 2.0 (calling it raises AttributeError); pd.concat with
    # ignore_index=True produces the same frame, with rows renumbered 0..5.
    res = pd.concat([df1, df2], ignore_index=True)
    print(res)
    
    输出结果:
    
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    0  1.0  1.0  1.0  1.0
    1  1.0  1.0  1.0  1.0
    2  1.0  1.0  1.0  1.0
    0  2.0  2.0  2.0  2.0
    1  2.0  2.0  2.0  2.0
    2  2.0  2.0  2.0  2.0
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    3  1.0  1.0  1.0  1.0
    4  1.0  1.0  1.0  1.0
    5  1.0  1.0  1.0  1.0
    6  2.0  2.0  2.0  2.0
    7  2.0  2.0  2.0  2.0
    8  2.0  2.0  2.0  2.0
         0    1    2    3    4    5    6    7
    1  1.0  1.0  1.0  1.0  NaN  NaN  NaN  NaN
    2  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
    3  1.0  1.0  1.0  1.0  2.0  2.0  2.0  2.0
    4  NaN  NaN  NaN  NaN  2.0  2.0  2.0  2.0
         b    c    d
    0  1.0  1.0  1.0
    1  1.0  1.0  1.0
    2  1.0  1.0  1.0
    3  2.0  2.0  2.0
    4  2.0  2.0  2.0
    5  2.0  2.0  2.0
         a    b    c    d    a    b    c    d
    0  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    1  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
         a    b    c    d
    0  0.0  0.0  0.0  0.0
    1  0.0  0.0  0.0  0.0
    2  0.0  0.0  0.0  0.0
    3  1.0  1.0  1.0  1.0
    4  1.0  1.0  1.0  1.0
    5  1.0  1.0  1.0  1.0
    import numpy as np
    import pandas as pd
    
    # Two float Series over the same labels f..a, with NaN holes in
    # different positions.
    a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan], index=list('fedcba'))
    print(a)
    b = pd.Series([1, np.nan, 3, 4, 5, np.nan], index=list('fedcba'))
    print(b)

    # First result: b's values win and its NaN holes are filled from a.
    # Second result: a's values win and its NaN holes are filled from b.
    print(b.combine_first(a), a.combine_first(b), sep='\n')
    
    输出结果:
    f    NaN
    e    2.5
    d    NaN
    c    3.5
    b    4.5
    a    NaN
    dtype: float64
    f    1.0
    e    NaN
    d    3.0
    c    4.0
    b    5.0
    a    NaN
    dtype: float64
    f    1.0
    e    2.5
    d    3.0
    c    4.0
    b    5.0
    a    NaN
    dtype: float64
    f    1.0
    e    2.5
    d    3.0
    c    3.5
    b    4.5
    a    NaN
    dtype: float64
  • 相关阅读:
    JavaScript操作符instanceof揭秘
    Linux打开txt文件乱码的解决方法
    Working copy locked run svn cleanup not work
    poj 2299 UltraQuickSort 归并排序求解逆序对
    poj 2312 Battle City 优先队列+bfs 或 记忆化广搜
    poj2352 stars 树状数组
    poj 2286 The Rotation Game 迭代加深
    hdu 1800 Flying to the Mars
    poj 3038 Children of the Candy Corn bfs dfs
    hdu 1983 Kaitou Kid The Phantom Thief (2) DFS + BFS
  • 原文地址:https://www.cnblogs.com/yuxiangyang/p/11286394.html
Copyright © 2011-2022 走看看