zoukankan      html  css  js  c++  java
  • pandas 6 合并数据 concat, append 垂直合并,数据会变高/长

    from __future__ import print_function
    import pandas as pd
    import numpy as np
    

    concatenating

    # ignore index
    # Three 3x4 float frames sharing the column labels a-d, filled with
    # 0.0, 1.0 and 2.0 respectively; each gets the default row labels 0, 1, 2.
    shared_columns = ['a', 'b', 'c', 'd']
    df1 = pd.DataFrame(np.zeros((3, 4)), columns=shared_columns)
    df2 = pd.DataFrame(np.ones((3, 4)), columns=shared_columns)
    df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=shared_columns)
    
    for frame in (df1, df2, df3):
        print(frame)
    
    >      a    b    c    d
    > 0  0.0  0.0  0.0  0.0
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    
    >      a    b    c    d
    > 0  1.0  1.0  1.0  1.0
    > 1  1.0  1.0  1.0  1.0
    > 2  1.0  1.0  1.0  1.0
    
    >      a    b    c    d
    > 0  2.0  2.0  2.0  2.0
    > 1  2.0  2.0  2.0  2.0
    > 2  2.0  2.0  2.0  2.0
    
    # Stack the three frames vertically (axis=0). ignore_index=True discards the
    # original row labels 0,1,2 / 0,1,2 / 0,1,2 and renumbers the rows 0..8.
    frames = [df1, df2, df3]
    res = pd.concat(frames, axis=0, ignore_index=True)
    print(res)
    
    >      a    b    c    d
    > 0  0.0  0.0  0.0  0.0
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    > 3  1.0  1.0  1.0  1.0
    > 4  1.0  1.0  1.0  1.0
    > 5  1.0  1.0  1.0  1.0
    > 6  2.0  2.0  2.0  2.0
    > 7  2.0  2.0  2.0  2.0
    > 8  2.0  2.0  2.0  2.0
    
    join, ('inner', 'outer')
    # join, ('inner', 'outer')
    # Two frames that only partly overlap: row labels 1-3 vs 2-4 and
    # column labels a-d vs b-e.
    df1 = pd.DataFrame(np.zeros((3, 4)), index=[1, 2, 3], columns=list('abcd'))
    df2 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
    for frame in (df1, df2):
        print(frame)
    
    >      a    b    c    d
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    > 3  0.0  0.0  0.0  0.0
    
    >      b    c    d    e
    > 2  1.0  1.0  1.0  1.0
    > 3  1.0  1.0  1.0  1.0
    > 4  1.0  1.0  1.0  1.0
    
    # join='outer' is the default: take the union of the row labels and fill
    # the cells a frame has no value for with NaN.
    frames = [df1, df2]
    res = pd.concat(frames, join='outer', axis=1)
    print(res)
    
    >      a    b    c    d    b    c    d    e
    > 1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
    > 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    > 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    > 4  NaN  NaN  NaN  NaN  1.0  1.0  1.0  1.0
    
    # join='inner' keeps only the row labels present in both frames (the
    # intersection) and drops every other row.
    frames = [df1, df2]
    res = pd.concat(frames, join='inner', axis=1)
    print(res)
    
    >      a    b    c    d    b    c    d    e
    > 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    > 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    
    join_axes (keyword removed in pandas 1.0; reindex onto df1.index instead)
    # join_axes
    # The `join_axes` keyword of pd.concat was deprecated in pandas 0.25 and
    # removed in 1.0, so passing it now raises TypeError. Reindexing df2 onto
    # df1's row labels before concatenating gives the same result: every row of
    # df1 is kept (missing df2 values become NaN) and rows found only in df2
    # are dropped.
    res = pd.concat([df1, df2.reindex(df1.index)], axis=1)
    print(res)
    
    >      a    b    c    d    b    c    d    e
    > 1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
    > 2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    > 3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
    

    append (DataFrame.append was removed in pandas 2.0; pd.concat is the replacement)

    # append
    # df1 and df2 share columns a-d and the default row labels 0-2; df3 has
    # columns b-e and row labels 2-4, so it only partly lines up with them.
    abcd = ['a', 'b', 'c', 'd']
    df1 = pd.DataFrame(np.zeros((3, 4)), columns=abcd)
    df2 = pd.DataFrame(np.ones((3, 4)), columns=abcd)
    df3 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=['b', 'c', 'd', 'e'])
    for frame in (df1, df2):
        print(frame)
    
    >      a    b    c    d
    > 0  0.0  0.0  0.0  0.0
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    
    >      a    b    c    d
    > 0  1.0  1.0  1.0  1.0
    > 1  1.0  1.0  1.0  1.0
    > 2  1.0  1.0  1.0  1.0
    
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
    # rows are stacked with pd.concat instead. ignore_index=True renumbers the
    # combined rows 0..5 rather than keeping 0,1,2 / 0,1,2.
    res = pd.concat([df1, df2], ignore_index=True)
    print(res)
    
    >      a    b    c    d
    > 0  0.0  0.0  0.0  0.0
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    > 3  1.0  1.0  1.0  1.0
    > 4  1.0  1.0  1.0  1.0
    > 5  1.0  1.0  1.0  1.0
    
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
    # frames are stacked with pd.concat instead. Without ignore_index the
    # original row labels are kept, the result has the union of the columns
    # (a-e), and cells a frame has no value for are filled with NaN.
    res = pd.concat([df1, df2, df3])
    print(res)
    
    >      a    b    c    d    e
    > 0  0.0  0.0  0.0  0.0  NaN
    > 1  0.0  0.0  0.0  0.0  NaN
    > 2  0.0  0.0  0.0  0.0  NaN
    > 0  1.0  1.0  1.0  1.0  NaN
    > 1  1.0  1.0  1.0  1.0  NaN
    > 2  1.0  1.0  1.0  1.0  NaN
    > 2  NaN  1.0  1.0  1.0  1.0
    > 3  NaN  1.0  1.0  1.0  1.0
    > 4  NaN  1.0  1.0  1.0  1.0
    
    # An integer Series labelled a-d, i.e. with the same labels as df1's columns.
    row_labels = ['a', 'b', 'c', 'd']
    row_values = [1, 2, 3, 4]
    s1 = pd.Series(row_values, index=row_labels)
    print(s1)
    
    > a    1
    > b    2
    > c    3
    > d    4
    > dtype: int64
    
    # Add one specific row. DataFrame.append was deprecated in pandas 1.4 and
    # removed in 2.0, so the Series is first turned into a one-row frame
    # (to_frame().T makes its labels a-d the columns) and then stacked under
    # df1 with pd.concat; ignore_index=True gives the new row the label 3.
    res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
    print(res)
    
    >      a    b    c    d
    > 0  0.0  0.0  0.0  0.0
    > 1  0.0  0.0  0.0  0.0
    > 2  0.0  0.0  0.0  0.0
    > 3  1.0  2.0  3.0  4.0
    

    END

  • 相关阅读:
    Ubuntu16.04下同时安装Anaconda2与Anaconda3
    ansible 常用模块
    docker 笔记 (7) 限制容器
    linux 磁盘
    docker 笔记 (6)搭建本地registry
    docker 笔记 (5)常用命令
    docker 笔记(4) Dockerfile 常用的指令
    NGINX下配置CACHE-CONTROL
    mysql二进制安装
    [Selenium] Explicit wait 方法
  • 原文地址:https://www.cnblogs.com/yangzhaonan/p/10436031.html
Copyright © 2011-2022 走看看