from __future__ import print_function
import pandas as pd
import numpy as np
concatenating
# ignore_index demo: build three 3x4 float frames with identical columns,
# filled with 0.0, 1.0 and 2.0 respectively, and show each of them.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))
print(df1, df2, df3, sep='\n')
> a b c d
> 0 0.0 0.0 0.0 0.0
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> a b c d
> 0 1.0 1.0 1.0 1.0
> 1 1.0 1.0 1.0 1.0
> 2 1.0 1.0 1.0 1.0
> a b c d
> 0 2.0 2.0 2.0 2.0
> 1 2.0 2.0 2.0 2.0
> 2 2.0 2.0 2.0 2.0
# Stack the three frames vertically. ignore_index=True discards the original
# row labels (0,1,2,0,1,2,0,1,2) and renumbers the rows from 0.
res = pd.concat(
    objs=[df1, df2, df3],
    axis=0,
    ignore_index=True,
)
print(res)
> a b c d
> 0 0.0 0.0 0.0 0.0
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> 3 1.0 1.0 1.0 1.0
> 4 1.0 1.0 1.0 1.0
> 5 1.0 1.0 1.0 1.0
> 6 2.0 2.0 2.0 2.0
> 7 2.0 2.0 2.0 2.0
> 8 2.0 2.0 2.0 2.0
join, ('inner', 'outer')
# join ('inner' / 'outer') demo: two frames whose row labels and column
# names only partly overlap (rows 2-3 and columns b, c, d are shared).
df1 = pd.DataFrame(np.zeros((3, 4)), index=[1, 2, 3], columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
print(df1, df2, sep='\n')
> a b c d
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> 3 0.0 0.0 0.0 0.0
> b c d e
> 2 1.0 1.0 1.0 1.0
> 3 1.0 1.0 1.0 1.0
> 4 1.0 1.0 1.0 1.0
# Side-by-side concatenation with join='outer' (the default): the result
# keeps the union of the row labels, and cells with no source value are NaN.
res = pd.concat(
    objs=[df1, df2],
    join='outer',
    axis=1,
)
print(res)
> a b c d b c d e
> 1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN
> 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
> 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
> 4 NaN NaN NaN NaN 1.0 1.0 1.0 1.0
# Side-by-side concatenation with join='inner': only row labels present in
# both frames are kept (the intersection); all other rows are dropped.
res = pd.concat(
    objs=[df1, df2],
    join='inner',
    axis=1,
)
print(res)
> a b c d b c d e
> 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
> 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
join_axes
# join_axes
# The `join_axes` keyword was removed from pd.concat in pandas 1.0. The
# equivalent is an outer concat followed by a reindex: keep exactly df1's
# row labels, with NaN where df2 has no row for that label.
res = pd.concat([df1, df2], axis=1).reindex(df1.index)
print(res)
> a b c d b c d e
> 1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN
> 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
> 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
append
# append demo: df1 and df2 share columns a-d and the default 0..2 index;
# df3 has columns b-e and row labels 2..4. Only df1 and df2 are shown here.
df1 = pd.DataFrame(np.zeros((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.ones((3, 4)), columns=list('abcd'))
df3 = pd.DataFrame(np.ones((3, 4)), index=[2, 3, 4], columns=list('bcde'))
print(df1, df2, sep='\n')
> a b c d
> 0 0.0 0.0 0.0 0.0
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> a b c d
> 0 1.0 1.0 1.0 1.0
> 1 1.0 1.0 1.0 1.0
> 2 1.0 1.0 1.0 1.0
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Stack df2 below df1 and renumber the rows from 0.
res = pd.concat([df1, df2], ignore_index=True)
print(res)
> a b c d
> 0 0.0 0.0 0.0 0.0
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> 3 1.0 1.0 1.0 1.0
> 4 1.0 1.0 1.0 1.0
> 5 1.0 1.0 1.0 1.0
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Stack df2 and df3 below df1, keeping the original row labels. Columns are
# the union of all inputs; cells with no source value become NaN.
res = pd.concat([df1, df2, df3])
print(res)
> a b c d e
> 0 0.0 0.0 0.0 0.0 NaN
> 1 0.0 0.0 0.0 0.0 NaN
> 2 0.0 0.0 0.0 0.0 NaN
> 0 1.0 1.0 1.0 1.0 NaN
> 1 1.0 1.0 1.0 1.0 NaN
> 2 1.0 1.0 1.0 1.0 NaN
> 2 NaN 1.0 1.0 1.0 1.0
> 3 NaN 1.0 1.0 1.0 1.0
> 4 NaN 1.0 1.0 1.0 1.0
# A single row of data, labelled with the same names as df1's columns.
s1 = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
print(s1)
> a 1
> b 2
> c 3
> d 4
> dtype: int64
# Add one specific row to df1. DataFrame.append was removed in pandas 2.0,
# so turn the Series into a one-row frame (its index labels become the
# column names) and concatenate; ignore_index=True renumbers the rows from 0.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)
> a b c d
> 0 0.0 0.0 0.0 0.0
> 1 0.0 0.0 0.0 0.0
> 2 0.0 0.0 0.0 0.0
> 3 1.0 2.0 3.0 4.0
END