axis合并方向
import pandas as pd import pickle import numpy as np df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d']) df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d']) df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d']) #0表示竖项合并 1表示横项合并 ingnore_index重置序列index index变为0 1 2 3 4 5 6 7 8 res = pd.concat([df1, df2, df3], axis=0, ignore_index=True) print(res)
输出
a b c d 0 0.0 0.0 0.0 0.0 1 0.0 0.0 0.0 0.0 2 0.0 0.0 0.0 0.0 3 1.0 1.0 1.0 1.0 4 1.0 1.0 1.0 1.0 5 1.0 1.0 1.0 1.0 6 2.0 2.0 2.0 2.0 7 2.0 2.0 2.0 2.0 8 2.0 2.0 2.0 2.0
join合并方式
import pandas as pd import pickle import numpy as np df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3]) df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4]) print(df1) print(df2) res=pd.concat([df1,df2],axis=1,join='outer')#行往外进行合并,并集 print(res) res=pd.concat([df1,df2],axis=1,join='inner')#行相同的进行合并,合并都有的行,交集 print(res) res=pd.concat([df1,df2],axis=1,join_axes=[df1.index])#以df1的序列进行合并 df2中没有的序列NaN值填充 print(res)
输出
a b c d 1 0.0 0.0 0.0 0.0 2 0.0 0.0 0.0 0.0 3 0.0 0.0 0.0 0.0 b c d e 2 1.0 1.0 1.0 1.0 3 1.0 1.0 1.0 1.0 4 1.0 1.0 1.0 1.0 a b c d b c d e 1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 4 NaN NaN NaN NaN 1.0 1.0 1.0 1.0 a b c d b c d e 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 a b c d b c d e 1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN 2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
append添加数据
import pandas as pd import pickle import numpy as np df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d']) df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d']) df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d']) print(df1) print(df2) print(df3) s1 = pd.Series([1,2,3,4], index=['a','b','c','d']) print(s1) #将df2合并到df1的下面 并重置index res=df1.append(df2,ignore_index=True) print(res) #将s1合并到df1下面 并重置index res=df1.append(s1,ignore_index=True) print(res)
输出
a b c d 0 0.0 0.0 0.0 0.0 1 0.0 0.0 0.0 0.0 2 0.0 0.0 0.0 0.0 a b c d 0 1.0 1.0 1.0 1.0 1 1.0 1.0 1.0 1.0 2 1.0 1.0 1.0 1.0 a b c d 0 2.0 2.0 2.0 2.0 1 2.0 2.0 2.0 2.0 2 2.0 2.0 2.0 2.0 a 1 b 2 c 3 d 4 dtype: int64 a b c d 0 0.0 0.0 0.0 0.0 1 0.0 0.0 0.0 0.0 2 0.0 0.0 0.0 0.0 3 1.0 1.0 1.0 1.0 4 1.0 1.0 1.0 1.0 5 1.0 1.0 1.0 1.0 a b c d 0 0.0 0.0 0.0 0.0 1 0.0 0.0 0.0 0.0 2 0.0 0.0 0.0 0.0 3 1.0 2.0 3.0 4.0
Pandas合并merge
依据一组key合并
import pandas as pd import pickle import numpy as np left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']}) print(left) right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}) print(right) res=pd.merge(left,right,on='key') print(res)
输出
key A B 0 K0 A0 B0 1 K1 A1 B1 2 K2 A2 B2 3 K3 A3 B3 key C D 0 K0 C0 D0 1 K1 C1 D1 2 K2 C2 D2 3 K3 C3 D3 key A B C D 0 K0 A0 B0 C0 D0 1 K1 A1 B1 C1 D1 2 K2 A2 B2 C2 D2 3 K3 A3 B3 C3 D3
依据两组key合并
import pandas as pd import pickle import numpy as np left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']}) print(left) print(" ") right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'], 'key2': ['K0', 'K0', 'K0', 'K0'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}) print(right) print(" ") #内联合并,即共有的key1-key2,交集 res=pd.merge(left,right,on=['key1','key2'],how='inner') print(res) print(" ") #外联合并,key1-key2,并集 res=pd.merge(left,right,on=['key1','key2'],how='outer') print(res) print(" ") #左联合并,使用左键值 res=pd.merge(left,right,on=['key1','key2'],how='left') print(res) print(" ") #右联合并,使用右键值 res=pd.merge(left,right,on=['key1','key2'],how='right') print(res)
输出
key1 key2 A B 0 K0 K0 A0 B0 1 K0 K1 A1 B1 2 K1 K0 A2 B2 3 K2 K1 A3 B3 key1 key2 C D 0 K0 K0 C0 D0 1 K1 K0 C1 D1 2 K1 K0 C2 D2 3 K2 K0 C3 D3 key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K1 K0 A2 B2 C1 D1 2 K1 K0 A2 B2 C2 D2 key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K0 K1 A1 B1 NaN NaN 2 K1 K0 A2 B2 C1 D1 3 K1 K0 A2 B2 C2 D2 4 K2 K1 A3 B3 NaN NaN 5 K2 K0 NaN NaN C3 D3 key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K0 K1 A1 B1 NaN NaN 2 K1 K0 A2 B2 C1 D1 3 K1 K0 A2 B2 C2 D2 4 K2 K1 A3 B3 NaN NaN key1 key2 A B C D 0 K0 K0 A0 B0 C0 D0 1 K1 K0 A2 B2 C1 D1 2 K1 K0 A2 B2 C2 D2 3 K2 K0 NaN NaN C3 D3
import pandas as pd import pickle import numpy as np df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']}) print(df1) print(" ") df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]}) print(df2) print(" ") #依据col1进行合并 并启用indicator=True输出每项合并方式 res=pd.merge(df1,df2,on='col1',how='outer',indicator=True) print(res) print(" ") #自定义indicator column名称 res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column') print(res) print(" ")
输出
col1 col_left 0 0 a 1 1 b col1 col_right 0 1 2 1 2 2 2 2 2 col1 col_left col_right _merge 0 0 a NaN left_only 1 1 b 2.0 both 2 2 NaN 2.0 right_only 3 2 NaN 2.0 right_only col1 col_left col_right indicator_column 0 0 a NaN left_only 1 1 b 2.0 both 2 2 NaN 2.0 right_only 3 2 NaN 2.0 right_only
依据index合并
import pandas as pd import pickle import numpy as np left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, index=['K0', 'K1', 'K2']) print(left) print(" ") right = pd.DataFrame({'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']}, index=['K0', 'K2', 'K3']) print(right) print(" ") #根据index索引进行合并 并选择外联合并 res=pd.merge(left,right,left_index=True,right_index=True,how='outer') print(res) print(" ") res=pd.merge(left,right,left_index=True,right_index=True,how='inner') print(res) print(" ")
输出
A B
K0 A0 B0
K1 A1 B1
K2 A2 B2
C D
K0 C0 D0
K2 C2 D2
K3 C3 D3
A B C D
K0 A0 B0 C0 D0
K1 A1 B1 NaN NaN
K2 A2 B2 C2 D2
K3 NaN NaN C3 D3
A B C D
K0 A0 B0 C0 D0
K2 A2 B2 C2 D2