zoukankan      html  css  js  c++  java
  • pandas库------对数据进行操作---合并,转换,拼接

     1 >>> frame2
     2        addr  age   name
     3 0   beijing   12  zhang
     4 1  shanghai   24     li
     5 2  hangzhou   24    cao
     6 >>> frame1
     7        addr   name
     8 0   beijing  zhang
     9 1  shanghai     li
    10 2  hangzhou    cao
    11 3  shenzhen    han
    12 >>> pd.merge(frame1,frame2)   未指定on时,默认以所有同名列(addr和name)为连接键进行拼接
    13        addr   name  age
    14 0   beijing  zhang   12
    15 1  shanghai     li   24
    16 2  hangzhou    cao   24
    17 >>> pd.merge(frame1,frame2,on='name')     用on指定连接列,用how指定拼接方式(见下文)
    18      addr_x   name    addr_y  age
    19 0   beijing  zhang   beijing   12
    20 1  shanghai     li  shanghai   24
    21 2  hangzhou    cao  hangzhou   24
    22 >>> pd.merge(frame1,frame2,on='name',how='outer')
    23      addr_x   name    addr_y   age
    24 0   beijing  zhang   beijing  12.0
    25 1  shanghai     li  shanghai  24.0
    26 2  hangzhou    cao  hangzhou  24.0
    27 3  shenzhen    han       NaN   NaN
    28 >>> pd.merge(frame1,frame2,on='name',how='inner')
    29      addr_x   name    addr_y  age
    30 0   beijing  zhang   beijing   12
    31 1  shanghai     li  shanghai   24
    32 2  hangzhou    cao  hangzhou   24
    33 >>> pd.merge(frame1,frame2,on='name',how='left')
    34      addr_x   name    addr_y   age
    35 0   beijing  zhang   beijing  12.0
    36 1  shanghai     li  shanghai  24.0
    37 2  hangzhou    cao  hangzhou  24.0
    38 3  shenzhen    han       NaN   NaN
    39 >>> pd.merge(frame1,frame2,on='name',how='right')
    40      addr_x   name    addr_y  age
    41 0   beijing  zhang   beijing   12
    42 1  shanghai     li  shanghai   24
    43 2  hangzhou    cao  hangzhou   24
    44 >>> pd.merge(frame1,frame2,on='name',left_index=True)
    45      addr_x   name    addr_y  age
    46 0   beijing  zhang   beijing   12
    47 1  shanghai     li  shanghai   24
    48 2  hangzhou    cao  hangzhou   24
    49 >>> pd.merge(frame1,frame2,on='name',right_index=True)
    50      addr_x   name    addr_y  age
    51 0   beijing  zhang   beijing   12
    52 1  shanghai     li  shanghai   24
    53 2  hangzhou    cao  hangzhou   24
    54 >>> pd.merge(frame1,frame2,on='addr',right_index=True)
    55        addr name_x  age name_y
    56 0   beijing  zhang   12  zhang
    57 1  shanghai     li   24     li
    58 2  hangzhou    cao   24    cao
    1 >>> frame1.columns=['addr1','name1']
    2 >>> frame1.join(frame2)
    3       addr1  name1      addr   age   name    修改掉重复的列名称,然后join()
    4 0   beijing  zhang   beijing  12.0  zhang
    5 1  shanghai     li  shanghai  24.0     li
    6 2  hangzhou    cao  hangzhou  24.0    cao
    7 3  shenzhen    han       NaN   NaN    NaN
     1 >>> array1
     2 array([[0, 1, 2],
     3        [3, 4, 5],
     4        [6, 7, 8]])
     5 >>> array1=np.arange(9).reshape((3,3))+6
     6 >>> array2=np.arange(9).reshape((3,3))
     7 >>> array1
     8 array([[ 6,  7,  8],
     9        [ 9, 10, 11],
    10        [12, 13, 14]])
    11 >>> np.concatenate([array1,array2],axis=1)   np模块中对数组进行concatenate()
    12 array([[ 6,  7,  8,  0,  1,  2],
    13        [ 9, 10, 11,  3,  4,  5],
    14        [12, 13, 14,  6,  7,  8]])
    15 >>> np.concatenate([array1,array2],axis=0)
    16 array([[ 6,  7,  8],
    17        [ 9, 10, 11],
    18        [12, 13, 14],
    19        [ 0,  1,  2],
    20        [ 3,  4,  5],
    21        [ 6,  7,  8]])
    22 >>> 
    23 >>> np.concatenate([array1,array2])
    24 array([[ 6,  7,  8],
    25        [ 9, 10, 11],
    26        [12, 13, 14],
    27        [ 0,  1,  2],
    28        [ 3,  4,  5],
    29        [ 6,  7,  8]])
     1 >>> ser1=pd.Series(np.random.rand(4))   pd模块中也有concat()
     2 >>> ser1
     3 0    0.998915
     4 1    0.117503
     5 2    0.747180
     6 3    0.641508
     7 dtype: float64
     8 >>> ser1=pd.Series(np.random.rand(4)*100)
     9 >>> ser1
    10 0     8.818592
    11 1    42.317816
    12 2    43.274021
    13 3    23.245148
    14 dtype: float64
    15 >>> ser2=pd.Series(np.random.rand(4)*100,index=[5,6,7,8])
    16 >>> ser2
    17 5    58.416554
    18 6    11.840838
    19 7    38.146851
    20 8     0.135517
    21 dtype: float64
    22 >>> pd.concat([ser1,ser2])
    23 0     8.818592
    24 1    42.317816
    25 2    43.274021
    26 3    23.245148
    27 5    58.416554
    28 6    11.840838
    29 7    38.146851
    30 8     0.135517
    31 dtype: float64
    32 >>> pd.concat([ser1,ser2],axis=1)
    33            0          1
    34 0   8.818592        NaN
    35 1  42.317816        NaN
    36 2  43.274021        NaN
    37 3  23.245148        NaN
    38 5        NaN  58.416554
    39 6        NaN  11.840838
    40 7        NaN  38.146851
    41 8        NaN   0.135517
     1 >>> pd.concat([ser1,ser2],axis=1,keys=[1,2])
     2            1          2
     3 0   8.818592        NaN
     4 1  42.317816  79.632793
     5 2  43.274021  96.700070
     6 3  23.245148  64.573269
     7 4        NaN  68.629709
     8 >>> ser2.index=[2,4,5,6]
     9 >>> ser2
    10 2    79.632793
    11 4    96.700070
    12 5    64.573269
    13 6    68.629709
    14 dtype: float64
    15 >>> ser1.combine_first(ser2)    对缺失的数据进行填充  combine_first()
    16 0     8.818592
    17 1    42.317816
    18 2    43.274021
    19 3    23.245148
    20 4    96.700070
    21 5    64.573269
    22 6    68.629709
    23 dtype: float64
     1 >>> ser1
     2 0    a
     3 1    b
     4 2    c
     5 3    d
     6 dtype: object
     7 >>> ser2
     8 2    0
     9 4    1
    10 5    2
    11 6    3
    12 dtype: int32
    13 >>> ser2.combine_first(ser1)   ser1在后
    14 0    a
    15 1    b
    16 2    0
    17 3    d
    18 4    1
    19 5    2
    20 6    3
    21 dtype: object
    22 >>> ser1[:2].combine_first(ser2)  ser1在前
    23 0    a
    24 1    b
    25 2    0
    26 4    1
    27 5    2
    28 6    3
    29 dtype: object
     1 >>> frame1=pd.DataFrame({'name':['zhang','li','wang'],'age':[12,45,34],'addr':['beijing','shanghai','shenzhen']})
     2 >>> frame1
     3        addr  age   name
     4 0   beijing   12  zhang
     5 1  shanghai   45     li
     6 2  shenzhen   34   wang
     7 >>> frame1.stack()        frame的堆叠与还原:stack()把列转为内层行索引,unstack()再转回列
     8 0  addr     beijing
     9    age           12
    10    name       zhang
    11 1  addr    shanghai
    12    age           45
    13    name          li
    14 2  addr    shenzhen
    15    age           34
    16    name        wang
    17 dtype: object
    18 >>> frame1.stack().unstack()
    19        addr age   name
    20 0   beijing  12  zhang
    21 1  shanghai  45     li
    22 2  shenzhen  34   wang
    23 >>> frame1.stack().unstack(0)   列和索引转换
    24             0         1         2
    25 addr  beijing  shanghai  shenzhen
    26 age        12        45        34
    27 name    zhang        li      wang
     1 >>> longframe=pd.DataFrame({'color':['white','white','white','red','red','red','black','black','black'],'item':['ball','pen','mug','ball','pen','mug','ball','pen','mug'],'value':np.random.rand(9)})
     2 >>> longframe
     3    color  item     value        对冗余的消除,将longframe转换为wideframe
     4 0  white  ball  0.260358
     5 1  white   pen  0.543955
     6 2  white   mug  0.456874
     7 3    red  ball  0.967021
     8 4    red   pen  0.657271
     9 5    red   mug  0.984256
    10 6  black  ball  0.550236
    11 7  black   pen  0.731625
    12 8  black   mug  0.006728
    13 >>> wideframe=longframe.pivot('color','item')
    14 >>> wideframe
    15           value                    
    16 item       ball       mug       pen
    17 color                              
    18 black  0.550236  0.006728  0.731625
    19 red    0.967021  0.984256  0.657271
    20 white  0.260358  0.456874  0.543955
    21 >>> frame1
    22        addr  age   name
    23 0   beijing   12  zhang
    24 1  shanghai   12     li
    25 2   beijing   12   wang
    26 >>> del frame['addr']
    27 Traceback (most recent call last):
    28   File "<pyshell#103>", line 1, in <module>
    29     del frame['addr']
    30 NameError: name 'frame' is not defined
    31 >>> del frame1['addr']
    32 >>> frame1
    33    age   name
    34 0   12  zhang
    35 1   12     li
    36 2   12   wang
  • 相关阅读:
    BZOJ3697: 采药人的路径
    解题:WC 2007 石头剪刀布
    解题:CQOI 2017 老C的方块
    解题:洛谷4314 CPU监控
    解题:CQOI 2017 老C的任务
    解题:CF1009 Dominant Indices
    解题:CF570D Tree Requests
    解题:APIO 2012 派遣
    解题:ZJOI 2015 幻想乡战略游戏
    解题:洛谷4178 Tree
  • 原文地址:https://www.cnblogs.com/caojunjie/p/6710981.html
Copyright © 2011-2022 走看看