zoukankan      html  css  js  c++  java
  • numpy&pandas补充常用示例

    Numpy

    【数组切片】

    In [115]: a = np.arange(12).reshape((3,4))                                                                                                                                                        
    
    In [116]: a                                                                                                                                                                                       
    Out[116]: 
    array([[ 0,  1,  2,  3],
           [ 4,  5,  6,  7],
           [ 8,  9, 10, 11]])
    
    In [117]: a[1:,1:3]                                                                                                                                                                               
    Out[117]: 
    array([[ 5,  6],
           [ 9, 10]])
    
    In [118]:
    

      

     【布尔值索引】找出数据中大于5的元素

    In [134]: a = [ random.randint(0,10) for i in range(20) ]                                                                                                                                         
    
    In [135]: a                                                                                                                                                                                       
    Out[135]: [4, 4, 9, 2, 2, 5, 6, 4, 3, 9, 5, 7, 10, 4, 9, 10, 6, 10, 3, 8]
    
    In [136]: _                                                                                                                                                                                       
    Out[136]: [4, 4, 9, 2, 2, 5, 6, 4, 3, 9, 5, 7, 10, 4, 9, 10, 6, 10, 3, 8]
    
    In [137]: a = np.array(a)                                                                                                                                                                         
    
    In [138]: a                                                                                                                                                                                       
    Out[138]: 
    array([ 4,  4,  9,  2,  2,  5,  6,  4,  3,  9,  5,  7, 10,  4,  9, 10,  6,
           10,  3,  8])
    
    In [139]: a>5                                                                                                                                                                                     
    Out[139]: 
    array([False, False,  True, False, False, False,  True, False, False,
            True, False,  True,  True, False,  True,  True,  True,  True,
           False,  True])
    
    In [140]: a[a>5]                                                                                                                                                                                  
    Out[140]: array([ 9,  6,  9,  7, 10,  9, 10,  6, 10,  8])
    
    In [141]:
    

    【布尔值索引】

    - 找出数组中大于5的偶数元素

    - 找出数组中大于5的数 或 偶数

    In [143]: b = a[a>5]                                                                                                                                                                              
    
    In [144]: b                                                                                                                                                                                       
    Out[144]: array([ 9,  6,  9,  7, 10,  9, 10,  6, 10,  8])
    
    In [145]: b[b%2==0]                                                                                                                                                                               
    Out[145]: array([ 6, 10, 10,  6, 10,  8])
    
    In [146]: a[(a>5) & (a%2==0)]                                                                                                                                                                     
    Out[146]: array([ 6, 10, 10,  6, 10,  8])
    
    In [147]:                                                                                                                                                                                         
    
    In [147]: a[(a>5) and (a%2==0)]                                                                                                                                                                   
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-147-fee222ad41de> in <module>
    ----> 1 a[(a>5) and (a%2==0)]
    
    ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
    
    In [148]:                                                                                                                                                                                                                                                                                                                                                                                 
    
    In [148]: a[(a>5) | (a%2==0)]                                                                                                                                                                     
    Out[148]: array([ 4,  4,  9,  2,  2,  6,  4,  9,  7, 10,  4,  9, 10,  6, 10,  8])
    
    In [149]: a[(a>5) or (a%2==0)]                                                                                                                                                                    
    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-149-732531989282> in <module>
    ----> 1 a[(a>5) or (a%2==0)]
    
    ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
    
    In [150]: 
    

      

    【花式索引(索引位置无规律)】

    # 一维数组花式索引示例
    In [153]: a = np.arange(10,20)                                                                                                                                                                    
    
    In [154]: a                                                                                                                                                                                       
    Out[154]: array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])
    
    In [155]: a[[1,3,4,9]]                                                                                                                                                                            
    Out[155]: array([11, 13, 14, 19])
    
    In [156]:
    
    # 二维数组花式索引示例一(取索引为2的行,即第三行,中索引为2和3的两列)
    In [165]: a                                                                                                                                                                                       
    Out[165]: 
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    
    In [166]: a[2,[2,3]]                                                                                                                                                                              
    Out[166]: array([12, 13])
    
    In [167]: 
    
    # 二维数组花式索引示例二 (行:取索引为1和3的行,列:取索引为2和4的列)
    In [167]: a                                                                                                                                                                                       
    Out[167]: 
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    
    In [168]: a[[1,3],[2,4]]        # 注意:对行和列同时使用花式索引时,两个索引列表按位置一一配对,取到的是 (1,2) 和 (3,4) 两个元素,而不是行列交叉得到的子矩阵
    Out[168]: array([ 7, 19])
    
    In [169]: a[[1,3],:]                                                                                                                                                                              
    Out[169]: 
    array([[ 5,  6,  7,  8,  9],
           [15, 16, 17, 18, 19]])
    
    In [170]: a[[1,3],:][:,[2,4]]                                                                                                                                                                     
    Out[170]: 
    array([[ 7,  9],
           [17, 19]])
    
    In [171]: 
    

    【数值取整问题】

      向0取整(int) 四舍五入(round) 向上取整(math.ceil) 向下取整(math.floor)
    1.7 1 2 2 1
    -1.7 -1 -2 -1 -2
    1.3 1 1 2 1
    -1.3 -1 -1 -1 -2
    In [53]: a = np.arange(-5.5,5.5)                                                                                                                                                                  
    
    In [54]: a                                                                                                                                                                                        
    Out[54]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])
    
    In [55]: np.trunc(a)         # numpy向0取整                                                                                                                                                                     
    Out[55]: array([-5., -4., -3., -2., -1., -0.,  0.,  1.,  2.,  3.,  4.])
    
    In [56]: np.round(a)         # numpy取整:恰好为 .5 时向最近的偶数舍入(四舍六入五成双),并非严格的四舍五入
    Out[56]: array([-6., -4., -4., -2., -2., -0.,  0.,  2.,  2.,  4.,  4.])
    
    In [57]: np.rint(a)          # numpy.rint() 等价于 numpy.round()                                                                                                                                                                     
    Out[57]: array([-6., -4., -4., -2., -2., -0.,  0.,  2.,  2.,  4.,  4.])
    
    In [58]: np.ceil(a)          # numpy向上取整                                                                                                                                                                     
    Out[58]: array([-5., -4., -3., -2., -1., -0.,  1.,  2.,  3.,  4.,  5.])
    
    In [59]: np.floor(a)         # numpy向下取整                                                                                                                                                                     
    Out[59]: array([-6., -5., -4., -3., -2., -1.,  0.,  1.,  2.,  3.,  4.])
    
    In [60]: 
    

      

    numpy.modf():把整数和小数分开

    In [62]: a                                                                                                                                                                                        
    Out[62]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])
    
    In [63]: x,y = np.modf(a)                                                                                                                                                                         
    
    In [64]: x                                                                                                                                                                                        
    Out[64]: array([-0.5, -0.5, -0.5, -0.5, -0.5, -0.5,  0.5,  0.5,  0.5,  0.5,  0.5])
    
    In [65]: y                                                                                                                                                                                        
    Out[65]: array([-5., -4., -3., -2., -1., -0.,  0.,  1.,  2.,  3.,  4.])
    
    In [66]: x+y                                                                                                                                                                                      
    Out[66]: array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5])
    
    In [67]: 
    

      

    numpy.nan

    In [85]: np.nan == np.nan                                                                                                                                                                         
    Out[85]: False
    
    In [86]: np.nan is np.nan                                                                                                                                                                         
    Out[86]: True
    
    In [87]:
    In [77]: a = np.arange(5)                                                                                                                                                                         
    
    In [78]: b = a/a                                                                                                                                                                                  
    /usr/bin/ipython3:1: RuntimeWarning: invalid value encountered in true_divide
      #!/usr/local/python3.6/bin/python3.6
    
    In [79]: b                                                                                                                                                                                        
    Out[79]: array([nan,  1.,  1.,  1.,  1.])
    
    In [80]: np.*nan?                                                                                                                                                                                 
    np.isnan
    np.nan
    
    In [81]: np.isnan(b)                                                                                                                                                                              
    Out[81]: array([ True, False, False, False, False])
    
    In [82]: b[np.isnan(b)]                                                                                                                                                                           
    Out[82]: array([nan])
    
    In [83]: b[~np.isnan(b)]        # 取反                                                                                                                                                                  
    Out[83]: array([1., 1., 1., 1.])
    
    In [84]:
    

    numpy.inf

    In [97]: np.inf == np.inf                                                                                                                                                                         
    Out[97]: True
    
    In [98]: np.inf is np.inf                                                                                                                                                                         
    Out[98]: True
    
    In [99]: 
    In [89]: a = np.arange(3,8)                                                                                                                                                                       
    
    In [90]: a                                                                                                                                                                                        
    Out[90]: array([3, 4, 5, 6, 7])
    
    In [91]: b = [1,0,1,0,1]                                                                                                                                                                          
    
    In [92]: c = a/b                                                                                                                                                                                  
    /usr/bin/ipython3:1: RuntimeWarning: divide by zero encountered in true_divide
      #!/usr/local/python3.6/bin/python3.6
    
    In [93]: c                                                                                                                                                                                        
    Out[93]: array([ 3., inf,  5., inf,  7.])
    
    In [94]: c[c!=np.inf]                                                                                                                                                                             
    Out[94]: array([3., 5., 7.])
    
    
    In [96]: c[~np.isinf(c)]                                                                                                                                                                          
    Out[96]: array([3., 5., 7.])
    
    In [97]:
    

      

    numpy.maximum 和 numpy.minimum

    In [102]: a                                                                                                                                                                                       
    Out[102]: array([3, 4, 5, 6, 7])
    
    In [103]: b                                                                                                                                                                                       
    Out[103]: array([2, 5, 3, 7, 4])
    
    In [104]: np.maximum(a,b)                                                                                                                                                                         
    Out[104]: array([3, 5, 5, 7, 7])
    
    In [105]: np.minimum(a,b)                                                                                                                                                                         
    Out[105]: array([2, 4, 3, 6, 4])
    
    In [106]: 
    

      

    Pandas

    series整数索引问题,推荐多使用iloc

    In [137]: s1 =pd.Series(np.arange(10))                                                                                                                                                            
    
    In [138]: s2 = s1[5:].copy()                                                                                                                                                                      
    
    In [139]: s1                                                                                                                                                                                      
    Out[139]: 
    0    0
    1    1
    2    2
    3    3
    4    4
    5    5
    6    6
    7    7
    8    8
    9    9
    dtype: int64
    
    In [140]: s2                                                                                                                                                                                      
    Out[140]: 
    5    5
    6    6
    7    7
    8    8
    9    9
    dtype: int64
    
    In [141]:
    
    In [147]: s2[5]                                                                                                                                                                                   
    Out[147]: 5
    
    In [148]: s2.loc[5]                                                                                                                                                                               
    Out[148]: 5
    
    In [149]: s2.iloc[0]                                                                                                                                                                              
    Out[149]: 5
    
    In [150]:
    

    series数据对齐

    In [150]: a = pd.Series([12,23,34],['c','a','d'])                                                                                                                                                 
    
    In [151]: b = pd.Series([11,30,9],['d','c','a'])                                                                                                                                                  
    
    In [152]: a+b                                                                                                                                                                                     
    Out[152]: 
    a    32
    c    42
    d    45
    dtype: int64
    
    In [153]: 
    In [153]: b = pd.Series([11,30,9,100],['d','c','a','b'])                                                                                                                                          
    
    In [154]: a+b                                                                                                                                                                                     
    Out[154]: 
    a    32.0
    b     NaN    # 缺失值
    c    42.0
    d    45.0
    dtype: float64
    
    In [155]: a.add(b, fill_value=0)                                                                                                                                                                  
    Out[155]: 
    a     32.0
    b    100.0
    c     42.0
    d     45.0
    dtype: float64
    
    In [156]: 
    

    series缺失值处理

    In [158]: s = a+b                                                                                                                                                                                 
    
    In [159]: s                                                                                                                                                                                       
    Out[159]: 
    a    32.0
    b     NaN
    c    42.0
    d    45.0
    dtype: float64
    
    In [160]: s.isnull()                                                                                                                                                                              
    Out[160]: 
    a    False
    b     True
    c    False
    d    False
    dtype: bool
    
    In [161]: s.notnull()                                                                                                                                                                             
    Out[161]: 
    a     True
    b    False
    c     True
    d     True
    dtype: bool
    
    In [162]: 
    # 扔掉缺失值
    In [162]: s.dropna()                                                                                                                                                                              
    Out[162]: 
    a    32.0
    c    42.0
    d    45.0
    dtype: float64
    
    In [163]: 
    
    # 设置缺失值
    In [163]: s.fillna(0)                                                                                                                                                                             
    Out[163]: 
    a    32.0
    b     0.0
    c    42.0
    d    45.0
    dtype: float64
    
    In [164]: 
    
    # 设置缺失值(均值)
    In [166]: s.fillna(s.mean())                                                                                                                                                                      
    Out[166]: 
    a    32.000000
    b    39.666667
    c    42.000000
    d    45.000000
    dtype: float64
    
    In [167]: 
    

    DataFrame创建示例

    In [169]: pd.DataFrame({'one':[1,2,3],'two':[10,20,30]})                                                                                                                                          
    Out[169]: 
       one  two
    0    1   10
    1    2   20
    2    3   30
    
    In [170]: pd.DataFrame({'one':[1,2,3],'two':[10,20,30]},index=['A','B','C'])                                                                                                                      
    Out[170]: 
       one  two
    A    1   10
    B    2   20
    C    3   30
    
    In [171]: df = _                                                                                                                                                                                  
    
    In [172]: df                                                                                                                                                                                      
    Out[172]: 
       one  two
    A    1   10
    B    2   20
    C    3   30
    
    In [173]: pd.DataFrame({'one':pd.Series([1,2,3],index=['A','B','C']),'two':pd.Series([10,20,30,40],index=['B','A','C','D'])})                                                                     
    Out[173]: 
       one  two
    A  1.0   20
    B  2.0   10
    C  3.0   30
    D  NaN   40
    
    In [174]: 

    读文件创建DataFrame

    In [181]: !vim demo.csv                                                                                                                                                                           
    
    In [182]: !cat demo.csv                                                                                                                                                                           
    'one','two','three'
    'A',1,2,3
    'B',4,5,6
    'C',7,8,9
    
    In [183]: pd.read_csv('demo.csv')                                                                                                                                                                 
    Out[183]: 
         'one'  'two'  'three'
    'A'      1      2        3
    'B'      4      5        6
    'C'      7      8        9
    
    In [184]: 

    DataFrame索引问题:获取单个元素

    In [218]: df                                                                                                                                                                                      
    Out[218]: 
       one  two
    A  1.0   20
    B  2.0   10
    C  3.0   30
    D  NaN   40
    
    In [219]: df['two']['C']                                                                                                                                                                          
    Out[219]: 30
    
    In [220]: df.two.C                                                                                                                                                                                
    Out[220]: 30
    
    In [221]: df.loc['C','two']                                                                                                                                                                       
    Out[221]: 30
    
    In [222]: df.iloc[2,1]                                                                                                                                                                            
    Out[222]: 30
    
    In [223]: 
    

    DataFrame索引问题:获取多个元素

    In [234]: df                                                                                                                                                                                      
    Out[234]: 
       one  two
    A  1.0   20
    B  2.0   10
    C  3.0   30
    D  NaN   40
    
    In [235]: df['one']                                                                                                                                                                               
    Out[235]: 
    A    1.0
    B    2.0
    C    3.0
    D    NaN
    Name: one, dtype: float64
    
    In [236]: df.loc['B']                                                                                                                                                                             
    Out[236]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [237]: df.loc['B',]                                                                                                                                                                            
    Out[237]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [238]: df.loc['B',:]                                                                                                                                                                           
    Out[238]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [239]:                                                                                                                                                                                         
    
    In [239]: df.iloc[1]                                                                                                                                                                              
    Out[239]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [240]: df.iloc[1,]                                                                                                                                                                             
    Out[240]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [241]: df.iloc[1,:]                                                                                                                                                                            
    Out[241]: 
    one     2.0
    two    10.0
    Name: B, dtype: float64
    
    In [242]: df.iloc[1,:1]                                                                                                                                                                           
    Out[242]: 
    one    2.0
    Name: B, dtype: float64
    
    In [243]: 

    DataFrame数据对齐

    In [243]: df                                                                                                                                                                                      
    Out[243]: 
       one  two
    A  1.0   20
    B  2.0   10
    C  3.0   30
    D  NaN   40
    
    In [244]: df = pd.DataFrame({'two':[1,2,3,4],'one':[4,5,6,7]},index=['C','D','B','A'])                                                                                                            
    
    In [245]: df2 = _243                                                                                                                                                                              
    
    In [246]: df                                                                                                                                                                                      
    Out[246]: 
       two  one
    C    1    4
    D    2    5
    B    3    6
    A    4    7
    
    In [247]: df2                                                                                                                                                                                     
    Out[247]: 
       one  two
    A  1.0   20
    B  2.0   10
    C  3.0   30
    D  NaN   40
    
    In [248]: df+df2                                                                                                                                                                                  
    Out[248]: 
       one  two
    A  8.0   24
    B  8.0   13
    C  7.0   31
    D  NaN   42
    
    In [249]: 

    DataFrame缺失值处理

    In [268]: df                                                                                                                                                                                      
    Out[268]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    C  3.0  30.0
    D  NaN  40.0
    
    In [269]: df.fillna(0)                                                                                                                                                                            
    Out[269]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    C  3.0  30.0
    D  0.0  40.0
    
    In [270]: df.dropna()                                                                                                                                                                             
    Out[270]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    C  3.0  30.0
    
    In [271]: df.loc['D','two'] = np.nan                                                                                                                                                              
    
    In [272]: df.loc['B','two'] = np.nan                                                                                                                                                              
    
    In [273]: df                                                                                                                                                                                      
    Out[273]: 
       one   two
    A  1.0  20.0
    B  2.0   NaN
    C  3.0  30.0
    D  NaN   NaN
    
    In [274]: df.dropna()            # 删除含有NaN的行                                                                                                                                                                 
    Out[274]: 
       one   two
    A  1.0  20.0
    C  3.0  30.0
    
    In [275]: df.dropna(how='all')   # 整行都是NaN才删除该行                                                                                                                                                                  
    Out[275]: 
       one   two
    A  1.0  20.0
    B  2.0   NaN
    C  3.0  30.0
    
    In [276]: df.dropna(how='any')  # 默认值how='any':只要该行含有NaN就删除该行
    Out[276]: 
       one   two
    A  1.0  20.0
    C  3.0  30.0
    
    In [277]:   
    

    通过axis参数指定删除方向:axis=1删除含有NaN的整列,axis=0(默认)删除含有NaN的整行

    In [282]: df                                                                                                                                                                                      
    Out[282]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    C  3.0  30.0
    D  4.0  10.0
    
    In [283]: df.iloc[2,1] = np.nan                                                                                                                                                                   
    
    In [284]: df                                                                                                                                                                                      
    Out[284]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    C  3.0   NaN
    D  4.0  10.0
    
    In [285]: df.dropna(axis=1)          # 删除含有NaN的列                                                                                                                                                             
    Out[285]: 
       one
    A  1.0
    B  2.0
    C  3.0
    D  4.0
    
    In [286]: df.dropna(axis=0)         # 默认                                                                                                                                                              
    Out[286]: 
       one   two
    A  1.0  20.0
    B  2.0  10.0
    D  4.0  10.0
    
    In [287]: 
    

      

    DataFrame排序

    # 按值排序
    In [17]: df                                                                                                                                                                                       
    Out[17]: 
       two  one
    C    1  4.0
    D    2  NaN
    B    3  6.0
    A    4  7.0
    
    In [18]: df.mean()                                                                                                                                                                                
    Out[18]: 
    two    2.500000
    one    5.666667
    dtype: float64
    
    In [19]: df.mean(axis=1)                                                                                                                                                                          
    Out[19]: 
    C    2.5
    D    2.0
    B    4.5
    A    5.5
    dtype: float64
    
    In [20]: df.sort_values(by='one')                                                                                                                                                                 
    Out[20]: 
       two  one
    C    1  4.0
    B    3  6.0
    A    4  7.0
    D    2  NaN
    
    In [21]: df.sort_values(by='one',ascending=False)                                                                                                                                                 
    Out[21]: 
       two  one
    A    4  7.0
    B    3  6.0
    C    1  4.0
    D    2  NaN
    
    In [22]: 
    
    # axis=1时,by指定的是行标签:按照行'B'中的值对各列进行排序
    In [23]: df.sort_values(by='B',axis=1)                                                                                                                                                            
    Out[23]: 
       two  one
    C    1  4.0
    D    2  NaN
    B    3  6.0
    A    4  7.0
    
    In [24]: df.sort_values(by='B',axis=1,ascending=False)                                                                                                                                            
    Out[24]: 
       one  two
    C  4.0    1
    D  NaN    2
    B  6.0    3
    A  7.0    4
    
    In [25]:
    
    
    # 按标签排序
    In [68]: df                                                                                                                                                                                       
    Out[68]: 
       two  one
    C    1  4.0
    D    2  NaN
    B    3  6.0
    A    4  7.0
    
    In [69]: df2                                                                                                                                                                                      
    Out[69]: 
       four
    C    50
    A    60
    D    70
    B    80
    
    In [70]: df3 = pd.concat([df, df2], axis=1, join_axes=[df.index])      # concat合并df(注意:join_axes参数在pandas 1.0中已被移除,新版本请改用 pd.concat([df, df2], axis=1).reindex(df.index))
    
    In [71]: df3                                                                                                                                                                                      
    Out[71]: 
       two  one  four
    C    1  4.0    50
    D    2  NaN    70
    B    3  6.0    80
    A    4  7.0    60
    
    In [72]: df3.sort_index()                                                                                                                                                                         
    Out[72]: 
       two  one  four
    A    4  7.0    60
    B    3  6.0    80
    C    1  4.0    50
    D    2  NaN    70
    
    In [73]: df3.sort_index(ascending=False)                                                                                                                                                          
    Out[73]: 
       two  one  four
    D    2  NaN    70
    C    1  4.0    50
    B    3  6.0    80
    A    4  7.0    60
    
    In [74]: df3.sort_index(ascending=False, axis=1)                                                                                                                                                  
    Out[74]: 
       two  one  four
    C    1  4.0    50
    D    2  NaN    70
    B    3  6.0    80
    A    4  7.0    60
    
    In [75]: df3.sort_index(axis=1)                                                                                                                                                                   
    Out[75]: 
       four  one  two
    C    50  4.0    1
    D    70  NaN    2
    B    80  6.0    3
    A    60  7.0    4
    
    In [76]: 
    

    pandas批量解析时间对象(注意:pandas 2.0起默认按第一个元素推断格式,像下面这样格式不一致的字符串需要显式传入 format='mixed')

    In [83]: pd.to_datetime(['2001-01-01','2010-Apr-09','02/04/2019','2019/02/03'])                                                                                                                   
    Out[83]: DatetimeIndex(['2001-01-01', '2010-04-09', '2019-02-04', '2019-02-03'], dtype='datetime64[ns]', freq=None)
    
    In [84]: 

    pandas生成时间对象

    In [90]: pd.date_range?                                                                                                                                                                           
    
    In [91]: pd.date_range(start='2018-01-01',end='2018-02-01')                                                                                                                                       
    Out[91]: 
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
                   '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
                   '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
                   '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
                   '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
                   '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
                   '2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01'],
                  dtype='datetime64[ns]', freq='D')
    
    In [92]: pd.date_range(start='2018-01-01',periods=30)                                                                                                                                             
    Out[92]: 
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
                   '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
                   '2018-01-13', '2018-01-14', '2018-01-15', '2018-01-16',
                   '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-20',
                   '2018-01-21', '2018-01-22', '2018-01-23', '2018-01-24',
                   '2018-01-25', '2018-01-26', '2018-01-27', '2018-01-28',
                   '2018-01-29', '2018-01-30'],
                  dtype='datetime64[ns]', freq='D')
    
    In [93]:
    
    In [96]: pd.date_range(start='2018-01-01',periods=30,freq='H')                                                                                                                                    
    Out[96]: 
    DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
                   '2018-01-01 02:00:00', '2018-01-01 03:00:00',
                   '2018-01-01 04:00:00', '2018-01-01 05:00:00',
                   '2018-01-01 06:00:00', '2018-01-01 07:00:00',
                   '2018-01-01 08:00:00', '2018-01-01 09:00:00',
                   '2018-01-01 10:00:00', '2018-01-01 11:00:00',
                   '2018-01-01 12:00:00', '2018-01-01 13:00:00',
                   '2018-01-01 14:00:00', '2018-01-01 15:00:00',
                   '2018-01-01 16:00:00', '2018-01-01 17:00:00',
                   '2018-01-01 18:00:00', '2018-01-01 19:00:00',
                   '2018-01-01 20:00:00', '2018-01-01 21:00:00',
                   '2018-01-01 22:00:00', '2018-01-01 23:00:00',
                   '2018-01-02 00:00:00', '2018-01-02 01:00:00',
                   '2018-01-02 02:00:00', '2018-01-02 03:00:00',
                   '2018-01-02 04:00:00', '2018-01-02 05:00:00'],
                  dtype='datetime64[ns]', freq='H')
    
    In [97]: pd.date_range(start='2018-01-01',periods=30,freq='W')                                                                                                                                    
    Out[97]: 
    DatetimeIndex(['2018-01-07', '2018-01-14', '2018-01-21', '2018-01-28',
                   '2018-02-04', '2018-02-11', '2018-02-18', '2018-02-25',
                   '2018-03-04', '2018-03-11', '2018-03-18', '2018-03-25',
                   '2018-04-01', '2018-04-08', '2018-04-15', '2018-04-22',
                   '2018-04-29', '2018-05-06', '2018-05-13', '2018-05-20',
                   '2018-05-27', '2018-06-03', '2018-06-10', '2018-06-17',
                   '2018-06-24', '2018-07-01', '2018-07-08', '2018-07-15',
                   '2018-07-22', '2018-07-29'],
                  dtype='datetime64[ns]', freq='W-SUN')
    
    In [98]: pd.date_range(start='2018-01-01',periods=30,freq='W-MON')                                                                                                                                
    Out[98]: 
    DatetimeIndex(['2018-01-01', '2018-01-08', '2018-01-15', '2018-01-22',
                   '2018-01-29', '2018-02-05', '2018-02-12', '2018-02-19',
                   '2018-02-26', '2018-03-05', '2018-03-12', '2018-03-19',
                   '2018-03-26', '2018-04-02', '2018-04-09', '2018-04-16',
                   '2018-04-23', '2018-04-30', '2018-05-07', '2018-05-14',
                   '2018-05-21', '2018-05-28', '2018-06-04', '2018-06-11',
                   '2018-06-18', '2018-06-25', '2018-07-02', '2018-07-09',
                   '2018-07-16', '2018-07-23'],
                  dtype='datetime64[ns]', freq='W-MON')
    
    In [99]:                                                                                                                                                                                          
    
    In [99]: pd.date_range(start='2018-01-01',periods=30,freq='B')                                                                                                                                    
    Out[99]: 
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
                   '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
                   '2018-01-11', '2018-01-12', '2018-01-15', '2018-01-16',
                   '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-22',
                   '2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26',
                   '2018-01-29', '2018-01-30', '2018-01-31', '2018-02-01',
                   '2018-02-02', '2018-02-05', '2018-02-06', '2018-02-07',
                   '2018-02-08', '2018-02-09'],
                  dtype='datetime64[ns]', freq='B')
    
    In [100]: dt = _                                                                                                                                                                                  
    
    In [101]: type(dt)                                                                                                                                                                                
    Out[101]: pandas.core.indexes.datetimes.DatetimeIndex
    
    In [102]: dt[0]                                                                                                                                                                                   
    Out[102]: Timestamp('2018-01-01 00:00:00', freq='B')
    In [105]: dt[0].to_pydatetime()                                                                                                                                                                   
    Out[105]: datetime.datetime(2018, 1, 1, 0, 0)
    
    In [106]: 

    时间间隔指定非常灵活

    In [107]: pd.date_range(start='2018-01-01',periods=30,freq='1h20min')                                                                                                                             
    Out[107]: 
    DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:20:00',
                   '2018-01-01 02:40:00', '2018-01-01 04:00:00',
                   '2018-01-01 05:20:00', '2018-01-01 06:40:00',
                   '2018-01-01 08:00:00', '2018-01-01 09:20:00',
                   '2018-01-01 10:40:00', '2018-01-01 12:00:00',
                   '2018-01-01 13:20:00', '2018-01-01 14:40:00',
                   '2018-01-01 16:00:00', '2018-01-01 17:20:00',
                   '2018-01-01 18:40:00', '2018-01-01 20:00:00',
                   '2018-01-01 21:20:00', '2018-01-01 22:40:00',
                   '2018-01-02 00:00:00', '2018-01-02 01:20:00',
                   '2018-01-02 02:40:00', '2018-01-02 04:00:00',
                   '2018-01-02 05:20:00', '2018-01-02 06:40:00',
                   '2018-01-02 08:00:00', '2018-01-02 09:20:00',
                   '2018-01-02 10:40:00', '2018-01-02 12:00:00',
                   '2018-01-02 13:20:00', '2018-01-02 14:40:00'],
                  dtype='datetime64[ns]', freq='80T')
    
    In [108]: 

    pandas时间序列切片/截取

    In [121]: series = pd.Series(np.arange(1000),index=pd.date_range(start='2017-01-01',periods=1000))                                                                                                
    
    In [122]: series                                                                                                                                                                                  
    Out[122]: 
    2017-01-01      0
    2017-01-02      1
    2017-01-03      2
    2017-01-04      3
    2017-01-05      4
    2017-01-06      5
    2017-01-07      6
    2017-01-08      7
    2017-01-09      8
    2017-01-10      9
    2017-01-11     10
    2017-01-12     11
    2017-01-13     12
    2017-01-14     13
    2017-01-15     14
    2017-01-16     15
    2017-01-17     16
    2017-01-18     17
    2017-01-19     18
    2017-01-20     19
    2017-01-21     20
    2017-01-22     21
    2017-01-23     22
    2017-01-24     23
    2017-01-25     24
    2017-01-26     25
    2017-01-27     26
    2017-01-28     27
    2017-01-29     28
    2017-01-30     29
                 ... 
    2019-08-29    970
    2019-08-30    971
    2019-08-31    972
    2019-09-01    973
    2019-09-02    974
    2019-09-03    975
    2019-09-04    976
    2019-09-05    977
    2019-09-06    978
    2019-09-07    979
    2019-09-08    980
    2019-09-09    981
    2019-09-10    982
    2019-09-11    983
    2019-09-12    984
    2019-09-13    985
    2019-09-14    986
    2019-09-15    987
    2019-09-16    988
    2019-09-17    989
    2019-09-18    990
    2019-09-19    991
    2019-09-20    992
    2019-09-21    993
    2019-09-22    994
    2019-09-23    995
    2019-09-24    996
    2019-09-25    997
    2019-09-26    998
    2019-09-27    999
    Freq: D, Length: 1000, dtype: int64
    
    In [123]:                                                                                                                                                                                         
    
    In [123]: series.index                                                                                                                                                                            
    Out[123]: 
    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
                   '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
                   '2017-01-09', '2017-01-10',
                   ...
                   '2019-09-18', '2019-09-19', '2019-09-20', '2019-09-21',
                   '2019-09-22', '2019-09-23', '2019-09-24', '2019-09-25',
                   '2019-09-26', '2019-09-27'],
                  dtype='datetime64[ns]', length=1000, freq='D')
    
    In [124]: series.head()                                                                                                                                                                           
    Out[124]: 
    2017-01-01    0
    2017-01-02    1
    2017-01-03    2
    2017-01-04    3
    2017-01-05    4
    Freq: D, dtype: int64
    
    In [125]: series.tail()                                                                                                                                                                           
    Out[125]: 
    2019-09-23    995
    2019-09-24    996
    2019-09-25    997
    2019-09-26    998
    2019-09-27    999
    Freq: D, dtype: int64
    
    In [126]: series['2018-03']                                                                                                                                                                       
    Out[126]: 
    2018-03-01    424
    2018-03-02    425
    2018-03-03    426
    2018-03-04    427
    2018-03-05    428
    2018-03-06    429
    2018-03-07    430
    2018-03-08    431
    2018-03-09    432
    2018-03-10    433
    2018-03-11    434
    2018-03-12    435
    2018-03-13    436
    2018-03-14    437
    2018-03-15    438
    2018-03-16    439
    2018-03-17    440
    2018-03-18    441
    2018-03-19    442
    2018-03-20    443
    2018-03-21    444
    2018-03-22    445
    2018-03-23    446
    2018-03-24    447
    2018-03-25    448
    2018-03-26    449
    2018-03-27    450
    2018-03-28    451
    2018-03-29    452
    2018-03-30    453
    2018-03-31    454
    Freq: D, dtype: int64
                                                                                                                                                                                        
    
    In [128]: series['2018-12':'2019-01-10']                                                                                                                                                          
    Out[128]: 
    2018-12-01    699
    2018-12-02    700
    2018-12-03    701
    2018-12-04    702
    2018-12-05    703
    2018-12-06    704
    2018-12-07    705
    2018-12-08    706
    2018-12-09    707
    2018-12-10    708
    2018-12-11    709
    2018-12-12    710
    2018-12-13    711
    2018-12-14    712
    2018-12-15    713
    2018-12-16    714
    2018-12-17    715
    2018-12-18    716
    2018-12-19    717
    2018-12-20    718
    2018-12-21    719
    2018-12-22    720
    2018-12-23    721
    2018-12-24    722
    2018-12-25    723
    2018-12-26    724
    2018-12-27    725
    2018-12-28    726
    2018-12-29    727
    2018-12-30    728
    2018-12-31    729
    2019-01-01    730
    2019-01-02    731
    2019-01-03    732
    2019-01-04    733
    2019-01-05    734
    2019-01-06    735
    2019-01-07    736
    2019-01-08    737
    2019-01-09    738
    2019-01-10    739
    Freq: D, dtype: int64
    
    In [129]: 

    resample重新采样并做计算

    In [132]: series.resample('M').sum()                                                                                                                                                              
    Out[132]: 
    2017-01-31      465
    2017-02-28     1246
    2017-03-31     2294
    2017-04-30     3135
    2017-05-31     4185
    2017-06-30     4965
    2017-07-31     6076
    2017-08-31     7037
    2017-09-30     7725
    2017-10-31     8928
    2017-11-30     9555
    2017-12-31    10819
    2018-01-31    11780
    2018-02-28    11466
    2018-03-31    13609
    2018-04-30    14085
    2018-05-31    15500
    2018-06-30    15915
    2018-07-31    17391
    2018-08-31    18352
    2018-09-30    18675
    2018-10-31    20243
    2018-11-30    20505
    2018-12-31    22134
    2019-01-31    23095
    2019-02-28    21686
    2019-03-31    24924
    2019-04-30    25035
    2019-05-31    26815
    2019-06-30    26865
    2019-07-31    28706
    2019-08-31    29667
    2019-09-30    26622
    Freq: M, dtype: int64
    
    In [133]: series.resample('M').mean()                                                                                                                                                             
    Out[133]: 
    2017-01-31     15.0
    2017-02-28     44.5
    2017-03-31     74.0
    2017-04-30    104.5
    2017-05-31    135.0
    2017-06-30    165.5
    2017-07-31    196.0
    2017-08-31    227.0
    2017-09-30    257.5
    2017-10-31    288.0
    2017-11-30    318.5
    2017-12-31    349.0
    2018-01-31    380.0
    2018-02-28    409.5
    2018-03-31    439.0
    2018-04-30    469.5
    2018-05-31    500.0
    2018-06-30    530.5
    2018-07-31    561.0
    2018-08-31    592.0
    2018-09-30    622.5
    2018-10-31    653.0
    2018-11-30    683.5
    2018-12-31    714.0
    2019-01-31    745.0
    2019-02-28    774.5
    2019-03-31    804.0
    2019-04-30    834.5
    2019-05-31    865.0
    2019-06-30    895.5
    2019-07-31    926.0
    2019-08-31    957.0
    2019-09-30    986.0
    Freq: M, dtype: float64
    
    In [134]: 
    

    pandas文件读取

    In [14]: pd.read_csv('601318.csv', index_col='date', parse_dates=['date'])                                                                                                                        
    Out[14]: 
                Unnamed: 0    open   close    high     low      volume    code
    date                                                                      
    2007-03-01           0  21.878  20.473  22.302  20.040  1977633.51  601318
    2007-03-02           1  20.565  20.307  20.758  20.075   425048.32  601318
    2007-03-05           2  20.119  19.419  20.202  19.047   419196.74  601318
    2007-03-06           3  19.253  19.800  20.128  19.143   297727.88  601318
    2007-03-07           4  19.817  20.338  20.522  19.651   287463.78  601318
    2007-03-08           5  20.171  20.093  20.272  19.988   130983.83  601318
    2007-03-09           6  20.084  19.922  20.171  19.559   160887.79  601318
    2007-03-12           7  19.821  19.563  19.821  19.471   145353.06  601318
    2007-03-13           8  19.607  19.642  19.804  19.524   102319.68  601318
    2007-03-14           9  19.384  19.664  19.734  19.161   173306.56  601318
    2007-03-15          10  19.918  19.673  20.342  19.603   152521.90  601318
    2007-03-16          11  19.686  19.782  20.106  19.428   227547.24  601318
    2007-03-20          12  20.478  20.031  20.530  19.909   222026.87  601318
    2007-03-21          13  20.040  19.734  20.128  19.646   136728.32  601318
    2007-03-22          14  19.887  19.848  20.093  19.791   167509.84  601318
    2007-03-23          15  19.839  19.760  19.922  19.563   139810.14  601318
    2007-03-26          16  19.778  20.101  20.215  19.769   223266.79  601318
    2007-03-27          17  20.036  20.088  20.285  19.966   139338.19  601318
    2007-03-28          18  20.084  20.382  20.522  19.944   258263.69  601318
    2007-03-29          19  20.482  20.740  21.349  20.338   461986.18  601318
    2007-03-30          20  20.548  20.587  20.946  20.443   144617.20  601318
    2007-04-02          21  20.587  21.174  21.309  20.587   231445.03  601318
    2007-04-03          22  21.187  21.095  21.335  20.959   132712.04  601318
    2007-04-04          23  21.099  20.911  21.222  20.806   122454.69  601318
    2007-04-05          24  20.915  20.968  21.003  20.653   122865.38  601318
    2007-04-06          25  20.863  21.007  21.419  20.784   195208.52  601318
    2007-04-09          26  21.042  22.582  22.705  20.872   462770.21  601318
    2007-04-10          27  22.316  23.112  23.488  22.316   407823.90  601318
    2007-04-11          28  23.138  23.427  24.145  23.016   243446.50  601318
    2007-04-12          29  23.619  23.383  25.378  23.169   159270.43  601318
    ...                ...     ...     ...     ...     ...         ...     ...
    2017-11-06        2533  64.690  64.010  64.700  62.920   908570.00  601318
    2017-11-07        2534  64.300  65.370  66.570  64.300  1173565.00  601318
    2017-11-08        2535  65.400  64.610  66.350  64.320   867820.00  601318
    2017-11-09        2536  64.500  66.330  66.390  64.400   708669.00  601318
    2017-11-10        2537  66.000  69.890  69.950  65.930  1254060.00  601318
    2017-11-13        2538  70.100  70.150  70.570  69.480   752207.00  601318
    2017-11-14        2539  70.690  70.420  71.290  69.770   801748.00  601318
    2017-11-15        2540  69.980  69.200  70.430  68.590  1009459.00  601318
    2017-11-16        2541  68.800  73.010  73.110  68.750  1163764.00  601318
    2017-11-17        2542  72.700  75.270  75.320  71.800  1580393.00  601318
    2017-11-20        2543  74.780  75.710  76.490  74.070  1141281.00  601318
    2017-11-21        2544  75.130  78.440  79.680  75.130  1445569.00  601318
    2017-11-22        2545  79.500  77.450  79.960  76.580  1293487.00  601318
    2017-11-23        2546  76.600  74.320  78.440  73.700  1576210.00  601318
    2017-11-24        2547  74.150  74.620  75.460  72.710  1317843.00  601318
    2017-11-27        2548  74.700  73.550  74.900  71.550  1637232.00  601318
    2017-11-28        2549  72.700  72.730  73.540  71.880   786469.00  601318
    2017-11-29        2550  73.540  72.420  74.190  71.260   875004.00  601318
    2017-11-30        2551  71.370  69.920  71.670  69.550  1163733.00  601318
    2017-12-01        2552  69.650  68.100  70.180  67.910  1393046.00  601318
    2017-12-04        2553  67.600  69.390  70.350  67.370  1159283.00  601318
    2017-12-05        2554  68.900  71.200  71.500  68.780  1692539.00  601318
    2017-12-06        2555  70.900  69.400  71.100  68.000  1245607.00  601318
    2017-12-07        2556  69.350  68.640  69.810  67.600   859703.00  601318
    2017-12-08        2557  68.940  71.490  71.860  68.660  1095632.00  601318
    2017-12-11        2558  71.200  73.250  73.310  70.820  1139927.00  601318
    2017-12-12        2559  73.250  71.210  73.560  71.170   777900.00  601318
    2017-12-13        2560  71.210  72.120  72.620  70.200   865117.00  601318
    2017-12-14        2561  72.120  71.010  72.160  70.600   676186.00  601318
    2017-12-15        2562  70.690  70.380  71.440  70.050   735547.00  601318
    
    [2563 rows x 7 columns]
    
    In [15]: df = _                                                                                                                                                                                   
    
    In [16]: df.index                                                                                                                                                                                 
    Out[16]: 
    DatetimeIndex(['2007-03-01', '2007-03-02', '2007-03-05', '2007-03-06',
                   '2007-03-07', '2007-03-08', '2007-03-09', '2007-03-12',
                   '2007-03-13', '2007-03-14',
                   ...
                   '2017-12-04', '2017-12-05', '2017-12-06', '2017-12-07',
                   '2017-12-08', '2017-12-11', '2017-12-12', '2017-12-13',
                   '2017-12-14', '2017-12-15'],
                  dtype='datetime64[ns]', name='date', length=2563, freq=None)
    
    In [17]: 
    

    如果csv文件没有表头行(列名),可以用header=None声明,并通过names参数手动指定列名

    pd.read_csv('601318.csv', header=None, names=['A','B','C','D','E','F','G','H'])

    如果csv文件里有一些缺失的值,比如有的是NaN,有的是None,那么如何让pandas正确解释?

    na_values参数指定列表,即列表里的字符串都会被解释成numpy.nan

    pd.read_csv('601318.csv', na_values=['None','none','nan','NaN'])
    
    In [8]: pd.read_*?                                                                                                                                                                                
    pd.read_clipboard
    pd.read_csv
    pd.read_excel
    pd.read_feather
    pd.read_fwf
    pd.read_gbq
    pd.read_hdf
    pd.read_html
    pd.read_json
    pd.read_msgpack
    pd.read_parquet
    pd.read_pickle
    pd.read_sas
    pd.read_sql
    pd.read_sql_query
    pd.read_sql_table
    pd.read_stata
    pd.read_table
    
    In [9]: 
    

      

    groupby按照索引聚合数据

    有原始数据如下,需要按照索引聚合,即 Aggregate for duplicate indices

    In [64]: df                                                                                                                                                                                                                               
    Out[64]: 
                         concurrence  p2p_dl       p2p_ul      cdn_dl  isp_local_p2p_ul  isp_remote_p2p_ul  isp_other_p2p_ul  isp_unknown_p2p_ul
    2019-07-23 00:00:00          2.0     0.0     952181.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          3.0     0.0     288200.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          4.0     0.0   11921229.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          2.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          5.0     0.0    8938038.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          4.0     0.0    1967635.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          6.0     0.0   19436976.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         41.0     0.0  182659387.0    358400.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         44.0     0.0   13396980.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         13.0     0.0    4225576.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         18.0     0.0   28843115.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0      15952.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         23.0     0.0   64174376.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         73.0     0.0  448441433.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          2.0     0.0    1492338.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         20.0     0.0   26001517.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         67.0     0.0  189485455.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00        157.0     0.0  181990022.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          4.0     0.0    4209738.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          2.0     0.0    1887856.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         25.0     0.0   61364395.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         14.0     0.0   14395728.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00         47.0     0.0   62243987.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          4.0     0.0    5284136.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:00:00          3.0     0.0    7591219.0         0.0               0.0                0.0               0.0                 0.0
    ...                          ...     ...          ...         ...               ...                ...               ...                 ...
    2019-07-23 00:10:00          3.0     0.0   29797700.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          3.0     0.0   12962682.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          5.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          3.0     0.0    1107695.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         13.0     0.0  102279733.0   1034525.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         19.0     0.0   41296504.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          2.0     0.0    8613982.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          3.0     0.0    8017425.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          1.0     0.0    1665251.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         57.0     0.0  158300081.0  28603381.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         22.0     0.0  106194450.0    216074.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00        132.0     0.0  238920037.0   6613339.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         65.0     0.0  570891024.0   1917279.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         19.0     0.0  265779751.0   1758985.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         38.0     0.0   56797177.0   1384116.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         42.0     0.0  985598578.0   3860560.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00        207.0     0.0  824804811.0  20935193.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         24.0     0.0  115753257.0   1573962.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          2.0     0.0    1635388.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         10.0     0.0    1007358.0    219390.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         93.0     0.0  401098219.0   2656469.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         36.0     0.0  126658914.0   2714817.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         14.0     0.0   52857937.0    811010.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         52.0     0.0  252881233.0   2057686.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         11.0     0.0  101013831.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00         26.0     0.0   48285406.0    904998.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          1.0     0.0    1582081.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          3.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          1.0     0.0          0.0         0.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00          2.0     0.0    1380872.0         0.0               0.0                0.0               0.0                 0.0
    
    [201 rows x 8 columns]
    
    In [65]: 

    Aggregate for duplicate Indices

    In [58]: df2 = df.groupby(df.index).sum()                                                                                                                                                                                                 
    
    In [59]: df2                                                                                                                                                                                                                              
    Out[59]: 
                         concurrence    p2p_dl        p2p_ul       cdn_dl  isp_local_p2p_ul  isp_remote_p2p_ul  isp_other_p2p_ul  isp_unknown_p2p_ul
    2019-07-23 00:00:00       1624.0       0.0  6.363896e+09     358400.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:05:00       1648.0       0.0  5.045862e+09   48245645.0               0.0                0.0               0.0                 0.0
    2019-07-23 00:10:00       1633.0  502475.0  6.116968e+09  116286357.0               0.0                0.0               0.0                 0.0
    
    In [60]: df2.to_dict()                                                                                                                                                                                                                    
    Out[60]: 
    {'concurrence': {'2019-07-23 00:00:00': 1624.0,
      '2019-07-23 00:05:00': 1648.0,
      '2019-07-23 00:10:00': 1633.0},
     'p2p_dl': {'2019-07-23 00:00:00': 0.0,
      '2019-07-23 00:05:00': 0.0,
      '2019-07-23 00:10:00': 502475.0},
     'p2p_ul': {'2019-07-23 00:00:00': 6363895723.0,
      '2019-07-23 00:05:00': 5045861525.0,
      '2019-07-23 00:10:00': 6116968304.0},
     'cdn_dl': {'2019-07-23 00:00:00': 358400.0,
      '2019-07-23 00:05:00': 48245645.0,
      '2019-07-23 00:10:00': 116286357.0},
     'isp_local_p2p_ul': {'2019-07-23 00:00:00': 0.0,
      '2019-07-23 00:05:00': 0.0,
      '2019-07-23 00:10:00': 0.0},
     'isp_remote_p2p_ul': {'2019-07-23 00:00:00': 0.0,
      '2019-07-23 00:05:00': 0.0,
      '2019-07-23 00:10:00': 0.0},
     'isp_other_p2p_ul': {'2019-07-23 00:00:00': 0.0,
      '2019-07-23 00:05:00': 0.0,
      '2019-07-23 00:10:00': 0.0},
     'isp_unknown_p2p_ul': {'2019-07-23 00:00:00': 0.0,
      '2019-07-23 00:05:00': 0.0,
      '2019-07-23 00:10:00': 0.0}}
    
    In [61]: 
    In [76]: df2.to_dict(orient="list")                                                                                                                                                                                                       
    Out[76]: 
    {'concurrence': [1624.0, 1648.0, 1633.0],
     'p2p_dl': [0.0, 0.0, 502475.0],
     'p2p_ul': [6363895723.0, 5045861525.0, 6116968304.0],
     'cdn_dl': [358400.0, 48245645.0, 116286357.0],
     'isp_local_p2p_ul': [0.0, 0.0, 0.0],
     'isp_remote_p2p_ul': [0.0, 0.0, 0.0],
     'isp_other_p2p_ul': [0.0, 0.0, 0.0],
     'isp_unknown_p2p_ul': [0.0, 0.0, 0.0]}
    
    In [77]: df2.index                                                                                                                                                                                                                        
    Out[77]: Index(['2019-07-23 00:00:00', '2019-07-23 00:05:00', '2019-07-23 00:10:00'], dtype='object')
    
    In [78]: df2.index.tolist()                                                                                                                                                                                                               
    Out[78]: ['2019-07-23 00:00:00', '2019-07-23 00:05:00', '2019-07-23 00:10:00']
    
    In [79]: 
    

      

  • 相关阅读:
    克隆节点及添加属性节点
    元素属性的添加删除(原生js)
    清浮动方法
    css样式获取及兼容性(原生js)
    js基础---数据类型转换
    js基础---数字日期及运算
    js基础---object对象
    input询问键盘输入超时自动跳过选择默认值
    xpath定位
    selenium报错问题解决方法
  • 原文地址:https://www.cnblogs.com/standby/p/10705519.html
Copyright © 2011-2022 走看看