zoukankan      html  css  js  c++  java
  • pandas聚合aggregate

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2018/5/24 15:03
    # @Author  : zhang chao
    # @File    : s.py
    import pandas as pd
    import numpy as np
    
    # Demo of Rolling.aggregate on a 10-day frame of random values: one
    # function over every column, over a single column, over a column subset,
    # several functions at once, and a per-column mapping of functions.
    #
    # Reductions are named by string ('sum', 'mean') instead of being passed as
    # np.sum / np.mean: pandas >= 2.1 emits a FutureWarning when a NumPy
    # callable is silently aliased to the built-in rolling reduction, and a
    # later version will call the callable directly. The string names select
    # the built-in reduction on every pandas version.
    SEPARATOR = "======================================="
    
    df = pd.DataFrame(np.random.randn(10, 4),
                      index=pd.date_range('1/1/2019', periods=10),
                      columns=['A', 'B', 'C', 'D'])
    
    print(df)
    print(SEPARATOR)
    # Windows span 3 rows; min_periods=1 lets the first two rows produce a
    # value from the partial window instead of NaN.
    r = df.rolling(window=3, min_periods=1)
    print(r)
    print(SEPARATOR)
    # One function applied to every column.
    print("r.aggregate('sum')")
    print(r.aggregate('sum'))
    print(SEPARATOR)
    # One function applied to a single column (result is a Series).
    print("r['A'].aggregate('sum')")
    print(r['A'].aggregate('sum'))
    print(SEPARATOR)
    # One function applied to a subset of columns.
    print("r[['A','B']].aggregate('sum')")
    print(r[['A', 'B']].aggregate('sum'))
    print(SEPARATOR)
    # Several functions on a single column: one output column per function.
    print("r['A'].aggregate(['sum','mean'])")
    print(r['A'].aggregate(['sum', 'mean']))
    print(SEPARATOR)
    # A different function per column, selected by a dict.
    print("r.aggregate({'A' : 'sum','B' : 'mean'})")
    print(r.aggregate({'A': 'sum', 'B': 'mean'}))
    print(SEPARATOR)
    # Several functions on several columns: two-level (column, function) header.
    print("r[['A','B']].aggregate(['sum','mean'])")
    print(r[['A', 'B']].aggregate(['sum', 'mean']))
    
    D:\Download\python3\python3.exe D:/Download/pycharmworkspace/s.py
                       A         B         C         D
    2019-01-01  0.744560  0.208652  0.542045 -0.995837
    2019-01-02  0.029809 -1.419936 -0.461988  2.177032
    2019-01-03  0.613583  1.515249  0.256546 -0.973564
    2019-01-04  0.124320  1.152804  0.152107  1.629035
    2019-01-05 -0.287906  1.003523 -0.793393  0.231969
    2019-01-06 -0.045296 -0.921622  0.894335  0.773035
    2019-01-07 -0.695347  0.512206  0.208833  0.953205
    2019-01-08 -1.197178  0.142301 -0.854875 -1.044017
    2019-01-09 -2.352468  0.047127 -0.351634 -0.373885
    2019-01-10  0.678406  0.500947  0.304250 -0.606522
    =======================================
    Rolling [window=3,min_periods=1,center=False,axis=0]
    =======================================
    r.aggregate(np.sum)
                       A         B         C         D
    2019-01-01  0.744560  0.208652  0.542045 -0.995837
    2019-01-02  0.774369 -1.211283  0.080057  1.181195
    2019-01-03  1.387952  0.303966  0.336603  0.207631
    2019-01-04  0.767712  1.248117 -0.053335  2.832504
    2019-01-05  0.449996  3.671576 -0.384740  0.887441
    2019-01-06 -0.208882  1.234705  0.253049  2.634040
    2019-01-07 -1.028549  0.594107  0.309775  1.958209
    2019-01-08 -1.937820 -0.267115  0.248293  0.682223
    2019-01-09 -4.244992  0.701633 -0.997676 -0.464698
    2019-01-10 -2.871239  0.690374 -0.902259 -2.024425
    =======================================
    r['A'].aggregate(np.sum)
    2019-01-01    0.744560
    2019-01-02    0.774369
    2019-01-03    1.387952
    2019-01-04    0.767712
    2019-01-05    0.449996
    2019-01-06   -0.208882
    2019-01-07   -1.028549
    2019-01-08   -1.937820
    2019-01-09   -4.244992
    2019-01-10   -2.871239
    Freq: D, Name: A, dtype: float64
    =======================================
    r[['A','B']].aggregate(np.sum)
                       A         B
    2019-01-01  0.744560  0.208652
    2019-01-02  0.774369 -1.211283
    2019-01-03  1.387952  0.303966
    2019-01-04  0.767712  1.248117
    2019-01-05  0.449996  3.671576
    2019-01-06 -0.208882  1.234705
    2019-01-07 -1.028549  0.594107
    2019-01-08 -1.937820 -0.267115
    2019-01-09 -4.244992  0.701633
    2019-01-10 -2.871239  0.690374
    =======================================
    r['A'].aggregate([np.sum,np.mean])
                     sum      mean
    2019-01-01  0.744560  0.744560
    2019-01-02  0.774369  0.387185
    2019-01-03  1.387952  0.462651
    2019-01-04  0.767712  0.255904
    2019-01-05  0.449996  0.149999
    2019-01-06 -0.208882 -0.069627
    2019-01-07 -1.028549 -0.342850
    2019-01-08 -1.937820 -0.645940
    2019-01-09 -4.244992 -1.414997
    2019-01-10 -2.871239 -0.957080
    =======================================
    r.aggregate({'A' : np.sum,'B' : np.mean})
                       B         A
    2019-01-01  0.208652  0.744560
    2019-01-02 -0.605642  0.774369
    2019-01-03  0.101322  1.387952
    2019-01-04  0.416039  0.767712
    2019-01-05  1.223859  0.449996
    2019-01-06  0.411568 -0.208882
    2019-01-07  0.198036 -1.028549
    2019-01-08 -0.089038 -1.937820
    2019-01-09  0.233878 -4.244992
    2019-01-10  0.230125 -2.871239
    =======================================
    r[['A','B']].aggregate([np.sum,np.mean]
                       A                   B          
                     sum      mean       sum      mean
    2019-01-01  0.744560  0.744560  0.208652  0.208652
    2019-01-02  0.774369  0.387185 -1.211283 -0.605642
    2019-01-03  1.387952  0.462651  0.303966  0.101322
    2019-01-04  0.767712  0.255904  1.248117  0.416039
    2019-01-05  0.449996  0.149999  3.671576  1.223859
    2019-01-06 -0.208882 -0.069627  1.234705  0.411568
    2019-01-07 -1.028549 -0.342850  0.594107  0.198036
    2019-01-08 -1.937820 -0.645940 -0.267115 -0.089038
    2019-01-09 -4.244992 -1.414997  0.701633  0.233878
    2019-01-10 -2.871239 -0.957080  0.690374  0.230125
    
    Process finished with exit code 0
  • 相关阅读:
    RDD执行延迟执行原理
    spark应用运行机制解析1
    spark streaming job生成与运行
    spark的Task的序列化
    spark将计算结果写入到hdfs的两种方法
    spark的runJob函数2
    SVG---------SVG sprite 使用示例
    段落边框——paraBox.scss
    背景条纹——bgStripes.scss
    css3动画——基本准则
  • 原文地址:https://www.cnblogs.com/ggzhangxiaochao/p/9094251.html
Copyright © 2011-2022 走看看