zoukankan      html  css  js  c++  java
  • pandas demo 示例

    #构造

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)

    输出:

                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413
    2018-03-14  0.442560 -0.065412 -1.209434  0.690070
    2018-03-15 -0.303560  1.389159 -0.397401 -0.650598

    #切片选择指定行

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    #切片选择指定行
    print(df[0:3])

    输出

                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413
    2018-03-14  0.442560 -0.065412 -1.209434  0.690070
    2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995

    #通过行标记获取指定行(包含两端)

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    #通过行标记获取指定行(包含两端)
    print(df['20180311':'20180313'])

    输出

                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413
    2018-03-14  0.442560 -0.065412 -1.209434  0.690070
    2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
                       A         B         C         D
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413

    #输出指定行指定列的数据

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    #输出指定行指定列的数据
    print(df.loc['20180312', ['A','B','C','D']])

    输出

                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413
    2018-03-14  0.442560 -0.065412 -1.209434  0.690070
    2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
    
    A    0.871652
    B   -0.492781
    C    0.213760
    D    1.046995
    Name: 2018-03-12 00:00:00, dtype: float64


    #输出第四行第二列的数据(iloc 的行、列下标都从0开始,iloc[3, 1] 即第4行、第2列)

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    #输出第四行第二列的数据(下标从0开始)
    print(df.iloc[3, 1])

    输出

                       A         B         C         D
    2018-03-10  0.474957 -0.789351  0.827287  0.632483
    2018-03-11 -0.147661  2.093837  0.565236 -0.282967
    2018-03-12  0.871652 -0.492781  0.213760  1.046995
    2018-03-13  0.735719  0.827546  0.139042  1.764413
    2018-03-14  0.442560 -0.065412 -1.209434  0.690070
    2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
    
    0.8275459967949839

    #选择某列:df['A'] 或 df.A

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #选择某列,也可以写成 df.A
    print(df['A'])

    输出

                       A         B         C         D
    2018-03-10 -1.537480  1.082599  0.174229 -1.841898
    2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417
    2018-03-12 -1.324199 -0.420854  0.104982  0.754717
    2018-03-13  0.138477  1.003904 -0.437110 -2.542149
    2018-03-14 -1.049416  0.318146  1.249720  0.781054
    2018-03-15 -1.595190 -0.391273  0.783752 -1.225756
    2018-03-10   -1.537480
    2018-03-11   -1.691014
    2018-03-12   -1.324199
    2018-03-13    0.138477
    2018-03-14   -1.049416
    2018-03-15   -1.595190
    Freq: D, Name: A, dtype: float64

    #进行切片选择,指定行,指定列

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #进行切片选择,指定行,指定列
    print(df.iloc[2:5,0:2])

    输出

                       A         B         C         D
    2018-03-10 -1.537480  1.082599  0.174229 -1.841898
    2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417
    2018-03-12 -1.324199 -0.420854  0.104982  0.754717
    2018-03-13  0.138477  1.003904 -0.437110 -2.542149
    2018-03-14 -1.049416  0.318146  1.249720  0.781054
    2018-03-15 -1.595190 -0.391273  0.783752 -1.225756
    
                       A         B
    2018-03-12 -1.324199 -0.420854
    2018-03-13  0.138477  1.003904
    2018-03-14 -1.049416  0.318146

    #进行不连续筛选

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #进行不连续筛选
    print(df.iloc[[1,2,4],[0,2]])

    输出

                       A         B         C         D
    2018-03-10  0.900440 -0.062287 -1.483173 -1.586545
    2018-03-11 -0.351609 -2.337686  0.471770  0.122194
    2018-03-12  1.598436  0.795936  1.102541 -0.471931
    2018-03-13  2.753501  0.184064  0.610561 -0.577957
    2018-03-14 -2.081754  0.666256  0.345566  0.969266
    2018-03-15  0.089630 -0.310928 -0.439767  0.944149
                       A         C
    2018-03-11 -0.351609  0.471770
    2018-03-12  1.598436  1.102541
    2018-03-14 -2.081754  0.345566

    #筛选出df.A大于0的行(布尔条件筛选)

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #筛选出df.A大于0的行(布尔条件筛选)
    print(df[df.A > 0])

    输出

                       A         B         C         D
    2018-03-10  0.892268  0.713791 -0.144297  0.739862
    2018-03-11  0.991796 -1.688081  1.333420 -0.524965
    2018-03-12  2.251776 -1.514738 -0.720530  1.052735
    2018-03-13 -0.297195 -0.945455 -1.796431  2.998356
    2018-03-14 -0.236509 -0.369757 -0.438734  0.408940
    2018-03-15  0.498061  0.778591 -0.282689  1.879702
                       A         B         C         D
    2018-03-10  0.892268  0.713791 -0.144297  0.739862
    2018-03-11  0.991796 -1.688081  1.333420 -0.524965
    2018-03-12  2.251776 -1.514738 -0.720530  1.052735
    2018-03-15  0.498061  0.778591 -0.282689  1.879702

    #将df.A大于0的行的所有列都改为999(整行赋值;若只想修改A列,应使用 df.loc[df.A>0, 'A']=999)

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #将df.A大于0的行整行赋值为999(若只想修改A列,应使用 df.loc[df.A>0, 'A']=999)
    df[df.A>0]=999
    print(df)

    输出

                       A         B         C         D
    2018-03-10 -0.587337  0.166438 -1.536473 -2.118008
    2018-03-11  0.327482 -0.383389  0.353157  0.592067
    2018-03-12 -0.483211 -2.066614 -0.313845  0.989347
    2018-03-13  1.230698 -1.196974 -1.465180  0.585245
    2018-03-14  0.180381 -1.289805  0.264123  0.731016
    2018-03-15  0.288694 -1.318865 -1.550989  0.467802
                         A           B           C           D
    2018-03-10   -0.587337    0.166438   -1.536473   -2.118008
    2018-03-11  999.000000  999.000000  999.000000  999.000000
    2018-03-12   -0.483211   -2.066614   -0.313845    0.989347
    2018-03-13  999.000000  999.000000  999.000000  999.000000
    2018-03-14  999.000000  999.000000  999.000000  999.000000
    2018-03-15  999.000000  999.000000  999.000000  999.000000

    #新增一列不赋值

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #新增一列
    df['F']=np.nan
    print(df)

    输出

                       A         B         C         D
    2018-03-10 -0.227356  1.229450  0.488290 -0.910271
    2018-03-11  0.756934 -0.489924  1.125287  0.148251
    2018-03-12 -1.157556 -0.703575  1.488778 -0.713087
    2018-03-13  0.942155  0.972845 -1.765062  0.991459
    2018-03-14  1.053055 -0.685858  0.604448  0.837986
    2018-03-15  0.809910  0.771260  0.674058  0.420373
                       A         B         C         D   F
    2018-03-10 -0.227356  1.229450  0.488290 -0.910271 NaN
    2018-03-11  0.756934 -0.489924  1.125287  0.148251 NaN
    2018-03-12 -1.157556 -0.703575  1.488778 -0.713087 NaN
    2018-03-13  0.942155  0.972845 -1.765062  0.991459 NaN
    2018-03-14  1.053055 -0.685858  0.604448  0.837986 NaN
    2018-03-15  0.809910  0.771260  0.674058  0.420373 NaN

    #新增一列赋值

    import pandas as pd
    import pickle
    import numpy as np
    
    dates=pd.date_range('20180310',periods=6)
    df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A','B','C','D'])#生成6行4列位置
    print(df)
    
    #新增一列
    df['E']  = pd.Series([1,2,3,4,5,6], index=pd.date_range('20180310', periods=6))
    print(df)

    输出

                       A         B         C         D
    2018-03-10 -0.565898  0.647803  1.018365 -1.269129
    2018-03-11 -1.049725  0.718618  0.745133 -2.976616
    2018-03-12 -0.859447 -0.686062  0.332352 -0.065416
    2018-03-13 -0.291780  1.144493 -1.387311 -0.752532
    2018-03-14  0.469711  0.129786  0.677650  0.723333
    2018-03-15  0.876061  0.441140  1.566190 -1.628274
                       A         B         C         D  E
    2018-03-10 -0.565898  0.647803  1.018365 -1.269129  1
    2018-03-11 -1.049725  0.718618  0.745133 -2.976616  2
    2018-03-12 -0.859447 -0.686062  0.332352 -0.065416  3
    2018-03-13 -0.291780  1.144493 -1.387311 -0.752532  4
    2018-03-14  0.469711  0.129786  0.677650  0.723333  5
    2018-03-15  0.876061  0.441140  1.566190 -1.628274  6

  • 相关阅读:
    关于fill_parent ,wrap_content ,match_parent区别
    开发第六天
    开发第五天
    关于Android创建虚拟机出现Failed to allocate memory: 8解决办法
    第十一周总结
    开发第四天
    开发第三天
    开发第二天
    开发第一天
    用户场景分析
  • 原文地址:https://www.cnblogs.com/sea-stream/p/10319600.html
Copyright © 2011-2022 走看看