zoukankan      html  css  js  c++  java
  • Python笔记 #14# Pandas: Selection

     10 Minutes to pandas

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Three consecutive daily timestamps: 2018-01-16, 2018-01-17 and 2018-01-18.
    dates = pd.date_range(start='20180116', periods=3)

    # 2-D table of standard-normal samples: one row per date, columns A through D.
    df = pd.DataFrame(
        data=np.random.randn(len(dates), 4),
        index=dates,
        columns=['A', 'B', 'C', 'D'],
    )
    
    # Getting
    
    # print(df['A']) # 选中一列(注意:下面的示例输出摘自 pandas 官方文档,日期为 2013 年且共六行,与本文只有三行的 df 不一致)
    # 2013-01-01    0.469112
    # 2013-01-02    1.212112
    # 2013-01-03   -0.861849
    # 2013-01-04    0.721555
    # 2013-01-05   -0.424972
    # 2013-01-06   -0.673690
    # Freq: D, Name: A, dtype: float64
    
    # print(df[0:3]) # 按位置切片,取位置 0、1、2 这三行,不包括位置 3(即第四行)!
    #                    A         B         C         D
    # 2018-01-16 -0.621070 -0.558260 -0.068434 -1.225484
    # 2018-01-17  0.500783 -0.289074 -0.251468 -0.935832
    # 2018-01-18  0.299410  2.279664  0.325912  0.461620
    
    # print(df['20180116':'20180117']) # 按日期标签切片,起止两端都包含在结果中
    #                    A         B         C         D
    # 2018-01-16 -0.009937  0.545212  0.682592  0.666001
    # 2018-01-17  0.641140  0.539408  0.876006 -0.410707
    
    # Selection by Label
    # print(df)
    # print(df.loc[dates[0]])
    #                    A         B         C         D
    # 2018-01-16 -1.531173  0.473732 -0.017051 -0.911358
    # 2018-01-17 -2.153974  1.320710  1.970252 -1.992209
    # 2018-01-18 -0.829090  1.096573  0.997688 -0.401185
    # A   -1.531173
    # B    0.473732
    # C   -0.017051
    # D   -0.911358
    # Name: 2018-01-16 00:00:00, dtype: float64
    
    # print(df)
    # print(df.loc[:,['A','B']])
    #                    A         B         C         D
    # 2018-01-16  0.077497  1.364726  0.343679 -1.099019
    # 2018-01-17 -0.573355 -0.939503  0.020275  1.073868
    # 2018-01-18 -0.507676 -0.820279 -1.802128 -0.328978
    #                    A         B
    # 2018-01-16  0.077497  1.364726
    # 2018-01-17 -0.573355 -0.939503
    # 2018-01-18 -0.507676 -0.820279
    
    # print(df)
    # print(df.loc['20180116':'20180117',['A','B']])
    #                    A         B         C         D
    # 2018-01-16  2.526965  0.820404  0.095466  0.611306
    # 2018-01-17 -1.359352  1.602012  0.337596  2.380324
    # 2018-01-18 -0.453608  1.454857  1.443562  2.145979
    #                    A         B
    # 2018-01-16  2.526965  0.820404
    # 2018-01-17 -1.359352  1.602012
    
    # print(df)
    # print(df.loc['20180116',['A','B']])
    #                    A         B         C         D
    # 2018-01-16 -0.143268 -0.954798  0.637066 -1.433980
    # 2018-01-17  0.527822  1.673820  1.150244 -0.644368
    # 2018-01-18  0.550647  0.012898  1.065985  2.614110
    # A   -0.143268
    # B   -0.954798
    # Name: 2018-01-16 00:00:00, dtype: float64
    
    # print(df)
    # print(df.loc[dates[0],'A'])
    #                    A         B         C         D
    # 2018-01-16  0.557596 -0.140733  0.921194 -0.618365
    # 2018-01-17  0.499742 -0.709669 -0.128449 -3.033026
    # 2018-01-18  0.014871 -1.198496 -0.241682 -0.502687
    # 0.5575964215814226
    
    # print(df)
    # print(df.at[dates[0],'A'])
    # at的使用方法与loc类似,但是比loc有更快的访问数据的速度,而且只能访问单个元素,不能访问多个元素。
    #                    A         B         C         D
    # 2018-01-16  0.557596 -0.140733  0.921194 -0.618365
    # 2018-01-17  0.499742 -0.709669 -0.128449 -3.033026
    # 2018-01-18  0.014871 -1.198496 -0.241682 -0.502687
    # 0.5575964215814226
    
    # Selection by Position
    
    # print(df)
    # print(df.iloc[0])
    # print(df.iloc[2])
    #                    A         B         C         D
    # 2018-01-16 -0.660315  0.116266 -0.914127  0.598307
    # 2018-01-17 -1.882812  1.715777 -0.355752 -0.192475
    # 2018-01-18  0.628092  0.700135  0.402080  0.949126
    # A   -0.660315
    # B    0.116266
    # C   -0.914127
    # D    0.598307
    # Name: 2018-01-16 00:00:00, dtype: float64
    # A    0.628092
    # B    0.700135
    # C    0.402080
    # D    0.949126
    # Name: 2018-01-18 00:00:00, dtype: float64
    
    # print(df)
    # print(df.iloc[0:1,1:3]) # [0:1] 不包括 1 , [1:3] 不包括 3
    #                    A         B         C         D
    # 2018-01-16 -0.685245  1.835675 -0.630813 -0.408195
    # 2018-01-17 -0.899057  0.257409  0.305275 -0.956311
    # 2018-01-18 -1.111117  0.280925 -0.463713  0.882284
    #                    B         C
    # 2018-01-16  1.835675 -0.630813
    
    # print(df)
    # print(df.iloc[[1,2,0],[0,2]]) # 按位置依次选第 1、2、0 行(从 0 开始计数)以及第 0、2 列(即 A、C 列)
    # print(df.iloc[1:2,:])
    # print(df.iloc[:,1:2])
    #                    A         B         C         D
    # 2018-01-16  0.221714  0.357890 -0.905870 -0.099446
    # 2018-01-17 -0.636384 -1.428893 -0.471488 -1.197841
    # 2018-01-18  1.044619 -0.346529 -0.164955  0.201145
    #                    A         C
    # 2018-01-17 -0.636384 -0.471488
    # 2018-01-18  1.044619 -0.164955
    # 2018-01-16  0.221714 -0.905870
    #                    A         B         C         D
    # 2018-01-17 -0.636384 -1.428893 -0.471488 -1.197841
    #                    B
    # 2018-01-16  0.357890
    # 2018-01-17 -1.428893
    # 2018-01-18 -0.346529
    
    # print(df.iloc[1,1])
    # print(df.iat[1,1]) # iat 只能按位置访问单个标量值,速度比上面的 iloc 更快
    # -0.2891820477026986
    # -0.2891820477026986
    
    # Boolean Indexing
    # print(df[df.A > 0]) # 多随机几次是有可能 empty set 的,选中的就是 df.A > 0 的那些行!
    #                    A         B         C         D
    # 2018-01-17  0.322452  0.803659 -0.982818  0.149446
    # 2018-01-18  0.501591 -0.114393 -0.306871 -2.258557
    # 上面几行都是 A 列数字大于 0 的
    
    # print(df[df > 0]) # 逐元素筛选:保留大于 0 的值,其余位置变为 NaN
    #                    A         B         C         D
    # 2018-01-16  1.453356       NaN  0.120802  0.368208
    # 2018-01-17  0.459706  0.802484       NaN       NaN
    # 2018-01-18       NaN  0.569428  0.952326  0.541748
    
    # Setting
    
    # Setting a new column automatically aligns the data by the indexes
    # s1 = pd.Series([1, 2, 3], index=pd.date_range('20180116', periods=3))
    # print(s1)
    # print(df)
    # df['F'] = s1
    # print(df)
    #
    # 2018-01-16    1
    # 2018-01-17    2
    # 2018-01-18    3
    # Freq: D, dtype: int64
    #                    A         B         C         D
    # 2018-01-16 -0.261046 -0.561609 -2.263514  2.359545
    # 2018-01-17  0.563822 -1.301185  0.906939  0.478209
    # 2018-01-18  0.942304  1.231033 -0.016457  0.659738
    #                    A         B         C         D  F
    # 2018-01-16 -0.261046 -0.561609 -2.263514  2.359545  1
    # 2018-01-17  0.563822 -1.301185  0.906939  0.478209  2
    # 2018-01-18  0.942304  1.231033 -0.016457  0.659738  3
    
    
    # print(df)
    # df.at[dates[0],'A'] = 0 # Setting values by label
    # df.iat[0, 1] = 0 # Setting values by position
    # df.loc[:,'D'] = np.array([99] * len(df)) # Setting by assigning with a numpy array
    # print(df)
    #                    A         B         C         D
    # 2018-01-16  1.113651 -0.978514 -0.852811  0.933365
    # 2018-01-17 -1.395547 -0.158742 -1.509723 -0.917854
    # 2018-01-18  0.672396 -1.248654 -1.430043 -1.133012
    #                    A         B         C   D
    # 2018-01-16  0.000000  0.000000 -0.852811  99
    # 2018-01-17 -1.395547 -0.158742 -1.509723  99
    # 2018-01-18  0.672396 -1.248654 -1.430043  99
    
    
    # A where operation with setting.
    # df2 = df.copy()
    # print(df2)
    # df2[df2 > 0] = -df2
    # print(df2)
    #                    A         B         C         D
    # 2018-01-16  0.824635 -0.914218 -0.953014  0.166094
    # 2018-01-17 -0.037925  0.018838  0.927026  0.322848
    # 2018-01-18  0.596024  0.851863 -0.548556  0.243168
    #                    A         B         C         D
    # 2018-01-16 -0.824635 -0.914218 -0.953014 -0.166094
    # 2018-01-17 -0.037925 -0.018838 -0.927026 -0.322848
    # 2018-01-18 -0.596024 -0.851863 -0.548556 -0.243168
  • 相关阅读:
    【转载】Modelsim之 DO文件简介
    【原创】Quartus与Modelsim的联合仿真及脚本
    【原创】Quartus II 简单设计流程
    【原创】Modelsim仿真简单流程
    【原创】静态时序分析整理一
    【原创】DC的一些命令
    【转载】建立时间与保持时间
    linux 创建链接命令 ln -s 软链接
    Vim常用命令
    gvim查找与替换命令
  • 原文地址:https://www.cnblogs.com/xkxf/p/8313308.html
Copyright © 2011-2022 走看看