zoukankan      html  css  js  c++  java
  • DataFrame常用方法

    #Pandas数据结构DataFrame:基本技巧
    
    #数据查看、转置 / 添加、修改、删除值 / 对齐 / 排序
    
    import numpy as np
    import pandas as pd
    
    # Inspecting data and transposing
    # Build a 5x2 frame of random values scaled by 100, with columns 'a' and 'b'
    df = pd.DataFrame(np.random.rand(10).reshape(5,2)*100,columns=list('ab'))
    print(df)
    print(df.head(2)) # first rows; head() returns 5 rows when no count is given
    print(df.tail(3)) # last rows
    
    print(df.T) # transpose: swap rows and columns
    
               a          b
    0  13.001170  52.302971
    1  45.443235  17.136341
    2  87.749437  61.681361
    3  27.203306  54.923447
    4  27.661213  28.096315
               a          b
    0  13.001170  52.302971
    1  45.443235  17.136341
               a          b
    2  87.749437  61.681361
    3  27.203306  54.923447
    4  27.661213  28.096315
               0          1          2          3          4
    a  13.001170  45.443235  87.749437  27.203306  27.661213
    b  52.302971  17.136341  61.681361  54.923447  28.096315
    
    # Adding, modifying and deleting values
    # Adding and modifying work much like item assignment on a dict
    df = pd.DataFrame(np.random.rand(10).reshape(5,2)*100,columns=list('ab'))
    df['c']=10 # add column 'c' with every value set to 10
    df.loc[5]=20 # add a row labelled 5 with every value set to 20
    df.loc[1:3,'a':'c']=30 # set rows 1 through 3, columns 'a' through 'c', to 30 (label slices include both ends)
    print(df)
    
    # Deleting: drop() is the usual tool
    df1 = pd.DataFrame(np.random.rand(10).reshape(5,2)*100,columns=list('ab'))
    df1.drop(['a'],axis=1,inplace=True)# dropping a column needs axis=1; inplace=True modifies df1 itself
    df2 = df.drop([0],axis=0) # drop() removes rows by default (axis=0); with the default inplace=False it returns a new frame and leaves the original unchanged. Note: this drops from df, not df1
    print(df1)
    print(df2)
    
    # A column can also be removed with del df['a'], though this is generally not used
    
               a          b   c
    0   1.402976   7.213545  10
    1  30.000000  30.000000  30
    2  30.000000  30.000000  30
    3  30.000000  30.000000  30
    4  16.940650  71.386239  10
    5  20.000000  20.000000  20
               b
    0  20.961412
    1  86.214446
    2  35.080610
    3   9.396529
    4   9.427302
              a          b   c
    1  30.00000  30.000000  30
    2  30.00000  30.000000  30
    3  30.00000  30.000000  30
    4  16.94065  71.386239  10
    5  20.00000  20.000000  20
    
    # Alignment
    
    # Two frames of different shapes: 10x4 (columns A-D) and 7x3 (columns A-C)
    df1 = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
    df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])
    print(df1)
    print(df2)
    print(df1 + df2)
    # Adding DataFrames aligns them automatically on columns and index (row labels);
    # any value plus NaN is still NaN, so labels present in only one frame
    # (column D and rows 7-9 here) come out as NaN
    
              A         B         C         D
    0 -0.186700  0.654873 -0.675748  1.274324
    1 -0.203601  0.522645  1.327030 -1.211796
    2  1.210807 -0.333704 -0.068803  0.626071
    3  0.381998  1.352354 -1.122596 -0.039185
    4 -1.794919 -0.636484 -1.248661  0.595253
    5 -0.724729  0.845360 -0.318300  0.154419
    6 -1.363716  0.539871 -0.466797 -0.991755
    7 -1.746204 -0.211044  0.265923  1.479545
    8  0.964734  1.702910 -1.231199 -0.095801
    9  0.027144  0.565912  0.494676  0.297138
              A         B         C
    0 -0.266384 -0.697483 -0.787006
    1 -1.247451  1.418789 -0.728944
    2 -1.750087  0.108112  0.721652
    3  0.131859 -1.157439  0.169533
    4  1.051047 -0.395463  1.161415
    5 -0.232839 -0.203111 -0.135780
    6 -1.933584  0.280714 -1.493124
              A         B         C   D
    0 -0.453084 -0.042610 -1.462754 NaN
    1 -1.451051  1.941434  0.598086 NaN
    2 -0.539280 -0.225593  0.652849 NaN
    3  0.513857  0.194915 -0.953063 NaN
    4 -0.743872 -1.031947 -0.087246 NaN
    5 -0.957568  0.642249 -0.454080 NaN
    6 -3.297300  0.820585 -1.959921 NaN
    7       NaN       NaN       NaN NaN
    8       NaN       NaN       NaN NaN
    9       NaN       NaN       NaN NaN
    
    # Sorting
    # 1. Sort by values (single column): sort_values()
    df1 = pd.DataFrame(np.random.rand(16).reshape(4,4)*100,
                       columns = ['a','b','c','d'])
    print(df1)
    print(df1.sort_values(['a'], ascending = True))  # ascending order
    print(df1.sort_values(['a'], ascending = False))  # descending order
    print('------')
    # The ascending parameter selects ascending or descending order; ascending is the default
    
    
    # Sort by values (several columns): each later column breaks ties in the earlier ones
    df2 = pd.DataFrame({'a':[1,1,1,1,2,2,2,2],
                      'b':list(range(8)),
                      'c':list(range(8,0,-1))})
    print(df2)
    print(df2.sort_values(['a','c']))
    
    
    # 2. Sort by row index: sort_index()
    # df3 uses an integer index, df4 a string index
    df3 = pd.DataFrame(np.random.rand(16).reshape(4,4)*100,
                      index = [5,4,3,2],
                       columns = ['a','b','c','d'])
    df4 = pd.DataFrame(np.random.rand(16).reshape(4,4)*100,
                      index = ['h','s','x','g'],
                       columns = ['a','b','c','d'])
    print(df3)
    print(df3.sort_index())
    print(df4)
    print(df4.sort_index())
    
               a          b          c          d
    0  14.363296  24.468750   0.862332  78.414560
    1  50.376623  70.058587  12.016014  43.824154
    2   9.164948   6.009718  59.899663  34.608598
    3  32.776729   5.877378  19.845187  18.427510
               a          b          c          d
    2   9.164948   6.009718  59.899663  34.608598
    0  14.363296  24.468750   0.862332  78.414560
    3  32.776729   5.877378  19.845187  18.427510
    1  50.376623  70.058587  12.016014  43.824154
               a          b          c          d
    1  50.376623  70.058587  12.016014  43.824154
    3  32.776729   5.877378  19.845187  18.427510
    0  14.363296  24.468750   0.862332  78.414560
    2   9.164948   6.009718  59.899663  34.608598
    ------
       a  b  c
    0  1  0  8
    1  1  1  7
    2  1  2  6
    3  1  3  5
    4  2  4  4
    5  2  5  3
    6  2  6  2
    7  2  7  1
       a  b  c
    3  1  3  5
    2  1  2  6
    1  1  1  7
    0  1  0  8
    7  2  7  1
    6  2  6  2
    5  2  5  3
    4  2  4  4
               a          b          c          d
    5  31.649529  84.868273  35.960909  17.991508
    4  70.713399   3.196341   1.838718   2.587589
    3  22.504265   3.529035  29.175653  61.559803
    2  10.746665  74.852949  49.412317  28.020466
               a          b          c          d
    2  10.746665  74.852949  49.412317  28.020466
    3  22.504265   3.529035  29.175653  61.559803
    4  70.713399   3.196341   1.838718   2.587589
    5  31.649529  84.868273  35.960909  17.991508
               a          b          c          d
    h  77.066414   3.475510  64.363116  51.800866
    s  40.764677  42.263241   9.385392  44.899110
    x  12.105920  29.185573  59.567246  79.056572
    g  67.852492  11.714584  84.943183  75.268707
               a          b          c          d
    g  67.852492  11.714584  84.943183  75.268707
    h  77.066414   3.475510  64.363116  51.800866
    s  40.764677  42.263241   9.385392  44.899110
    x  12.105920  29.185573  59.567246  79.056572
  • 相关阅读:
    设计模式之——浅谈strategy模式(策略模式)
    设计模式之——bridge模式
    验证ip地址
    查询sqlserver数据库表的记录数
    iis网站部署常见错误
    asp.net 向后台提交 html 代码段 包括 <> 标签
    jquery花式图片库——jqFancyTransitions
    为sqlserver数据库添加专用用户名
    sqlserver 收缩数据库/文件
    你使用的ie版本过低请。。。
  • 原文地址:https://www.cnblogs.com/Franciszw/p/13888659.html
Copyright © 2011-2022 走看看