zoukankan      html  css  js  c++  java
  • pandas 1 基本介绍

    import numpy as np
    import pandas as pd
    

    pd.Series() 构造数据

    # Build a one-dimensional Series from a plain Python list.
    # The list contains np.nan, and the printed result below shows every value
    # stored as float64, with the index defaulting to the positions 0..5.
    s = pd.Series([1, 3, 5, np.nan, 44, 1])
    
    print(s)
    
    # 0     1.0
    # 1     3.0
    # 2     5.0
    # 3     NaN
    # 4    44.0
    # 5     1.0
    # dtype: float64
    

    pd.date_range() 生成数据

    # Generate a DatetimeIndex of 2 entries starting at 2019-02-25.
    # No frequency is passed; the printed freq='D' below shows the step used
    # is one calendar day.
    dates = pd.date_range('20190225', periods=2)
    
    print(dates)  
    
    # DatetimeIndex(['2019-02-25', '2019-02-26'], dtype='datetime64[ns]', freq='D')
    

    pd.DataFrame() 构造数据(指定 index 和 columns)

    # Build a 2x4 DataFrame of random samples from np.random.randn, using the
    # `dates` index as row labels and 'a'..'d' as column labels.
    # No random seed is set in the visible code, so the numbers printed below
    # are just one sample run and will differ each time.
    df = pd.DataFrame(np.random.randn(2, 4), index=dates, columns=['a', 'b', 'c', 'd'])
    
    print(df)
    
    #                    a         b         c         d
    # 2019-02-25  1.236639 -0.918432 -0.211460  1.834082
    # 2019-02-26  1.191895 -1.680464  0.863866  0.171246
    

    pd.DataFrame() 构造数据(不指定 index 和 columns,使用默认的整数标签)

    # Build a 3x4 DataFrame from the integers 0..11. No index/columns are
    # given, so rows and columns get default integer labels (see output below).
    # Fix: the original line was missing the closing parenthesis of
    # pd.DataFrame(...), which made the snippet a SyntaxError.
    df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
    
    print(df1)
    
    #    0  1   2   3
    # 0  0  1   2   3
    # 1  4  5   6   7
    # 2  8  9  10  11
    

    pd.DataFrame() 构造数据(通过字典,每个键对应一列)

    # Build a DataFrame from a dict: each key becomes a column name.
    # 'A', 'B' and 'F' are single scalars; the output below shows them repeated
    # on every one of the 5 rows defined by the array-like columns 'C', 'D', 'E'.
    # Each column keeps its own dtype (float32, int32, category, ...), which is
    # what the later df2.dtypes printout demonstrates.
    df2 = pd.DataFrame({'A': 1.,
                        'B': pd.Timestamp('20130102'),
                        'C': pd.Series(1, index=list(range(5)), dtype='float32'),
                        'D': np.array([3] * 5, dtype='int32'),
                        'E': pd.Categorical(["test", "train", "test", "train", 'yzn']),
                        'F': 'foo'})
                        
    print(df2)
    
    #      A          B    C  D      E    F
    # 0  1.0 2013-01-02  1.0  3   test  foo
    # 1  1.0 2013-01-02  1.0  3  train  foo
    # 2  1.0 2013-01-02  1.0  3   test  foo
    # 3  1.0 2013-01-02  1.0  3  train  foo
    # 4  1.0 2013-01-02  1.0  3    yzn  foo
    

    属性与常用方法 df2.dtypes df2.index df2.columns

    df2.values df2.describe() df2.T

    df.sort_index(axis=1, ascending=False) df2.sort_values(by='E')

    # Per-column dtypes: one entry per column of df2.
    print(df2.dtypes)
    
    # A           float64
    # B    datetime64[ns]
    # C           float32
    # D             int32
    # E          category
    # F            object
    # dtype: object
    
    # Row labels of df2.
    # NOTE(review): the Int64Index repr below comes from the pandas version the
    # author ran; newer releases (pandas 2.x) presumably print a plain
    # Index([...], dtype='int64') instead -- confirm against your version.
    print(df2.index)
    
    # Int64Index([0, 1, 2, 3, 4], dtype='int64')
    
    # Column labels of df2.
    print(df2.columns)
    
    # Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
    
    # Underlying data as a 2-D array, one inner row per DataFrame row.
    # The output below mixes floats, Timestamps, ints and strings in each row.
    print(df2.values)
    
    # [[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
    #  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
    #  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
    #  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
    #  [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'yzn' 'foo']]
    
    # Summary statistics (count, mean, std, min, quartiles, max).
    # In the output below only the numeric columns A, C and D are summarised;
    # B, E and F do not appear.
    print(df2.describe())
    
    #          A    C    D
    # count  5.0  5.0  5.0
    # mean   1.0  1.0  3.0
    # std    0.0  0.0  0.0
    # min    1.0  1.0  3.0
    # 25%    1.0  1.0  3.0
    # 50%    1.0  1.0  3.0
    # 75%    1.0  1.0  3.0
    # max    1.0  1.0  3.0
    
    # Transpose: the column names A..F become row labels and the row labels
    # 0..4 become columns. The "..." in the output is display truncation;
    # the footer still reports all 5 columns.
    print(df2.T)
    
    #                      0  ...                    4
    # A                    1  ...                    1
    # B  2013-01-02 00:00:00  ...  2013-01-02 00:00:00
    # C                    1  ...                    1
    # D                    3  ...                    3
    # E                 test  ...                  yzn
    # F                  foo  ...                  foo
    # [6 rows x 5 columns]
    
    # NOTE: the sample outputs for `df` in this section were captured on
    # different runs. The values match neither the df printed earlier nor each
    # other, because df is filled by np.random.randn. Only the ordering of the
    # labels is the point here.

    # Sort by column labels (axis=1) in descending order: d, c, b, a.
    print(df.sort_index(axis=1, ascending=False))
    
    #                    d         c         b         a
    # 2019-02-25 -0.086707  0.388089  0.513976 -0.148502
    # 2019-02-26 -0.237655 -0.799583 -1.722373  0.318766
    
    # Sort by row labels (axis=0) in descending order: the later date comes first.
    print(df.sort_index(axis=0, ascending=False))
    
    #                    a         b         c         d
    # 2019-02-26 -2.117756  0.453841 -2.900436  1.061481
    # 2019-02-25 -0.974467  0.598005 -0.552265 -2.487490
    
    # Sort rows by the values in column 'E'. Each row keeps its original index
    # label, so the index reads 0, 2, 1, 3, 4 in the output below.
    print(df2.sort_values(by='E'))
    
    #      A          B    C  D      E    F
    # 0  1.0 2013-01-02  1.0  3   test  foo
    # 2  1.0 2013-01-02  1.0  3   test  foo
    # 1  1.0 2013-01-02  1.0  3  train  foo
    # 3  1.0 2013-01-02  1.0  3  train  foo
    # 4  1.0 2013-01-02  1.0  3    yzn  foo
    

    END

  • 相关阅读:
    列"xx"不在表Table中
    asp.net中自定义验证控件
    ASP.NET母版与内容页相对路径的问题
    html点小图看大图最快捷的方法
    ThinkCMF的跳转303 404等页面的方法
    关于ThinkCMF自带插件上传不了图片的解决方法
    js中百分比运算,大型数据会算错
    数据库价格汇总查询的方法
    信息资源5
    操作系统概论
  • 原文地址:https://www.cnblogs.com/yangzhaonan/p/10433059.html
Copyright © 2011-2022 走看看