zoukankan      html  css  js  c++  java
  • 1st_pandas

    笔记内容来自up主莫烦
    https://www.bilibili.com/video/BV1Ex411L7oT?p=9

    import pandas as pd
    import numpy as np

    """
    1. 创建连续日期索引(DatetimeIndex) pd.date_range(start,end,periods,freq)
    dates = pd.date_range('20160101',periods=6)


    2. pd.DataFrame(data,index,columns,dtype,copy)的创建

    1)
    # 这里要注意 随机生成数据np.random.randn(6,4)
    df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=[1,2,3,4])

    print(df)
    1 2 3 4
    2016-01-01 0.232239 -2.057311 1.471347 -1.006878
    2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
    2016-01-03 0.049504 -0.297996 0.640345 0.841291
    2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
    2016-01-05 -0.952440 0.886028 1.401906 -0.898003
    2016-01-06 0.287711 1.075616 -1.715452 0.669161

    2) 不指定 index 和 columns 时,行名和列名默认都是从 0 开始的整数
    df1 = pd.DataFrame(np.arange(12).reshape(3,4))
    0 1 2 3
    0 0 1 2 3
    1 4 5 6 7
    2 8 9 10 11

    3)用字典创建 DataFrame:标量值会自动广播,行数(index)与最长的一项对齐
    df2 = pd.DataFrame({'A':[1,2,3,4],'B':2,'C':3})
    A B C
    0 1 2 3
    1 2 2 3
    2 3 2 3
    3 4 2 3

    3.DataFrame的属性访问

    1)print(df.index) # 输出 row 的名字
    DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
    '2016-01-05', '2016-01-06'],
    dtype='datetime64[ns]', freq='D')

    2)print(df.columns) # 输出 col 的名字
    Int64Index([1, 2, 3, 4], dtype='int64')

    3)print(df.values) # 输出 data
    [[ 0.23223861 -2.05731101 1.47134695 -1.00687769]
    [ 0.64463711 -1.30313538 -0.45758167 -0.84751345]
    [ 0.04950445 -0.29799635 0.64034504 0.84129072]
    [-0.20804606 -1.09376993 -1.2069759 0.97732253]
    [-0.95243955 0.88602791 1.40190587 -0.89800295]
    [ 0.28771058 1.07561617 -1.71545208 0.66916143]]

    4)print(df.describe()) # 只能计算 数值,忽视 data,str 等
    1 2 3 4
    count 6.000000 6.000000 6.000000 6.000000
    mean 0.008934 -0.465095 0.022265 -0.044103
    std 0.548574 1.253790 1.352495 0.963071
    min -0.952440 -2.057311 -1.715452 -1.006878
    25% -0.143658 -1.250794 -1.019627 -0.885381
    50% 0.140872 -0.695883 0.091382 -0.089176
    75% 0.273843 0.590022 1.211516 0.798258
    max 0.644637 1.075616 1.471347 0.977323

    4. pd.DataFrame()的排序
    1) df.sort_index() 只根据行/列的标签(index/columns)排序,不看数据的值
    print(df.sort_index(axis=1,ascending=False))
    4 3 2 1
    2016-01-01 -1.006878 1.471347 -2.057311 0.232239
    2016-01-02 -0.847513 -0.457582 -1.303135 0.644637
    2016-01-03 0.841291 0.640345 -0.297996 0.049504
    2016-01-04 0.977323 -1.206976 -1.093770 -0.208046
    2016-01-05 -0.898003 1.401906 0.886028 -0.952440
    2016-01-06 0.669161 -1.715452 1.075616 0.287711

    print(df.sort_index(axis=0,ascending=False))

    1 2 3 4
    2016-01-06 0.287711 1.075616 -1.715452 0.669161
    2016-01-05 -0.952440 0.886028 1.401906 -0.898003
    2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
    2016-01-03 0.049504 -0.297996 0.640345 0.841291
    2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
    2016-01-01 0.232239 -2.057311 1.471347 -1.006878

    2)df.sort_values 根据 某一行/列的大小来排序
    print(df.sort_values(by=3,ascending=False))
    1 2 3 4
    2016-01-01 0.232239 -2.057311 1.471347 -1.006878
    2016-01-05 -0.952440 0.886028 1.401906 -0.898003
    2016-01-03 0.049504 -0.297996 0.640345 0.841291
    2016-01-02 0.644637 -1.303135 -0.457582 -0.847513
    2016-01-04 -0.208046 -1.093770 -1.206976 0.977323
    2016-01-06 0.287711 1.075616 -1.715452 0.669161


    print(df.sort_values(by='20160101',axis=1,ascending=False))
    3 1 4 2
    2016-01-01 1.471347 0.232239 -1.006878 -2.057311
    2016-01-02 -0.457582 0.644637 -0.847513 -1.303135
    2016-01-03 0.640345 0.049504 0.841291 -0.297996
    2016-01-04 -1.206976 -0.208046 0.977323 -1.093770
    2016-01-05 1.401906 -0.952440 -0.898003 0.886028
    2016-01-06 -1.715452 0.287711 0.669161 1.075616
    """
    # Demo script: build a Series and a few DataFrames, then inspect and sort
    # them.  Every step prints its result so the output can be compared with
    # the notes above.

    # A float Series; np.nan marks a missing value.
    s = pd.Series([1, 3, 5, np.nan, 44, 1], dtype=float)
    print(s)

    # NOTE: pd.date_range() cannot be called without arguments (it raises
    # ValueError), so the range is always built from an explicit start date
    # and a number of periods.  The default frequency is one day.
    dates = pd.date_range('20160101', periods=6)
    print(dates)

    # 6x4 frame of standard-normal random numbers, indexed by the dates above
    # and with integer column labels 1..4.
    df = pd.DataFrame(
        np.random.randn(6, 4),
        index=dates,
        columns=[1, 2, 3, 4],
    )
    print(df)

    # Without explicit index/columns both default to 0-based integers.
    df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
    print(df1)

    # Built from a dict: the scalars for 'B' and 'C' are broadcast to the
    # length of the list given for 'A'.
    df2 = pd.DataFrame({'A': [1, 2, 3, 4], 'B': 2, 'C': 3})
    print(df2)

    # Data type of each column.
    print(df.dtypes)

    # Row labels.
    print(df.index)
    # Column labels.
    print(df.columns)
    # The underlying data as a NumPy array.
    print(df.values)

    # Summary statistics (count, mean, std, min, quartiles, max) for the
    # numeric columns.
    print(df.describe())

    # Sort by column labels, descending.
    print(df.sort_index(axis=1, ascending=False))
    # Sort by row labels, descending.
    print(df.sort_index(axis=0, ascending=False))

    # Reorder the rows by the values in column 3, largest first.
    print(df.sort_values(by=3, ascending=False))

    # Reorder the columns by the values in the 2016-01-01 row, largest first.
    print(df.sort_values(by='20160101', axis=1, ascending=False))
  • 相关阅读:
    逆光拍摄常见的问题(解决大光比问题)
    HDP和包围曝光
    直方图
    linux查找文件的命令【转】
    100篇大数据文章[转]
    squid
    修改/etc/resolv.conf又恢复到原来的状态?[转]
    python字符串及正则表达式[转]
    GraphLab介绍[转]
    Scala 中的 apply 和 update 方法[转]
  • 原文地址:https://www.cnblogs.com/ChevisZhang/p/12911133.html
Copyright © 2011-2022 走看看