zoukankan html css js c++ java

1st_pandas

笔记内容来自up主莫烦
https://www.bilibili.com/video/BV1Ex411L7oT?p=9


import pandas as pd
import numpy as np

"""
1. 创建连续的日期索引（返回 DatetimeIndex，而不是字符串）  pd.date_range(start,end,periods,freq)
    注意：start、end、periods、freq 四个参数中必须恰好指定三个（freq 省略时默认为 'D'，即按天）
    dates = pd.date_range('20160101',periods=6)
    
    
2. pd.DataFrame(data,index,columns,dtype,copy)的创建
    
    1）
    # 这里要注意  随机生成数据np.random.randn(6,4)
    df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=[1,2,3,4])
    
    print(df)
                           1         2         3         4
        2016-01-01  0.232239 -2.057311  1.471347 -1.006878
        2016-01-02  0.644637 -1.303135 -0.457582 -0.847513
        2016-01-03  0.049504 -0.297996  0.640345  0.841291
        2016-01-04 -0.208046 -1.093770 -1.206976  0.977323
        2016-01-05 -0.952440  0.886028  1.401906 -0.898003
        2016-01-06  0.287711  1.075616 -1.715452  0.669161
    
    2） 不指定 index 和 columns 时，行名和列名默认都是从 0 开始的整数
    df1 = pd.DataFrame(np.arange(12).reshape(3,4))
           0  1   2   3
        0  0  1   2   3
        1  4  5   6   7
        2  8  9  10  11
    
    3）用字典创建 DataFrame：标量值会被自动广播，行数（index）与最长的列表对齐
    df2  = pd.DataFrame({'A':[1,2,3,4],'B':2,'C':3})
           A  B  C
        0  1  2  3
        1  2  2  3
        2  3  2  3
        3  4  2  3

3.DataFrame的属性访问

    1）print(df.index)  # 输出 row 的名字 
        DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
                       '2016-01-05', '2016-01-06'],
                      dtype='datetime64[ns]', freq='D')
                      
    2）print(df.columns)   # 输出 col 的名字
        Int64Index([1, 2, 3, 4], dtype='int64')
        
    3）print(df.values)   # 输出 data
        [[ 0.23223861 -2.05731101  1.47134695 -1.00687769]
         [ 0.64463711 -1.30313538 -0.45758167 -0.84751345]
         [ 0.04950445 -0.29799635  0.64034504  0.84129072]
         [-0.20804606 -1.09376993 -1.2069759   0.97732253]
         [-0.95243955  0.88602791  1.40190587 -0.89800295]
         [ 0.28771058  1.07561617 -1.71545208  0.66916143]]
         
    4）print(df.describe())   # 只统计数值列，忽略日期（date）、字符串（str）等非数值列
                      1         2         3         4
        count  6.000000  6.000000  6.000000  6.000000
        mean   0.008934 -0.465095  0.022265 -0.044103
        std    0.548574  1.253790  1.352495  0.963071
        min   -0.952440 -2.057311 -1.715452 -1.006878
        25%   -0.143658 -1.250794 -1.019627 -0.885381
        50%    0.140872 -0.695883  0.091382 -0.089176
        75%    0.273843  0.590022  1.211516  0.798258
        max    0.644637  1.075616  1.471347  0.977323

4. pd.DataFrame()的排序 
    1） df.sort_index()  只根据行/列的标签（index 名或 columns 名）排序，不看数据的大小
        print(df.sort_index(axis=1,ascending=False))
                               4         3         2         1
            2016-01-01 -1.006878  1.471347 -2.057311  0.232239
            2016-01-02 -0.847513 -0.457582 -1.303135  0.644637
            2016-01-03  0.841291  0.640345 -0.297996  0.049504
            2016-01-04  0.977323 -1.206976 -1.093770 -0.208046
            2016-01-05 -0.898003  1.401906  0.886028 -0.952440
            2016-01-06  0.669161 -1.715452  1.075616  0.287711
            
        print(df.sort_index(axis=0,ascending=False))

                               1         2         3         4
            2016-01-06  0.287711  1.075616 -1.715452  0.669161
            2016-01-05 -0.952440  0.886028  1.401906 -0.898003
            2016-01-04 -0.208046 -1.093770 -1.206976  0.977323
            2016-01-03  0.049504 -0.297996  0.640345  0.841291
            2016-01-02  0.644637 -1.303135 -0.457582 -0.847513
            2016-01-01  0.232239 -2.057311  1.471347 -1.006878
    
    2）df.sort_values  根据 某一行/列的大小来排序
    print(df.sort_values(by=3,ascending=False))
                           1         2         3         4
        2016-01-01  0.232239 -2.057311  1.471347 -1.006878
        2016-01-05 -0.952440  0.886028  1.401906 -0.898003
        2016-01-03  0.049504 -0.297996  0.640345  0.841291
        2016-01-02  0.644637 -1.303135 -0.457582 -0.847513
        2016-01-04 -0.208046 -1.093770 -1.206976  0.977323
        2016-01-06  0.287711  1.075616 -1.715452  0.669161
        
        
    print(df.sort_values(by='20160101',axis=1,ascending=False))
                       3         1         4         2
        2016-01-01  1.471347  0.232239 -1.006878 -2.057311
        2016-01-02 -0.457582  0.644637 -0.847513 -1.303135
        2016-01-03  0.640345  0.049504  0.841291 -0.297996
        2016-01-04 -1.206976 -0.208046  0.977323 -1.093770
        2016-01-05  1.401906 -0.952440 -0.898003  0.886028
        2016-01-06 -1.715452  0.287711  0.669161  1.075616
"""
# Demo script: build a Series and several DataFrames, then inspect and sort
# them. Every result is printed so the output can be compared with the notes.
#
# NOTE: the original bare `pd.date_range()` call was removed. Called without
# arguments it raises ValueError (three of start/end/periods/freq are
# required), which aborted the script before anything was printed.

# A float Series; np.nan marks a missing value.
s = pd.Series([1, 3, 5, np.nan, 44, 1], dtype=float)
print(s)

# Six consecutive dates starting at 2016-01-01, used as row labels below.
dates = pd.date_range('20160101', periods=6)
print(dates)

# 6x4 frame of random normal samples, rows labelled by date, columns by 1..4.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=[1, 2, 3, 4])
print(df)

# Without index/columns arguments both axes get default integer labels.
df1 = pd.DataFrame(np.arange(12).reshape(3, 4))
print(df1)

# Built from a dict: the scalars for 'B' and 'C' are repeated to match the
# length of the list given for 'A'.
df2 = pd.DataFrame({'A': [1, 2, 3, 4], 'B': 2, 'C': 3})
print(df2)

# Per-column dtypes.
print(df.dtypes)

# Row labels (the index).
print(df.index)
# Column labels.
print(df.columns)
# The underlying data as a NumPy array (labels are not included).
print(df.values)

# Summary statistics; only numeric columns are summarised, non-numeric ones
# such as dates or strings are ignored.
print(df.describe())

# Sort by column labels, descending (reorders the columns).
print(df.sort_index(axis=1, ascending=False))
# Sort by row labels, descending (reorders the rows).
print(df.sort_index(axis=0, ascending=False))

# Sort the rows by the values in column 3, descending.
print(df.sort_values(by=3, ascending=False))

# Sort the columns by their values in the 2016-01-01 row, descending; the
# string label is looked up in the DatetimeIndex.
print(df.sort_values(by='20160101', axis=1, ascending=False))

查看全文

相关阅读:
29 友盟大数据--flume源码查看分析ExecSource--UmengExecSource 改造exec源：监控目录、收集新文件---增加个守护线程不断监控目录
 28 友盟大数据--flume源码查看分析- ExecSource-参照主机名拦截器HostInterceptor ---写限速拦截器
 Demo
分布式爬虫-Kafka监控
 SQL优化
 MySQL
Spring
Mybatis
类加载器
 数据仓库分层

原文地址：https://www.cnblogs.com/ChevisZhang/p/12911133.html