  • pandas-Notes2

    # -*- coding: utf-8 -*-
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    dates = pd.date_range('20170601', periods=6)
    # make a random 6*4 matrix
    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
    
    print df
    
    # basic statistics; missing data is excluded by default
    # mean over each column (axis=0) is the default
    print df.mean()
    '''
    A   -0.640908
    B   -0.216183
    C    0.316962
    D   -0.634263
    dtype: float64
    '''
    # mean of rows
    print df.mean(1)
    
    # shift(2) moves the values down two positions; the vacated slots become NaN
    s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
    #print s
    '''
    2017-06-01    NaN
    2017-06-02    NaN
    2017-06-03    1.0
    2017-06-04    3.0
    2017-06-05    5.0
    2017-06-06    NaN
    Freq: D, dtype: float64
    '''
    
    # DataFrame minus Series: pandas broadcasts s along the index
    # df itself is not modified; sub() returns a new DataFrame
    print df
    print df.sub(s, axis='index')
    
    # cumulative sum along rows (axis=1); the default is along columns
    print df.apply(np.cumsum, axis=1)
    # apply lambda
    print df.apply(lambda x: x.max() - x.min())
    
    s = pd.Series(np.random.randint(0, 7, size=10))
    # the values contain duplicates
    # value_counts() tallies how often each value occurs, like a histogram
    print s.value_counts()
    
    # vectorized string methods live under .str
    # e.g. s.str.lower() lowercases every element
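    # a minimal sketch of .str methods (the sample data below is illustrative):
    s2 = pd.Series(['Cat', 'DOG', np.nan, 'bird'])
    print s2.str.lower()                    # lowercase each element; NaN stays NaN
    print s2.str.contains('d', case=False)  # boolean mask per element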
    
    print df
    # first 3 rows; plain slicing selects rows by default
    print df[:3]
    
    # concat takes a list of pieces
    pieces = [df[:3], df[4:]]
    print pd.concat(pieces)
    
    # join.
    left = pd.DataFrame({'key':['1', '2'], 'lvar':['leftVar1', 'leftVar2']})
    right = pd.DataFrame({'key':['1', '2'], 'rvar':['rightVar1', 'rightVar2']})
    print left
    print right
    # merge rows that share the same key value
    print pd.merge(left, right, on='key')
    '''
      key      lvar       rvar
    0   1  leftVar1  rightVar1
    1   2  leftVar2  rightVar2
    '''
    
    # append: add a row at the end
    # with ignore_index=False the original index label is kept;
    # with ignore_index=True the result is reindexed 0..n-1 (integers)
    print df.append(df.iloc[3], ignore_index=False)
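    # note: DataFrame.append was removed in newer pandas versions; a pd.concat equivalent (sketch):
    print pd.concat([df, df.iloc[[3]]])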
    
    # group
    df1 = pd.DataFrame({'A' : ['f', 'b', 'f', 'f', 'b'],
                        'B' : ['1', '2', '2', '1', '2'],
                        'C' : np.random.randn(5)})
    # use sum()
    print df1.groupby(['A', 'B']).sum()
    
    # stack()/unstack() pivot between column labels and index levels (wide vs. long layout)
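    # a minimal sketch of stack/unstack on a two-level index (the data here is illustrative):
    idx = pd.MultiIndex.from_tuples([('bar', 'one'), ('bar', 'two'),
                                     ('baz', 'one'), ('baz', 'two')])
    df_s = pd.DataFrame(np.random.randn(4, 2), index=idx, columns=['A', 'B'])
    stacked = df_s.stack()        # columns A/B become the innermost index level
    print stacked
    print stacked.unstack()       # back to the original wide layout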
    
    # pivot_table groups by the given index and columns, aggregating the values (mean by default)
    # pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])
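    # a minimal pivot_table sketch (the frame and column names are illustrative):
    df_p = pd.DataFrame({'A': ['one', 'one', 'two', 'two'],
                         'B': ['x', 'y', 'x', 'y'],
                         'C': ['c1', 'c2', 'c1', 'c2'],
                         'D': np.random.randn(4)})
    print pd.pivot_table(df_p, values='D', index=['A', 'B'], columns=['C'])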
    
    # time series: date_range, resampling, time-zone handling, etc.
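    # a minimal resampling sketch (the frequency strings here are illustrative):
    rng = pd.date_range('20170601', periods=100, freq='S')            # one point per second
    ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
    print ts.resample('1Min').sum()                                    # aggregate into 1-minute bins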
    
    # categoricals
    
    # declare as category
    s1 = pd.Series(['A', 'B', 'B', 'C', 'A', 'E']).astype("category")
    # rename the categories; the new list must have as many entries as there are unique levels
    s1.cat.categories = ["good", "bad", 'A', 'B']
    print s1
    # df.sort_values(by="categoryName")
    # df.groupby("categoryName").size()
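    # a minimal sketch of sorting/grouping by a categorical column (names below are illustrative):
    df_cat = pd.DataFrame({'grade': s1, 'score': np.random.randn(6)})
    print df_cat.sort_values(by='grade')      # sorts by category order, not alphabetically
    print df_cat.groupby('grade').size()      # row count per category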
    
    # plot
    df2 = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    df2 = df2.cumsum()
    # four lines in four colors, with a legend
    df2.plot()
    #plt.show()
    
    # file in & out
    df2.to_csv("df2.csv")
    
    df3 = pd.read_csv("df2.csv")
    print df3.head(3)
    
    #df2.to_hdf("df2.h5", 'df')
    #pd.read_hdf('df2.h5', 'df')
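    # (the HDF5 round-trip above requires the PyTables package)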
    
    # requires the openpyxl module
    df2.to_excel('df2.xlsx', sheet_name='sheet1')
    pd.read_excel('df2.xlsx', 'sheet1', index_col=None, na_values=['NA'])
    

      

  • Original post: https://www.cnblogs.com/pxy7896/p/6946569.html