zoukankan      html  css  js  c++  java
  • pandas模块

    pandas模块:

    读取 excel / json / sql / csv / ini 文件:

    import pandas as pd
    
    

    DataFrame数据结构:

    DataFrame是一个表格型的数据结构,含有一组有序的列。

    DataFrame可以被看做是由Series组成的字典,并且共用一个索引。

    产生时间对象数组:date_range

    #参数
    start	开始时间
    end	结束时间
    periods	时间长度
    freq	时间频率,默认为'D'(按天)
    
    #属性与方法
    dtypes	查看数据类型
    index	查看行序列或者索引
    columns	查看各列的标签
    values	查看数据框内的数据,也即不含表头索引的数据
    describe	查看数据每一列的极值,均值,中位数,只可用于数值型数据
    transpose	转置,也可用T来操作
    sort_index	排序,可按行或列index排序输出
    sort_values	按数据值来排序
    
    import numpy as np
    import pandas as pd

    # Six consecutive month-end timestamps, starting from January 2019.
    # An offset object is passed instead of the 'M' string alias because
    # newer pandas releases renamed that alias to 'ME'.
    date = pd.date_range('20190101', periods=6, freq=pd.offsets.MonthEnd())
    print(date)
    # Output (newer pandas shows the frequency label as 'ME'):
    # DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
    #                '2019-05-31', '2019-06-30'],
    #               dtype='datetime64[ns]', freq='M')

    # Seed the generator so the random sample is reproducible.
    np.random.seed(1)
    # 6x4 samples from the standard normal distribution, scaled by 10.
    arr = 10 * np.random.randn(6, 4)
    print(arr)

    # Rows are labelled by the dates built above, columns by c1..c4.
    df = pd.DataFrame(arr, index=date, columns=['c1', 'c2', 'c3', 'c4'])
    print(df)  # prints the table

    # Data type of each column.
    print(df.dtypes)
    # Column labels.
    print(df.columns)
    # Output: Index(['c1', 'c2', 'c3', 'c4'], dtype='object')
    
    # Sort the rows by their row labels (the date index); ascending by default.
    df.sort_index(axis=0)
    # Sort the columns by their labels [c1, c2, c3, c4]; ascending by default.
    df.sort_index(axis=1)

    # Select rows by label. The labels must match the month-end index built
    # earlier; label slices with .loc include both endpoints.
    df.loc['2019-01-31':'2019-03-31']
    # Positional selection, sliced like a numpy array (end positions excluded).
    df.iloc[1:4, 1:4]
    # Boolean-mask selection: keep the rows whose c1 value is positive.
    df[df['c1'] > 0]

    # Replace values in a DataFrame: zero out the first 3 rows of the first 2 columns.
    df.iloc[0:3, 0:2] = 0
    
    # Read CSV data from an in-memory text buffer.
    import pandas as pd
    from io import StringIO

    # Raw CSV text without a header row; empty fields are parsed as NaN.
    test_data = '''
    5.1,,1.4,0.2
    4.9,3.0,1.4,0.2
    4.7,3.2,,0.2
    7.0,3.2,4.7,1.4
    6.4,3.2,4.5,1.5
    6.9,3.1,4.9,
    ,,,
    '''

    # read_csv expects a path or a file-like object, so wrap the text.
    test_data = StringIO(test_data)
    # header=None: no header row in the data; the column labels are given explicitly.
    df = pd.read_csv(
        test_data,
        header=None,
        names=['c1', 'c2', 'c3', 'c4'],
    )
    df
    	c1	c2	c3	c4
    0	5.1	NaN	1.4	0.2
    1	4.9	3.0	1.4	0.2
    2	4.7	3.2	NaN	0.2
    3	7.0	3.2	4.7	1.4
    4	6.4	3.2	4.5	1.5
    5	6.9	3.1	4.9	NaN
    6	NaN	NaN	NaN	NaN
    
    # Read a JSON array of records from an in-memory string.
    import pandas as pd
    from io import StringIO

    # Triple quotes are required because the literal spans several lines.
    strtext = '''[{"ttery":"min","issue":"20130801-3391","code":"8,4,5,2,9","code1":"297734529","code2":null,"time":1013395466000},
    {"ttery":"min","issue":"20130801-3390","code":"7,8,2,1,2","code1":"298058212","code2":null,"time":1013395406000},
    {"ttery":"min","issue":"20130801-3389","code":"5,9,1,2,9","code1":"298329129","code2":null,"time":1013395346000},
    {"ttery":"min","issue":"20130801-3388","code":"3,8,7,3,3","code1":"298588733","code2":null,"time":1013395286000},
    {"ttery":"min","issue":"20130801-3387","code":"0,8,5,2,7","code1":"298818527","code2":null,"time":1013395226000}]'''

    # Wrap the text in a file-like buffer: passing a raw JSON string to
    # read_json is deprecated in recent pandas releases.
    df = pd.read_json(StringIO(strtext), orient='records')
    df
    
    #读取sql语句
    import numpy as np
    import pandas as pd
    import pymysql
    
    
    def conn(sql, params=None):
        """Run a SQL query against the local MySQL database and return the rows.

        Args:
            sql: SQL text to execute.
            params: Optional values bound to the placeholders in ``sql``.
                Prefer this over formatting values into the SQL string.

        Returns:
            A DataFrame holding the query result, or None if the query failed.
        """
        # Named ``connection`` so the local does not shadow this function's name.
        # NOTE(review): credentials are hard-coded for the tutorial; load them
        # from configuration in real use.
        connection = pymysql.connect(
            host="localhost",
            port=3306,
            user="root",
            password="123",
            database="db1",
        )
        try:
            return pd.read_sql(sql, con=connection, params=params)
        except Exception as e:
            # Best-effort: report the failure together with its cause, then
            # signal it to the caller by returning None.
            print("SQL is not correct!", e)
            return None
        finally:
            # Always release the connection, whether or not the query succeeded.
            connection.close()
    
    
    sql = "select * from test1 limit 0, 10"  # SQL statement: first 10 rows of test1
    data = conn(sql)
    # conn() returns None when the query fails, so guard before using the result.
    if data is not None:
        print(data.columns.tolist())  # column names
        print(data)  # the rows
    
  • 相关阅读:
    Hibernate初级
    Servlet, Listener 、 Filter.
    DBCP数据源
    数据库连接池
    MySQL入门笔记
    20170330 webservice代理类测试
    20170330 ABAP代理生成
    20170329 隐士增强问题
    ABAP rfc 发布webservice 错误
    ABAP 性能优化001
  • 原文地址:https://www.cnblogs.com/shaozheng/p/11608413.html
Copyright © 2011-2022 走看看