zoukankan      html  css  js  c++  java
  • Pandas基本功能之层次化索引及层次化汇总

    层次化索引

    层次化索引也就是在一个轴上拥有多个(两个及以上)索引级别

    Series的层次化索引

    data=Series(np.random.randn(10),index=[
        ['a','a','a','b','b','b','c','c','d','d'],
        [1,2,3,1,2,3,1,2,2,3]
    ])
    data
    
    a  1    0.965999
       2   -0.271733
       3    0.133910
    b  1   -0.806885
       2   -0.622905
       3   -0.355330
    c  1   -0.659194
       2   -1.082872
    d  2   -0.043984
       3   -1.125324
    dtype: float64
    
    # 选取数据子集
    data['b']
    1   -0.806885
    2   -0.622905
    3   -0.355330
    dtype: float64
    
    data['b':'c'] # 在pandas中按标签切片时首尾都包含(闭区间)
    
    b  1   -0.806885
       2   -0.622905
       3   -0.355330
    c  1   -0.659194
       2   -1.082872
    dtype: float64
    
    data.ix[['b','d']] # 按行索引名称选择(注意:.ix已在pandas 1.0中移除,新版本请改用data.loc[['b','d']])
    b  1   -0.806885
       2   -0.622905
       3   -0.355330
    d  2   -0.043984
       3   -1.125324
    dtype: float64
    
    # 在内层索引中进行选取:选出所有内层索引为2的行
    data[:,2]
    a   -0.271733
    b   -0.622905
    c   -1.082872
    d   -0.043984
    dtype: float64
    
    # 层次化索引在数据重塑和基于分组的操作中扮演着重要的角色
    # unstack()会把带层次化索引的Series重排为DataFrame:外层索引作为DataFrame的行索引,内层索引作为列索引,原数据中不存在的组合用NaN填充
    data.unstack()
    
    	1	          2	          3
    a	0.965999	-0.271733	0.133910
    b	-0.806885	-0.622905	-0.355330
    c	-0.659194	-1.082872	NaN
    d	NaN	        -0.043984	-1.125324
    
    # unstack()的逆运算,转回来
    data.unstack().stack()
    
    a  1    0.965999
       2   -0.271733
       3    0.133910
    b  1   -0.806885
       2   -0.622905
       3   -0.355330
    c  1   -0.659194
       2   -1.082872
    d  2   -0.043984
       3   -1.125324
    dtype: float64
    

    DataFrame的层次化索引

    frame = pd.DataFrame(np.arange(12).reshape(4,3),index=[['a','a','b','b'],[1,2,1,2]],
                columns=[['ohio','ohio','color'],['green','red','green']]
                )
    frame
    
    	ohio	color
       green	red	green
    a	1	0	1	2
        2	3	4	5
    b	1	6	7	8
        2	9	10	11
    
    # 给层级行索引加名字
    frame.index.names = ['key1','key2']
    # 给层级列索引加名字
    frame.columns.names = ['state','color']
    frame
    
       state	ohio	color
       color	green	red	green
    key1	key2			
    a	1	0	1	2
        2	3	4	5
    b	1	6	7	8
        2	9	10	11
    
    frame['ohio']
    
       color	green	red
    key1	key2		
    a	1	0	1
        2	3	4
    b	1	6	7
        2	9	10
    

    重排分级顺序

    frame
    
    state	ohio	color
    color	green	red	green
    key1	key2			
    a	1	0	1	2
        2	3	4	5
    b	1	6	7	8
        2	9	10	11
    
    # swaplevel(0,1)先交换key1和key2两个级别;sortlevel(0)再按交换后的第0级(即key2)排序(sortlevel已在pandas 1.0中移除,新版本请用sort_index(level=0))
    frame.swaplevel(0,1).sortlevel(0)
    
    	state	ohio	color
    color	green	red	green
    key2	key1			
    1	a	0	1	2
        b	6	7	8
    2	a	3	4	5
        b	9	10	11
    
    
    # 1指按key1排序
    frame.swaplevel(0,1).sortlevel(1)
    
    state	ohio	color
    color	green	red	green
    key2	key1			
    1	a	0	1	2
    2	a	3	4	5
    1	b	6	7	8
    2	b	9	10	11
    

    根据层次索引级别汇总统计

    frame
    
    state	ohio	color
    color	green	red	green
    key1	key2			
    a	1	0	1	2
        2	3	4	5
    b	1	6	7	8
        2	9	10	11
    
    # 按行索引的key2级别分组求和:key2为1的行相加,key2为2的行相加(sum的level参数已在pandas 2.0中移除,新版本请用frame.groupby(level='key2').sum())
    frame.sum(level='key2')
    
    state	ohio	color
    color	green	red	green
    key2			
    1	6	8	10
    2	12	14	16
    
    # 按列索引的color级别求和(axis=1):同为green的两列相加,red只有一列,保持不变
    frame.sum(level='color',axis=1)
    
    color	green	red
    key1	key2		
    a	1	2	1
        2	8	4
    b	1	14	7
        2	20	10
    

    使用DataFrame的列

    frame1 = pd.DataFrame({'a':range(7),'b':range(7,0,-1),
                         'c':['one','one','one','two','two','two','two'],
                          'd':[0,1,2,0,1,2,3]
                         })
    
    frame1
    
       a	b	c	d
    0	0	7	one	0
    1	1	6	one	1
    2	2	5	one	2
    3	3	4	two	0
    4	4	3	two	1
    5	5	2	two	2
    6	6	1	two	3
    
    # 把c、d两列设置为行索引,默认会从列中删除这两列;如果想保留这两列,可以传入drop=False
    frame1.set_index(['c','d'])
    
            a	b
    c	d		
    one	0	0	7
        1	1	6
        2	2	5
    two	0	3	4
        1	4	3
        2	5	2
        3	6	1
    
    
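    # reset_index是set_index的逆操作:把行索引的各个级别还原为普通列(例如把上面设置为索引的c、d移回列中),了解就行
    # 下面的输出是对使用默认整数索引的frame1调用reset_index()的结果,因此原来的索引变成了名为index的新列
    frame1.reset_index()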
    
       index	a	b	c	d
    0	0	0	7	one	0
    1	1	1	6	one	1
    2	2	2	5	one	2
    3	3	3	4	two	0
    4	4	4	3	two	1
    5	5	5	2	two	2
    6	6	6	1	two	3
    
  • 相关阅读:
    模块(相当于Java里的包)
    if_else_while_for
    用户交互
    Python入门
    BigInteger类及方法应用
    selenium+java破解极验滑动验证码的示例代码
    Postman 使用详解
    Postman用法简介
    伟大架构师的秘密【转载】
    深入理解HTTP协议(转)
  • 原文地址:https://www.cnblogs.com/lishi-jie/p/9960026.html
Copyright © 2011-2022 走看看