zoukankan      html  css  js  c++  java
  • itertools模块的groupby函数

    groupby()把迭代器中相邻的重复元素挑出来放在一起:

    import itertools

    # groupby() yields (key, group) pairs, one per run of adjacent equal
    # elements. Each group is a lazy iterator, so wrap it in list() to show
    # its contents.
    for key, group in itertools.groupby('AAABBBCCAAA'):
        print(key, list(group))
    
    A ['A', 'A', 'A']
    B ['B', 'B', 'B']
    C ['C', 'C']
    A ['A', 'A', 'A']

    分组规则由传入的key函数决定:只要key函数对相邻两个元素的返回值相等,这两个元素就被认为属于同一组,而该返回值就作为这一组的key

    例子1:将相同国家的人员信息进行归纳

    from itertools import groupby
    from operator import itemgetter

    d1 = {'name': 'Lilei', 'age': 15, 'country': 'China'}
    d2 = {'name': 'jack', 'age': 19, 'country': 'USA'}
    d3 = {'name': '苍老师', 'age': 22, 'country': 'JP'}
    d4 = {'name': 'tom', 'age': 22, 'country': 'USA'}
    d5 = {'name': 'lucy', 'age': 22, 'country': 'USA'}
    d6 = {'name': 'Hanmeimei', 'age': 15, 'country': 'China'}
    lst = [d1, d2, d3, d4, d5, d6]

    # One key function shared by the sort and by groupby(), so both always
    # agree on what "same group" means.
    by_country = itemgetter('country')

    # groupby() only merges *adjacent* items with equal keys, so the list must
    # be sorted by the same key first; otherwise a country could appear as
    # several separate groups.
    lst.sort(key=by_country)
    print(lst)
    # [{'name': 'Lilei', 'age': 15, 'country': 'China'}, {'name': 'Hanmeimei', 'age': 15, 'country': 'China'}, {'name': '苍老师', 'age': 22, 'country': 'JP'}, {'name': 'jack', 'age': 19, 'country': 'USA'}, {'name': 'tom', 'age': 22, 'country': 'USA'}, {'name': 'lucy', 'age': 22, 'country': 'USA'}]

    # groupby() returns a one-shot iterator: listing it consumes it. Use a
    # throwaway iterator here just to show the (key, group-iterator) structure.
    print(list(groupby(lst, key=by_country)))
    # [('China', <itertools._grouper object at 0x000002ACC7105A90>), ('JP', <itertools._grouper object at 0x000002ACC7105B00>), ('USA', <itertools._grouper object at 0x000002ACC7105EB8>)]

    # Build a fresh iterator for the real pass; reusing the one listed above
    # would yield nothing because it is already exhausted.
    lst_g = groupby(lst, key=by_country)
    grouped = {}
    for c, g in lst_g:
        # Materialize each group before advancing: moving on to the next key
        # invalidates the previous group iterator.
        grouped[c] = list(g)
        print({c: grouped[c]})
    
    # {'China': [{'name': 'Lilei', 'age': 15, 'country': 'China'}, {'name': 'Hanmeimei', 'age': 15, 'country': 'China'}]}
    # {'JP': [{'name': '苍老师', 'age': 22, 'country': 'JP'}]}
    # {'USA': [{'name': 'jack', 'age': 19, 'country': 'USA'}, {'name': 'tom', 'age': 22, 'country': 'USA'}, {'name': 'lucy', 'age': 22, 'country': 'USA'}]}

    例子2:归纳列表中连续的数字

    from itertools import groupby

    lst = [2, 3, 5, 6, 7, 8, 1, 11, 12, 13, 15, 27, 28, 29]

    # Put the values in ascending order so consecutive integers are neighbours.
    lst.sort()
    print(lst)
    # [1, 2, 3, 5, 6, 7, 8, 11, 12, 13, 15, 27, 28, 29]

    indexed = list(enumerate(lst))
    print(indexed)
    # [(0, 1), (1, 2), (2, 3), (3, 5), (4, 6), (5, 7), (6, 8), (7, 11), (8, 12), (9, 13), (10, 15), (11, 27), (12, 28), (13, 29)]
    # Inside a run of consecutive integers, value - index stays constant, so
    # that difference can be used as the grouping key.

    # Grouping the (index, value) pairs and listing each group as-is gives:
    # [(0, 1), (1, 2), (2, 3)]
    # [(3, 5), (4, 6), (5, 7), (6, 8)]
    # [(7, 11), (8, 12), (9, 13)]
    # [(10, 15)]
    # [(11, 27), (12, 28), (13, 29)]

    def _offset(pair):
        """Return value - index for an (index, value) pair."""
        position, value = pair
        return value - position

    # Drop the index again so each printed run contains only the values.
    for _, run in groupby(indexed, key=_offset):
        print([value for _, value in run])
        # [1, 2, 3]
        # [5, 6, 7, 8]
        # [11, 12, 13]
        # [15]
        # [27, 28, 29]

    例子3:归纳列表中连续的ip

    import ipaddress
    from itertools import groupby

    ip_list = [
        '10.16.49.113',
        '10.202.255.127',
        '10.202.255.125',
        '10.202.255.126',
        '10.202.255.145',
        '10.202.255.175',
        '10.202.255.174',
        '10.202.255.144',
        '10.202.255.173',
    ]

    # Convert each address to its integer value so neighbouring addresses
    # differ by exactly 1, then sort. The set drops duplicate addresses, which
    # would otherwise break a run into overlapping groups.
    ip_list_int = sorted({int(ipaddress.ip_address(ip)) for ip in ip_list})
    # print(ip_list_int)
    # [168833393, 181075837, 181075838, 181075839, 181075856, 181075857, 181075885, 181075886, 181075887]

    rst = []

    # Inside a run of consecutive addresses, value - index is constant, so
    # that difference is used as the grouping key.
    for _, run in groupby(enumerate(ip_list_int), key=lambda x: x[1] - x[0]):
        # list(run) would be, group by group:
        # [(0, 168833393)]
        # [(1, 181075837), (2, 181075838), (3, 181075839)]
        # [(4, 181075856), (5, 181075857)]
        # [(6, 181075885), (7, 181075886), (8, 181075887)]

        ip_range_list = [v for k, v in run]
        # ip_range_list, group by group:
        # [168833393]
        # [181075837, 181075838, 181075839]
        # [181075856, 181075857]
        # [181075885, 181075886, 181075887]

        if len(ip_range_list) > 1:
            # Summarize the inclusive first..last range as networks; one run
            # may need several networks (e.g. .125-.127 -> .125/32 + .126/31).
            ip_range = ipaddress.summarize_address_range(
                ipaddress.ip_address(ip_range_list[0]),
                ipaddress.ip_address(ip_range_list[-1]),
            )
            rst.extend(str(ip_summ) for ip_summ in ip_range)
        else:
            # A lone address is reported as a plain address, without a prefix.
            rst.append(str(ipaddress.ip_address(ip_range_list[0])))

    print(rst)
    # ['10.16.49.113', '10.202.255.125/32', '10.202.255.126/31', '10.202.255.144/31', '10.202.255.173/32', '10.202.255.174/31']

     

  • 相关阅读:
    Java核心技术 卷一 笔记四 库类的直接使用
    Java核心技术 卷一 笔记三 大数值及数组
    Java核心技术 卷一 笔记2 字符串的复制
    Java核心技术 卷一 笔记1
    修改css 样式后, hover事件 不生效
    修改 element ui input 输入框 样式不生效问题
    css3 计算属性
    Vue3 改动系列
    浏览器实现,向下滑动 鼠标滚轮,页面横向移动
    linux ceont0s7 vue 打包压缩图片 一直报错
  • 原文地址:https://www.cnblogs.com/dxnui119/p/13079795.html
Copyright © 2011-2022 走看看