zoukankan      html  css  js  c++  java
  • itertools模块

    itertools

    这里主要介绍itertools的常用函数

    accumulate(iterable[, func])

    将一个二元操作的函数作用于一个可迭代对象上,每次循环计算时,函数的两个参数一个是可迭代对象中当前值,另一个是上次计算得到的结果。函数的返回值是一个由每次计算得到的结果组成的可迭代对象。
    相当于如下功能:

    def accumulate(iterable, func=operator.add, *, initial=None):
        """Yield the running results of folding *func* over *iterable*.

        Args:
            iterable: Source of the values to fold.
            func: Binary callable invoked as ``func(running_total, element)``.
                ``None`` is accepted and means addition.
            initial: Optional starting value (keyword-only). When given it is
                yielded first, so the output has one more item than the input.

        Yields:
            Each intermediate accumulated value, in order.
        """
        # accumulate([1,2,3,4,5]) --> 1 3 6 10 15
        # accumulate([1,2,3,4,5], initial=100) --> 100 101 103 106 110 115
        # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120
        if func is None:
            func = operator.add
        it = iter(iterable)
        total = initial
        if initial is None:
            # No seed supplied: the first element seeds the running total.
            try:
                total = next(it)
            except StopIteration:
                # Empty input and no seed: nothing to yield.
                return
        yield total
        for element in it:
            total = func(total, element)
            yield total
    

    二元操作函数可以是如下:

    • min(): 计算最小值
    • max(): 计算最大值
    • operator.mul(): 叠乘
    • operator.add(): 叠加

    使用示例:

    >>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
    >>> list(accumulate(data, operator.mul))     # 叠乘
    [3, 12, 72, 144, 144, 1296, 0, 0, 0, 0]
    >>> list(accumulate(data, max))              # 计算最大值
    [3, 4, 6, 6, 6, 9, 9, 9, 9, 9]
    # 将上一个结果乘1.05,然后加上下一个迭代值
    >>> cashflows = [1000, -90, -90, -90, -90]
    >>> list(accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt))
    [1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001]
    # 这个示例相当于单目迭代运算
    >>> logistic_map = lambda x, _:  r * x * (1 - x)
    >>> r = 3.8
    >>> x0 = 0.4
    >>> inputs = repeat(x0, 36)     # 初始化值
    >>> [format(x, '.2f') for x in accumulate(inputs, logistic_map)]
    ['0.40', '0.91', '0.30', '0.81', '0.60', '0.92', '0.29', '0.79', '0.63',
     '0.88', '0.39', '0.90', '0.33', '0.84', '0.52', '0.95', '0.18', '0.57',
     '0.93', '0.25', '0.71', '0.79', '0.63', '0.88', '0.39', '0.91', '0.32',
     '0.83', '0.54', '0.95', '0.20', '0.60', '0.91', '0.30', '0.80', '0.60']
    

    chain(*iterables)

    将多个可迭代对象进行合并,相当于如下代码:

    def chain(*iterables):
        """Yield every element of each iterable in turn, as one stream."""
        # chain('ABC', 'DEF') --> A B C D E F
        for source in iterables:
            yield from source
    

    使用示例:

    >>> from itertools import chain
    >>> chain([1, 2, 3], [4, 5, 6])
    <itertools.chain object at 0x7f751ad90b70>
    >>> a = chain([1, 2, 3], [4, 5, 6])
    >>> for i in a:
    ...     print(i)
    ... 
    1
    2
    3
    4
    5
    6
    

    combinations(iterable, r)

    将可迭代对象中每r个元素按序进行组合,功能相当于:

    def combinations(iterable, r):
        """Yield the r-length tuples of elements, in positional order.

        Elements are treated as unique by position, not by value. Nothing is
        yielded when *r* exceeds the number of elements.
        """
        # combinations('ABCD', 2) --> AB AC AD BC BD CD
        # combinations(range(4), 3) --> 012 013 023 123
        items = tuple(iterable)
        size = len(items)
        if r > size:
            return
        picks = list(range(r))
        yield tuple(items[k] for k in picks)
        while True:
            # Scan from the right for a slot that has not reached its maximum.
            for pos in range(r - 1, -1, -1):
                if picks[pos] != pos + size - r:
                    break
            else:
                # Every slot is at its maximum: all combinations were emitted.
                return
            picks[pos] += 1
            # Reset the slots to the right to the smallest increasing run.
            for right in range(pos + 1, r):
                picks[right] = picks[right - 1] + 1
            yield tuple(items[k] for k in picks)
    

    使用示例:

    >>> from itertools import combinations
    >>> comb = combinations('abcd', 3)
    >>> for i in comb:
    ...     print(i)
    ... 
    ('a', 'b', 'c')
    ('a', 'b', 'd')
    ('a', 'c', 'd')
    ('b', 'c', 'd')
    

    combinations_with_replacement(iterable, r)

    按照顺序从可迭代对象中取r个元素进行组合,允许使用重复的元素,功能相当于:

    def combinations_with_replacement(iterable, r):
        """Yield r-length tuples of elements, allowing each to repeat.

        Tuples come out in positional order. Nothing is yielded when the
        input is empty and *r* is non-zero.
        """
        # combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
        items = tuple(iterable)
        size = len(items)
        if r and not size:
            return
        last = size - 1
        picks = [0] * r
        yield tuple(items[k] for k in picks)
        while True:
            # Scan from the right for a slot not yet pointing at the last item.
            for pos in range(r - 1, -1, -1):
                if picks[pos] != last:
                    break
            else:
                return
            # Bump that slot and set everything to its right to the same index.
            bumped = picks[pos] + 1
            picks[pos:] = [bumped] * (r - pos)
            yield tuple(items[k] for k in picks)
    

    使用示例:

    >>> from itertools import combinations_with_replacement
    >>> a = combinations_with_replacement('abc', 2)
    >>> for i in a:
    ...     print(i)
    ... 
    ('a', 'a')
    ('a', 'b')
    ('a', 'c')
    ('b', 'b')
    ('b', 'c')
    ('c', 'c')
    

    compress(data, selectors)

    返回data中与selectors里值为真的位置相对应的元素,相当于如下代码:

    def compress(data, selectors):
        """Return an iterator over the items of *data* whose selector is truthy.

        Pairing stops as soon as either *data* or *selectors* runs out.
        """
        # compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
        pairs = zip(data, selectors)
        return (item for item, keep in pairs if keep)
    

    使用示例:

    >>> from itertools import compress
    >>> a = compress('abcdef', [1, 0, 1, 0, 1, 1])
    >>> for i in a:
    ...     print(i)
    ... 
    a
    c
    e
    f
    

    count(start=0, step=1)

    从start开始每次加step组成一个可迭代对象,相当于:

    def count(start=0, step=1):
        """Yield start, start + step, start + 2*step, ... without end."""
        # count(10) --> 10 11 12 13 14 ...
        # count(2.5, 0.5) -> 2.5 3.0 3.5 ...
        current = start
        while True:
            yield current
            current += step
    

    cycle(iterable)

    循环迭代,依次从一个可迭代对象中取元素,当到达最后一个元素之后又返回至第一个元素,相当于:

    def cycle(iterable):
        """Yield the elements of *iterable*, then replay them endlessly.

        Each element is remembered the first time through; an empty input
        yields nothing at all.
        """
        # cycle('ABCD') --> A B C D A B C D A B C D ...
        seen = []
        for item in iterable:
            yield item
            seen.append(item)
        while seen:
            yield from seen
    

    dropwhile(predicate, iterable)

    从第一个元素开始,丢弃使predicate为True的元素,直到遇到第一个使predicate为False的元素为止,然后返回该元素及其后面的所有元素。相当于:

    def dropwhile(predicate, iterable):
        """Skip leading items while *predicate* holds, then yield the rest.

        The first item that fails the predicate is yielded, and every later
        item follows without being tested.
        """
        # dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
        stream = iter(iterable)
        for item in stream:
            if predicate(item):
                continue
            yield item
            break
        # The rest of the shared iterator passes through unfiltered.
        for item in stream:
            yield item
    

    使用示例:

    >>> from itertools import dropwhile
    >>> dropwhile(lambda x: x<5, [1, 4, 6, 4, 1])
    <itertools.dropwhile object at 0x7f94c3507888>
    >>> for x in dropwhile(lambda x: x<5, [1, 4, 6, 4, 1]):
    ...     print(x)
    ... 
    6
    4
    1
    

    filterfalse(predicate, iterable)

    只返回使predicate为False的元素(即移除所有使predicate为True的元素),相当于:

    def filterfalse(predicate, iterable):
        """Yield the items of *iterable* for which *predicate* is falsy.

        A predicate of ``None`` tests the truth value of each item itself.
        """
        # filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
        test = bool if predicate is None else predicate
        for item in iterable:
            if test(item):
                continue
            yield item
    

    groupby(iterable, key=None)

    按照key定义的规则对可迭代对象进行分组,相当于:

    class groupby:
        """Iterator yielding ``(key, group)`` pairs for runs of equal keys.

        A new group starts every time the value of the key function changes,
        so the same key can appear more than once if the input is not sorted
        by it. Every group reads from the one shared underlying iterator.
        """
        # [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B
        # [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D
        def __init__(self, iterable, key=None):
            """Store the key function (identity when *key* is None) and the iterator."""
            if key is None:
                key = lambda x: x
            self.keyfunc = key
            self.it = iter(iterable)
            # One fresh object() serves as sentinel for all three attributes:
            # currkey == tgtkey holds at first, so the first __next__ call
            # fetches an element.
            self.tgtkey = self.currkey = self.currvalue = object()
        def __iter__(self):
            """Return self; the instance is its own iterator."""
            return self
        def __next__(self):
            """Advance to the next run and return ``(key, group_iterator)``.

            Elements still carrying the previous target key are skipped, so
            an unconsumed remainder of the previous group is discarded.
            StopIteration from the underlying iterator propagates and ends
            the iteration.
            """
            while self.currkey == self.tgtkey:
                self.currvalue = next(self.it)    # Exit on StopIteration
                self.currkey = self.keyfunc(self.currvalue)
            self.tgtkey = self.currkey
            return (self.currkey, self._grouper(self.tgtkey))
        def _grouper(self, tgtkey):
            """Yield successive values while the current key equals *tgtkey*."""
            while self.currkey == tgtkey:
                yield self.currvalue
                try:
                    self.currvalue = next(self.it)
                except StopIteration:
                    # Underlying iterator exhausted: this group ends here.
                    return
                self.currkey = self.keyfunc(self.currvalue)
    

    使用示例:

    >>> from itertools import groupby
    >>> a = ['aa', 'ab', 'abc', 'bcd', 'abcde']
    >>> for i, k in groupby(a, len):
    ...     print(i, list(k))
    ... 
    2 ['aa', 'ab']
    3 ['abc', 'bcd']
    5 ['abcde']
    
    >>> from itertools import groupby
    >>> qs = [{'data': 1}, {'data': 2}]
    >>> [(name, list(group)) for name, group in groupby(qs, lambda p: p['data'])]
    [(1, [{'data': 1}]), (2, [{'data': 2}])]
    

    islice(iterable, stop)

    islice(iterable, start, stop[, step])

    通过起始位置和步长从可迭代对象中取出元素,相当于:

    def islice(iterable, *args):
        """Yield selected elements of *iterable*, like slicing but lazy.

        Call as ``islice(iterable, stop)`` or
        ``islice(iterable, start, stop[, step])``. *start* defaults to 0,
        *step* to 1, and a *stop* of ``None`` means "until exhausted".

        No more elements are pulled from *iterable* than are needed to reach
        ``max(start, stop)``.

        Raises:
            ValueError: If *start* or *stop* is negative, or *step* is not
                positive.
        """
        # islice('ABCDEFG', 2) --> A B
        # islice('ABCDEFG', 2, 4) --> C D
        # islice('ABCDEFG', 2, None) --> C D E F G
        # islice('ABCDEFG', 0, None, 2) --> A C E G
        s = slice(*args)
        # Compare against None explicitly so that a stop of 0 means
        # "yield nothing" instead of being mistaken for "no limit".
        start = 0 if s.start is None else s.start
        stop = s.stop
        step = 1 if s.step is None else s.step
        if start < 0 or (stop is not None and stop < 0) or step <= 0:
            raise ValueError(
                'islice() indices must be None or non-negative integers, '
                'and step must be a positive integer'
            )
        # Position after which no further element is pulled from the input.
        limit = None if stop is None else max(start, stop)
        it = iter(iterable)
        wanted = start
        position = 0
        while limit is None or position < limit:
            try:
                element = next(it)
            except StopIteration:
                # Return explicitly: a StopIteration escaping a generator
                # body is converted to RuntimeError (PEP 479).
                return
            if position == wanted:
                yield element
                wanted += step
            position += 1
    

    permutations(iterable, r=None)

    从可迭代对象中取出任意r个元素排列组合,返回所有可能的结果,相当于:

    def permutations(iterable, r=None):
        """Yield the r-length orderings of the elements, by position.

        *r* defaults to the number of elements. Nothing is yielded when *r*
        exceeds that number.
        """
        # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
        # permutations(range(3)) --> 012 021 102 120 201 210
        items = tuple(iterable)
        size = len(items)
        if r is None:
            r = size
        if r > size:
            return
        order = list(range(size))
        countdown = list(range(size, size - r, -1))
        yield tuple(items[k] for k in order[:r])
        while size:
            for pos in range(r - 1, -1, -1):
                countdown[pos] -= 1
                if countdown[pos] != 0:
                    # Swap in the next candidate for this slot and emit.
                    offset = countdown[pos]
                    order[pos], order[-offset] = order[-offset], order[pos]
                    yield tuple(items[k] for k in order[:r])
                    break
                # Slot exhausted: rotate its element to the end, reset the
                # counter, and carry on with the slot to the left.
                order[pos:] = order[pos + 1:] + order[pos:pos + 1]
                countdown[pos] = size - pos
            else:
                return
    

    product(*args, repeat=1)

    返回多个可迭代对象的笛卡尔积,相当于:

    def product(*args, repeat=1):
        """Yield the Cartesian product of the input iterables as tuples.

        The sequence of inputs is repeated *repeat* times. All results are
        built in memory before the first one is yielded.
        """
        # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
        # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
        pools = [tuple(pool) for pool in args] * repeat
        partials = [[]]
        for pool in pools:
            # Extend every prefix built so far with each item of this pool.
            extended = []
            for prefix in partials:
                for item in pool:
                    extended.append(prefix + [item])
            partials = extended
        for combo in partials:
            yield tuple(combo)
    

    使用示例:

    >>> from itertools import product
    >>> a = (1, 2, 3)
    >>> b = ('A', 'B', 'C')
    >>> c = ('d', 'e', 'f')
    >>> pros = product(a, b, c)
    >>> for elem in pros:
    ...     print(elem)
    ... 
    (1, 'A', 'd')
    (1, 'A', 'e')
    (1, 'A', 'f')
    (1, 'B', 'd')
    (1, 'B', 'e')
    (1, 'B', 'f')
    (1, 'C', 'd')
    (1, 'C', 'e')
    (1, 'C', 'f')
    (2, 'A', 'd')
    (2, 'A', 'e')
    (2, 'A', 'f')
    (2, 'B', 'd')
    (2, 'B', 'e')
    (2, 'B', 'f')
    (2, 'C', 'd')
    (2, 'C', 'e')
    (2, 'C', 'f')
    (3, 'A', 'd')
    (3, 'A', 'e')
    (3, 'A', 'f')
    (3, 'B', 'd')
    (3, 'B', 'e')
    (3, 'B', 'f')
    (3, 'C', 'd')
    (3, 'C', 'e')
    (3, 'C', 'f')
    

    repeat(object[, times])

    重复一个对象times次,如果没有定义times则一直重复,相当于:

    def repeat(object, times=None):
        """Yield *object* repeatedly: *times* times, or forever if None."""
        # repeat(10, 3) --> 10 10 10
        if times is not None:
            for _ in range(times):
                yield object
            return
        while True:
            yield object
    

    starmap(function, iterable)

    将可迭代对象的每个元素作为参数执行function,相当于:

    def starmap(function, iterable):
        """Yield ``function(*item)`` for each argument tuple in *iterable*."""
        # starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
        for packed in iterable:
            result = function(*packed)
            yield result
    

    takewhile(predicate, iterable)

    从可迭代对象的第一个元素开始,返回满足predicate为True的值,当predicate值为False则终止,相当于:

    def takewhile(predicate, iterable):
        """Yield leading items while *predicate* holds; stop at the first failure."""
        # takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4
        for item in iterable:
            if not predicate(item):
                return
            yield item
    

    tee(iterable, n=2)

    从 iterable 创建 n 个独立的迭代器,以元组的形式返回,n 的默认值是 2,相当于:

    def tee(iterable, n=2):
        """Split one iterable into a tuple of *n* independent iterators.

        Each returned iterator has its own queue. A value pulled from the
        shared source is appended to every queue, so each consumer sees
        every value.
        """
        source = iter(iterable)
        buffers = [collections.deque() for _ in range(n)]

        def reader(own):
            while True:
                if not own:
                    # Own queue is empty: pull one value from the source
                    # and hand a copy of the reference to every queue.
                    try:
                        fresh = next(source)
                    except StopIteration:
                        return
                    for buf in buffers:
                        buf.append(fresh)
                yield own.popleft()

        return tuple(reader(buf) for buf in buffers)
    

    使用示例:

    >>> from itertools import tee
    >>> iter1, iter2 = tee('abcde')
    >>> list(iter1)
    ['a', 'b', 'c', 'd', 'e']
    >>> list(iter2)
    ['a', 'b', 'c', 'd', 'e']
    >>> tee('abcde', 3)
    (<itertools._tee object at 0x7f94c3507dc8>, <itertools._tee object at 0x7f94c3507d48>, <itertools._tee object at 0x7f94c3507e08>)
    

    zip_longest(*iterables, fillvalue=None)

    依次从每个iterables中取出一个元素进行组合,当短的iterable取完了时用fillvalue进行填充,相当于:

    class ZipExhausted(Exception):
        """Raised internally once every input iterable has run out."""

    def zip_longest(*args, **kwds):
        """Zip iterables together, padding the shorter ones with a fill value.

        The fill value is read from the ``fillvalue`` keyword (None when
        absent). Iteration ends when the longest input is exhausted.
        """
        # zip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
        fillvalue = kwds.get('fillvalue')
        # Number of inputs that may still run dry before the last one does.
        remaining = len(args) - 1

        def sentinel():
            # Runs when the input chained before it is exhausted. Once no
            # other input is left to wait for, the whole zip is finished.
            nonlocal remaining
            if remaining == 0:
                raise ZipExhausted
            remaining -= 1
            yield fillvalue

        padding = repeat(fillvalue)
        iterators = [chain(source, sentinel(), padding) for source in args]
        try:
            while iterators:
                yield tuple([next(stream) for stream in iterators])
        except ZipExhausted:
            pass
    
  • 相关阅读:
    Python统计词频的几种方式
    GO语言系列之 基本数据类型和操作符
    GO语言系列之 初识go语言
    Python实现聚类算法AP
    最大似然估计
    Breastcancer社区评论下载
    数据可视化之pyecharts
    Requests库
    nginx内置变量
    Tomcat内部结构、工作原理、工作模式和运行模式
  • 原文地址:https://www.cnblogs.com/suraer/p/8444154.html
Copyright © 2011-2022 走看看