zoukankan      html  css  js  c++  java
  • Python自动化开发从浅入深-语言基础(collection)

    -- collections模块是对内置数据类型的一种扩充,其主要扩充类型包括:

      1.namedtuple(): 生成可以使用名字来访问元素内容的tuple子类,以增强可读性。

    def namedtuple(typename, field_names, verbose=False, rename=False):
        """Returns a new subclass of tuple with named fields.
        返回一个新的命名域元组子类,typename为类型名,field_names为数据变量域名

        >>> Point = namedtuple('Point', ['x', 'y'])  # 定义一个命名元组,类型为Point,数据变量域为['x','y']
        >>> Point.__doc__                   # docstring for the new class
        'Point(x, y)'
        >>> p = Point(11, y=22)             # 定义一个Point,x=11,y=22,(instantiate with positional args or keywords)
        >>> p[0] + p[1]                     # 也可以用下标,p[0]即x,p[1]即y,(indexable like a plain tuple)
        33
        >>> x, y = p                        # 也可以这样赋值,p即p[0]和p[1]或p.x和p.y,(unpack like a regular tuple)
        >>> x, y
        (11, 22)
        >>> p.x + p.y                       # fields also accessible by name
        33
        >>> d = p._asdict()                 # convert to a dictionary
        >>> d['x']
        11
        >>> Point(**d)                      # convert from a dictionary
        Point(x=11, y=22)
        >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
        Point(x=100, y=22)
        """

     

      2.deque: 双端队列,可以快速的从另外一侧追加和推出对象。deque其实是 double-ended queue 的缩写,翻译过来就是双端队列,它最大的好处就是实现了从队列头部快速增加和取出对象: .popleft(), .appendleft() 。

    从_collections模块得到:

    class deque(object):
        """
        deque([iterable[, maxlen]]) --> deque object

        A list-like sequence optimized for data accesses near its endpoints.

        NOTE(review): every method below is a signature-only stub (its body is
        ``pass`` or a constant ``return 0``); this looks like a generated
        skeleton of the C implementation rather than working code -- confirm
        before relying on it.
        """

        def append(self, *args, **kwargs): # real signature unknown
            """ Add an element to the right side of the deque.

            Example:
                d = deque()
                d.append('1')
                d.append('2')
                d.append('3')
                len(d)
                d[0]
                d[-1]
                print(d)
                print("d[0]=%s,d[1]=%s,d[2]=%s"%(d[0],d[1],d[2]))

            Output:
                deque(['1', '2', '3'])
                d[0]=1,d[1]=2,d[2]=3
            """
            pass

        def appendleft(self, *args, **kwargs): # real signature unknown
            """ Add an element to the left side of the deque. """
            pass

        def clear(self, *args, **kwargs): # real signature unknown
            """ Remove all elements from the deque. """
            pass

        def copy(self, *args, **kwargs): # real signature unknown
            """ Return a shallow copy of a deque. """
            pass

        def count(self, value): # real signature unknown; restored from __doc__
            """ D.count(value) -> integer -- return number of occurrences of value """
            return 0

        def extend(self, *args, **kwargs): # real signature unknown
            """ Extend the right side of the deque with elements from the iterable """
            pass

        def extendleft(self, *args, **kwargs): # real signature unknown
            """ Extend the left side of the deque with elements from the iterable """
            pass

        def index(self, value, start=None, stop=None): # real signature unknown; restored from __doc__
            """
            D.index(value, [start, [stop]]) -> integer -- return first index of value.
            Raises ValueError if the value is not present.
            """
            return 0

        def insert(self, index, p_object): # real signature unknown; restored from __doc__
            """ D.insert(index, object) -- insert object before index """
            pass

        def pop(self, *args, **kwargs): # real signature unknown
            """ Remove and return the rightmost element. """
            pass

        def popleft(self, *args, **kwargs): # real signature unknown
            """ Remove and return the leftmost element. """
            pass

        def remove(self, value): # real signature unknown; restored from __doc__
            """ D.remove(value) -- remove first occurrence of value. """
            pass

        def reverse(self): # real signature unknown; restored from __doc__
            """ D.reverse() -- reverse *IN PLACE* """
            pass

        def rotate(self, *args, **kwargs): # real signature unknown
            """ Rotate the deque n steps to the right (default n=1).  If n is negative, rotates left. """
            pass

      3.Counter: 计数器,主要用来计数,是对字典的一种扩充。

      下面是Counter类,从dict继承而来。

    class Counter(dict):
        '''Dict subclass for counting hashable items.  Sometimes called a bag
        or multiset.  Elements are stored as dictionary keys and their counts
        are stored as dictionary values.

        >>> c = Counter('abcdeabcdabcaba')  # count elements from a string
        >>> c
        Counter({'a': 5, 'b': 4, 'c': 3, 'd': 2, 'e': 1})

        >>> c.most_common(3)                # three most common elements
        [('a', 5), ('b', 4), ('c', 3)]
        >>> sorted(c)                       # list all unique elements
        ['a', 'b', 'c', 'd', 'e']
        >>> ''.join(sorted(c.elements()))   # list elements with repetitions
        'aaaaabbbbcccdde'
        >>> sum(c.values())                 # total of all counts
        15

        >>> c['a']                          # count of letter 'a'
        5
        >>> for elem in 'shazam':           # update counts from an iterable
        ...     c[elem] += 1                # by adding 1 to each element's count
        >>> c['a']                          # now there are seven 'a'
        7
        >>> del c['b']                      # remove all 'b'
        >>> c['b']                          # now there are zero 'b'
        0

        >>> d = Counter('simsalabim')       # make another counter
        >>> c.update(d)                     # add the second counter into the first
        >>> c['a']                          # now there are nine 'a'
        9

        >>> c.clear()                       # empty the counter
        >>> c
        Counter()

        Note:  If a count is set to zero or reduced to zero, it will remain
        in the counter until the entry is deleted or the counter is cleared:

        >>> c = Counter('aaabbc')
        >>> c['b'] -= 2                     # reduce the count of 'b' by two
        >>> c.most_common()                 # 'b' is still in, but its count is zero
        [('a', 3), ('c', 1), ('b', 0)]

        '''

      

      4.OrderedDict: 有序字典 

    class OrderedDict(dict):
        'Dictionary that remembers insertion order'
        # An inherited dict maps keys to values.
        # The inherited dict provides __getitem__, __len__, __contains__, and get.
        # The remaining methods are order-aware.
        # Big-O running times for all methods are the same as regular dictionaries.

        # The internal self.__map dict maps keys to links in a doubly linked list.
        # The circular doubly linked list starts and ends with a sentinel element.
        # The sentinel element never gets deleted (this simplifies the algorithm).
        # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
        # The prev links are weakref proxies (to prevent circular references).
        # Individual links are kept alive by the hard reference in self.__map.
        # Those hard references disappear when a key is deleted from an OrderedDict.

        # NOTE(review): this excerpt never assigns self.__root, self.__map or
        # self.__hardroot, and defines no __init__/__setitem__/__delitem__/
        # __iter__; presumably those live in the part of the class that was
        # not reproduced here -- confirm against the full source.

        def clear(self):
            'od.clear() -> None.  Remove all items from od.'
            root = self.__root
            # Point the sentinel at itself: the linked list is now empty.
            root.prev = root.next = root
            self.__map.clear()
            dict.clear(self)

        def popitem(self, last=True):
            '''od.popitem() -> (k, v), return and remove a (key, value) pair.
            Pairs are returned in LIFO order if last is true or FIFO order if false.

            Raises KeyError if the dictionary is empty.

            '''
            if not self:
                raise KeyError('dictionary is empty')
            root = self.__root
            if last:
                # Unlink the node just before the sentinel (the newest entry).
                link = root.prev
                link_prev = link.prev
                link_prev.next = root
                root.prev = link_prev
            else:
                # Unlink the node just after the sentinel (the oldest entry).
                link = root.next
                link_next = link.next
                root.next = link_next
                link_next.prev = root
            key = link.key
            del self.__map[key]
            value = dict.pop(self, key)
            return key, value

        def move_to_end(self, key, last=True):
            '''Move an existing element to the end (or beginning if last==False).

            Raises KeyError if the element does not exist.
            When last=True, acts like a fast version of self[key]=self.pop(key).

            '''
            link = self.__map[key]
            # Detach the link from its current position.
            link_prev = link.prev
            link_next = link.next
            link_prev.next = link_next
            link_next.prev = link_prev
            root = self.__root
            if last:
                # Re-insert between the current last node and the sentinel.
                last = root.prev
                link.prev = last
                link.next = root
                last.next = root.prev = link
            else:
                # Re-insert between the sentinel and the current first node.
                first = root.next
                link.prev = root
                link.next = first
                root.next = first.prev = link

        def __sizeof__(self):
            'Return a size estimate built from the instance dict, the maps and the links.'
            sizeof = _sys.getsizeof
            n = len(self) + 1                       # number of links including root
            size = sizeof(self.__dict__)            # instance dictionary
            size += sizeof(self.__map) * 2          # internal dict and inherited dict
            size += sizeof(self.__hardroot) * n     # link objects
            size += sizeof(self.__root) * n         # proxy objects
            return size

        update = __update = MutableMapping.update

        def keys(self):
            "D.keys() -> a set-like object providing a view on D's keys"
            return _OrderedDictKeysView(self)

        def items(self):
            "D.items() -> a set-like object providing a view on D's items"
            return _OrderedDictItemsView(self)

        def values(self):
            "D.values() -> an object providing a view on D's values"
            return _OrderedDictValuesView(self)

        __ne__ = MutableMapping.__ne__

        # Private sentinel so that None can be passed as an explicit default to pop().
        __marker = object()

        def pop(self, key, default=__marker):
            '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
            value.  If key is not found, d is returned if given, otherwise KeyError
            is raised.

            '''
            if key in self:
                result = self[key]
                del self[key]
                return result
            if default is self.__marker:
                raise KeyError(key)
            return default

        def setdefault(self, key, default=None):
            'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
            if key in self:
                return self[key]
            self[key] = default
            return default

        @_recursive_repr()
        def __repr__(self):
            'od.__repr__() <==> repr(od)'
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, list(self.items()))

        def __reduce__(self):
            'Return state information for pickling'
            inst_dict = vars(self).copy()
            # Drop the attributes that a freshly built instance already has.
            for k in vars(OrderedDict()):
                inst_dict.pop(k, None)
            return self.__class__, (), inst_dict or None, None, iter(self.items())

        def copy(self):
            'od.copy() -> a shallow copy of od'
            return self.__class__(self)

        @classmethod
        def fromkeys(cls, iterable, value=None):
            '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
            If not specified, the value defaults to None.

            '''
            self = cls()
            for key in iterable:
                self[key] = value
            return self

        def __eq__(self, other):
            '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
            while comparison to a regular mapping is order-insensitive.

            '''
            if isinstance(other, OrderedDict):
                return dict.__eq__(self, other) and all(map(_eq, self, other))
            return dict.__eq__(self, other)


    # Prefer the C-accelerated OrderedDict when the interpreter provides one.
    try:
        from _collections import OrderedDict
    except ImportError:
        # Leave the pure Python version in place.
        pass
     

       

    5.defaultdict: 带有默认值的字典

      我们都知道,在使用Python原生的数据结构dict的时候,如果用 d[key] 这样的方式访问, 当指定的key不存在时,是会抛出KeyError异常的。

    但是,如果使用defaultdict,只要你传入一个默认的工厂方法,那么请求一个不存在的key时, 便会调用这个工厂方法使用其结果来作为这个key的默认值。

    默认值可以很方便

    众所周知,在Python中如果访问字典中不存在的键,会引发KeyError异常(JavaScript中如果对象中不存在某个属性,则返回undefined)。但是有时候,字典中的每个键都存在默认值是非常方便的。例如下面的例子:

    strings = ('puppy', 'kitten', 'puppy', 'puppy',
               'weasel', 'puppy', 'kitten', 'puppy')
    counts = {}
    
    for kw in strings:
        # counts starts out empty, so this read-modify-write raises KeyError on
        # the very first iteration ('puppy' is not a key yet).
        counts[kw] += 1

    该例子统计strings中某个单词出现的次数,并在counts字典中作记录。单词每出现一次,在counts相对应的键所存的值数字加1。但是事实上,运行这段代码会抛出KeyError异常,出现的时机是每个单词第一次统计的时候,因为Python的dict中不存在默认值的说法,可以在Python命令行中验证:

    >>> counts = dict()
    >>> counts
    {}
    >>> counts['puppy'] += 1
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    KeyError: 'puppy'

     

    使用判断语句检查

    既然如此,首先可能想到的方法是在单词第一次统计的时候,在counts中相应的键存下默认值1。这需要在处理的时候添加一个判断语句:

    # Tally each word, seeding its entry by hand the first time it is seen.
    strings = (
        'puppy', 'kitten', 'puppy', 'puppy',
        'weasel', 'puppy', 'kitten', 'puppy',
    )
    counts = dict()

    for kw in strings:
        if kw in counts:
            counts[kw] += 1
        else:
            counts[kw] = 1

    # Resulting counts:
    # {'puppy': 5, 'weasel': 1, 'kitten': 2}

    使用dict.setdefault()方法

    也可以通过dict.setdefault()方法来设置默认值:

    # Tally each word, making sure its key exists (as 0) before incrementing.
    strings = (
        'puppy', 'kitten', 'puppy', 'puppy',
        'weasel', 'puppy', 'kitten', 'puppy',
    )
    counts = dict()

    for kw in strings:
        counts.setdefault(kw, 0)
        counts[kw] = counts[kw] + 1  # the original slides had a typo here

    dict.setdefault()方法接收两个参数,第一个参数是键的名称,第二个参数是默认值。假如字典中不存在给定的键,则把该键的值设为参数中提供的默认值并返回这个默认值;反之,则返回字典中保存的值。利用dict.setdefault()方法的返回值可以重写for循环中的代码,使其更加简洁:

    # Tally each word using the value handed back by dict.setdefault().
    strings = (
        'puppy', 'kitten', 'puppy', 'puppy',
        'weasel', 'puppy', 'kitten', 'puppy',
    )
    counts = dict()

    for kw in strings:
        # setdefault() yields the stored count, inserting 0 for a new word.
        counts[kw] = 1 + counts.setdefault(kw, 0)

    使用collections.defaultdict

    以上的方法虽然在一定程度上解决了dict中不存在默认值的问题,但是这时候我们会想,有没有一种字典它本身提供了默认值的功能呢?答案是肯定的,那就是collections.defaultdict

    defaultdict类就好像是一个dict,但是它是使用一个类型来初始化的:

    >>> from collections import defaultdict
    >>> dd = defaultdict(list)
    >>> dd
    defaultdict(<type 'list'>, {})

    defaultdict类的初始化函数接受一个类型作为参数,当所访问的键不存在的时候,可以实例化一个值作为默认值:

    >>> dd['foo']
    []
    >>> dd
    defaultdict(<type 'list'>, {'foo': []})
    >>> dd['bar'].append('quux')
    >>> dd
    defaultdict(<type 'list'>, {'foo': [], 'bar': ['quux']})

    需要注意的是,这种形式的默认值只有在通过dict[key]或者dict.__getitem__(key)访问的时候才有效,这其中的原因在下文会介绍。

    >>> from collections import defaultdict
    >>> dd = defaultdict(list)
    >>> 'something' in dd
    False
    >>> dd.pop('something')
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    KeyError: 'pop(): dictionary is empty'
    >>> dd.get('something')
    >>> dd['something']
    []

    defaultdict类除了接受类型名称作为初始化函数的参数之外,还可以使用任何不带参数的可调用函数,到时该函数的返回结果作为默认值,这样使得默认值的取值更加灵活。下面用一个例子来说明,如何用自定义的不带参数的函数zero()作为defaultdict类的初始化函数的参数:

    >>> from collections import defaultdict
    >>> def zero():
    ...     return 0
    ...
    >>> dd = defaultdict(zero)
    >>> dd
    defaultdict(<function zero at 0xb7ed2684>, {})
    >>> dd['foo']
    0
    >>> dd
    defaultdict(<function zero at 0xb7ed2684>, {'foo': 0})

    利用collections.defaultdict来解决最初的单词统计问题,代码如下:

    from collections import defaultdict

    strings = (
        'puppy', 'kitten', 'puppy', 'puppy',
        'weasel', 'puppy', 'kitten', 'puppy',
    )
    # A zero-argument lambda is the factory that supplies missing values.
    counts = defaultdict(lambda: 0)

    for s in strings:
        counts[s] = counts[s] + 1

    defaultdict类是如何实现的

    通过上面的内容,想必大家已经了解了defaultdict类的用法,那么在defaultdict类中又是如何来实现默认值的功能呢?这其中的关键是使用了__missing__()这个方法:

    >>> from collections import defaultdict
    >>> print defaultdict.__missing__.__doc__
    __missing__(key) # Called by __getitem__ for missing key; pseudo-code:
      if self.default_factory is None: raise KeyError(key)
      self[key] = value = self.default_factory()
      return value

    通过查看__missing__()方法的docstring,可以看出当使用__getitem__()方法访问一个不存在的键时(dict[key]这种形式实际上是__getitem__()方法的简化形式),会调用__missing__()方法获取默认值,并将该键添加到字典中去。

    关于__missing__()方法的具体介绍可以参考Python官方文档中的"Mapping Types — dict"一节。

    文档中介绍,从2.5版本开始,如果派生自dict的子类定义了__missing__()方法,当访问不存在的键时,dict[key]会调用__missing__()方法取得默认值。

    从中可以看出,虽然dict支持__missing__()方法,但是在dict本身是不存在这个方法的,而是需要在派生的子类中自行实现这个方法。可以简单的验证这一点:

    >>> print dict.__missing__.__doc__
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    AttributeError: type object 'dict' has no attribute '__missing__'

    同时,我们可以进一步的做实验,定义一个子类Missing并实现__missing__()方法:

    >>> class Missing(dict):
    ...     def __missing__(self, key):
    ...         return 'missing'
    ...
    >>> d = Missing()
    >>> d
    {}
    >>> d['foo']
    'missing'
    >>> d
    {}

    返回结果反映了__missing__()方法确实发挥了作用。在此基础上,我们稍许修改__missing__()方法,使得该子类同defaultdict类一样为不存在的键设置一个默认值:

    >>> class Defaulting(dict):
    ...     def __missing__(self, key):
    ...         self[key] = 'default'
    ...         return 'default'
    ...
    >>> d = Defaulting()
    >>> d
    {}
    >>> d['foo']
    'default'
    >>> d
    {'foo': 'default'}

    在旧版本的Python中实现类defaultdict的功能

    defaultdict类是从2.5版本之后才添加的,在一些旧版本中并不支持它,因此为旧版本实现一个兼容的defaultdict类是必要的。这其实很简单,虽然性能可能未必如2.5版本中自带的defaultdict类好,但在功能上是一样的。

    首先,__getitem__()方法需要在访问键失败时,调用__missing__()方法:

    class defaultdict(dict):
        """dict subclass whose item lookup falls back to ``__missing__``.

        This class does not define ``__missing__`` itself.
        """

        def __getitem__(self, key):
            """Return the stored value for *key*; if the plain dict lookup
            raises KeyError, return ``self.__missing__(key)`` instead."""
            try:
                found = dict.__getitem__(self, key)
            except KeyError:
                found = self.__missing__(key)
            return found

    其次,需要实现__missing__()方法用来设置默认值:

    class defaultdict(dict):
        """dict subclass that fills in absent keys from ``self.default_factory``.

        ``default_factory`` is read but never assigned by this class; it has to
        be set on the instance before a missing key is looked up.
        """

        def __getitem__(self, key):
            """Return the stored value for *key*; if the plain dict lookup
            raises KeyError, return ``self.__missing__(key)`` instead."""
            try:
                found = dict.__getitem__(self, key)
            except KeyError:
                found = self.__missing__(key)
            return found

        def __missing__(self, key):
            """Create a value with ``default_factory``, store it under *key*
            and return it."""
            fresh = self.default_factory()
            self[key] = fresh
            return fresh

    然后,defaultdict类的初始化函数__init__()需要接受类型或者可调用函数参数:

    class defaultdict(dict):
        """dict subclass that creates values for missing keys on item lookup.

        default_factory: a zero-argument callable (or None) used to build the
            value for a key that is not present. Remaining positional and
            keyword arguments are passed to the dict constructor.
        """

        def __init__(self, default_factory=None, *a, **kw):
            dict.__init__(self, *a, **kw)
            self.default_factory = default_factory

        def __getitem__(self, key):
            """Return the value for *key*, deferring to ``__missing__`` when
            the plain dict lookup raises KeyError."""
            try:
                return dict.__getitem__(self, key)
            except KeyError:
                return self.__missing__(key)

        def __missing__(self, key):
            """Store and return ``default_factory()`` for *key*.

            Raises KeyError when no factory was supplied.
            """
            # Without a factory, behave like a plain dict instead of failing
            # with a TypeError from calling None.
            if self.default_factory is None:
                raise KeyError(key)
            self[key] = value = self.default_factory()
            return value

    最后,综合以上内容,通过以下方式完成兼容新旧Python版本的代码:

    try:
        from collections import defaultdict
    except ImportError:
        # Fallback used only when collections does not provide defaultdict.
        class defaultdict(dict):
            """dict subclass that creates values for missing keys on item lookup.

            default_factory: a zero-argument callable (or None) used to build
                the value for a key that is not present. Remaining positional
                and keyword arguments are passed to the dict constructor.
            """

            def __init__(self, default_factory=None, *a, **kw):
                dict.__init__(self, *a, **kw)
                self.default_factory = default_factory

            def __getitem__(self, key):
                """Return the value for *key*, deferring to ``__missing__``
                when the plain dict lookup raises KeyError."""
                try:
                    return dict.__getitem__(self, key)
                except KeyError:
                    return self.__missing__(key)

            def __missing__(self, key):
                """Store and return ``default_factory()`` for *key*.

                Raises KeyError when no factory was supplied.
                """
                # Without a factory, behave like a plain dict instead of
                # failing with a TypeError from calling None.
                if self.default_factory is None:
                    raise KeyError(key)
                self[key] = value = self.default_factory()
                return value
    class defaultdict(dict):
        """
        defaultdict(default_factory[, ...]) --> dict with default factory
        
        The default factory is called without arguments to produce
        a new value when a key is not present, in __getitem__ only.
        A defaultdict compares equal to a dict with the same items.
        All remaining arguments are treated the same as if they were
        passed to the dict constructor, including keyword arguments.
        """
        # NOTE(review): every method body below is just ``pass``; this looks like
        # a generated signature skeleton of _collections.defaultdict rather than
        # a working implementation -- confirm before using it as one.
        def copy(self): # real signature unknown; restored from __doc__
            """ D.copy() -> a shallow copy of D. """
            pass
    
        def __copy__(self, *args, **kwargs): # real signature unknown
            """ D.copy() -> a shallow copy of D. """
            pass
    
        def __getattribute__(self, *args, **kwargs): # real signature unknown
            """ Return getattr(self, name). """
            pass
    
        def __init__(self, default_factory=None, **kwargs): # known case of _collections.defaultdict.__init__
            """
            defaultdict(default_factory[, ...]) --> dict with default factory
            
            The default factory is called without arguments to produce
            a new value when a key is not present, in __getitem__ only.
            A defaultdict compares equal to a dict with the same items.
            All remaining arguments are treated the same as if they were
            passed to the dict constructor, including keyword arguments.
            
            # (copied from class doc)
            """
            pass
    
        def __missing__(self, key): # real signature unknown; restored from __doc__
            """
            __missing__(key) # Called by __getitem__ for missing key; pseudo-code:
              if self.default_factory is None: raise KeyError((key,))
              self[key] = value = self.default_factory()
              return value
            """
            pass
    
        def __reduce__(self, *args, **kwargs): # real signature unknown
            """ Return state information for pickling. """
            pass
    
        def __repr__(self, *args, **kwargs): # real signature unknown
            """ Return repr(self). """
            pass
    
        # Placeholder property: the getter returns a fresh object(), while the
        # setter and deleter do nothing.
        default_factory = property(lambda self: object(), lambda self, v: None, lambda self: None)  # default
        """Factory for default value called by __missing__()."""
  • 相关阅读:
    LeetCode 32. 最长有效括号(Longest Valid Parentheses)
    LeetCode 141. 环形链表(Linked List Cycle)
    LeetCode 160. 相交链表(Intersection of Two Linked Lists)
    LeetCode 112. 路径总和(Path Sum)
    LeetCode 124. 二叉树中的最大路径和(Binary Tree Maximum Path Sum)
    LightGBM新特性总结
    sql service 事务与锁
    C#泛型实例详解
    C# 中的委托和事件(详解)
    C# DateTime日期格式化
  • 原文地址:https://www.cnblogs.com/whiggzhaohong/p/5180094.html
Copyright © 2011-2022 走看看