zoukankan      html  css  js  c++  java
  • Python 源码剖析(四)【LIST对象】

    四、LIST对象

    1、PyListObject对象

    2、PyListObject的创建与维护

    3、PyListObject 对象缓冲池

    4、Hack PyListObject


    1、PyListObject对象

    PyListObject 对象是变长对象,而且还是一个可变对象:

    [listobject.h] 
    
    /* Layout of a list object: the variable-size object header, a pointer to
     * the element vector, and the number of slots allocated for that vector. */
    typedef struct {
    
        PyObject_VAR_HEAD
    
        /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    
        PyObject **ob_item;
    
        /* Number of element slots allocated in ob_item.  The number of slots
         * actually in use is ob_size, so 0 <= ob_size <= allocated. */
        int allocated;
    
    } PyListObject;
    PyObject_VAR_HEAD 中有一个 ob_size,PyListObject 自身还有一个 allocated:allocated 指已申请内存可容纳的元素个数,ob_size 指实际已使用的元素个数,0 <= ob_size <= allocated。


    2、PyListObject的创建与维护

    2.1、创建

    唯一创建方法PyList_New:

    [listobject.c] 
    
    /*
     * Create a new list object with `size` element slots, all set to NULL.
     *
     * Returns the new list, or NULL when the byte count for `size` slots does
     * not fit in size_t or when either allocation (the list object itself or
     * its element vector) fails.
     */
    PyObject* PyList_New(int size)
    {
        PyListObject *op;
        size_t nbytes;

        nbytes = size * sizeof(PyObject *);
        /* Check for overflow */
        if (nbytes / sizeof(PyObject *) != (size_t)size)
            return PyErr_NoMemory();

        /* Obtain the PyListObject itself. */
        if (num_free_lists) {
            /* Reuse a list object parked in the free-list pool. */
            num_free_lists--;
            op = free_lists[num_free_lists];
            _Py_NewReference((PyObject *)op);
        } else {
            /* The pool is empty: allocate a fresh object. */
            op = PyObject_GC_New(PyListObject, &PyList_Type);
            if (op == NULL)
                return NULL;
        }
        /* Obtain the element vector maintained by the list. */
        if (size <= 0)
            op->ob_item = NULL;
        else {
            op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);
            if (op->ob_item == NULL) {
                /* Without this check memset() below would write through
                 * NULL.  ob_item is NULL here, so releasing the object
                 * touches no elements. */
                Py_DECREF(op);
                return PyErr_NoMemory();
            }
            memset(op->ob_item, 0, nbytes);
        }
        op->ob_size = size;
        op->allocated = size;
        _PyObject_GC_TRACK(op);
        return (PyObject *) op;
    }

    先进行检查,再判断缓冲池是否可用,不可则malloc在堆上新建PyListObject。PyListObject缓冲池为free_lists:

    /* Empty list reuse scheme to save calls to malloc and free */ 
    
    /* Upper bound on the number of list objects kept in the pool. */
    #define MAXFREELISTS 80 
    
    /* Pool of list objects waiting to be reused. */
    static PyListObject *free_lists[MAXFREELISTS]; 
    
    /* Number of entries of free_lists currently occupied. */
    static int num_free_lists = 0; 

    放元素到List指定位置:

    [listobject.c] 
    
    /*
     * Store newitem at index i of the list op, replacing the previous element.
     *
     * Returns 0 on success.  When i is out of range, newitem is released,
     * IndexError is set and -1 is returned.  The handling of a non-list op is
     * abridged in this excerpt.
     */
    int PyList_SetItem(register PyObject *op, register int i, register PyObject   *newitem)
    {
        register PyObject *olditem;
        register PyObject **p;
        if (!PyList_Check(op)) {
            ……
        }
        /* Valid indices are 0 .. ob_size-1; negative indices are rejected. */
        if (i < 0 || i >= ((PyListObject *)op) -> ob_size) {
            Py_XDECREF(newitem);
            PyErr_SetString(PyExc_IndexError,
                    "list assignment index out of range");
            return -1;
        }
        p = ((PyListObject *)op) -> ob_item + i;
        olditem = *p;
        /* newitem is stored as-is (no Py_INCREF here); the previous element
         * is released only after the slot has been overwritten. */
        *p = newitem;
        Py_XDECREF(olditem);
        return 0;
    }


    2.2、添加

    插入元素:

    [listobject.c] 
    
    /* Insert newitem into the list op at position where.  After the type
     * check (abridged in this excerpt) the work is delegated to ins1(). */
    int PyList_Insert(PyObject *op, int where, PyObject *newitem)
    {
        ......// type check
        return ins1((PyListObject *)op, where, newitem);
    }
    /*
     * Insert v into self at position where.
     *
     * Returns 0 on success and -1 when list_resize() fails.  Part of the body
     * is abridged in this excerpt.
     */
    static int ins1(PyListObject *self, int where, PyObject *v)
    {
        int i, n = self->ob_size;
        PyObject **items;
        ......
        /* Make room for one more element; ob_size becomes n+1. */
        if (list_resize(self, n+1) == -1)
            return -1;
    
        /* A negative position counts from the end and is clamped to 0. */
        if (where < 0) {
            where += n;
            if (where < 0)
                where = 0;
        }
        /* A position past the end is clamped to n. */
        if (where > n)
            where = n;
        /* Re-read ob_item after list_resize(), which may have replaced it. */
        items = self->ob_item;
        /* Shift elements where..n-1 one slot to the right, last one first. */
        for (i = n; --i >= where; )
            items[i+1] = items[i];
        Py_INCREF(v);
        items[where] = v;
        return 0;
    }

    看看list_resize:

    [listobject.c]
    /*
     * Set the logical length of the list to newsize, re-sizing the element
     * vector when the current allocation cannot be reused as it is.
     *
     * Returns 0 on success, or -1 after calling PyErr_NoMemory() when the
     * vector cannot be re-sized.
     */
    static int list_resize(PyListObject *self, int newsize)
    {
        const int capacity = self->allocated;
        PyObject **vec;
        size_t target;

        /* Fast path: the existing allocation is big enough and newsize has
         * not dropped below half of it, so only ob_size changes.  Below
         * half, fall through and shrink the vector. */
        if (newsize <= capacity && (capacity >> 1) <= newsize) {
            assert(self->ob_item != NULL || newsize == 0);
            self->ob_size = newsize;
            return 0;
        }

        /* Over-allocate in proportion to the list size to leave room for
         * further growth.  The over-allocation is mild, yet enough to give
         * linear-time amortized behavior over a long run of appends even
         * with a poorly-performing system realloc().
         * The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
         */
        if (newsize == 0)
            target = 0;
        else
            target = (newsize >> 3) + (newsize < 9 ? 3 : 6) + newsize;

        vec = self->ob_item;
        if (target > ((~(size_t)0) / sizeof(PyObject *))) {
            /* The byte count would not fit in size_t. */
            vec = NULL;
        } else {
            PyMem_RESIZE(vec, PyObject *, target);
        }
        if (vec == NULL) {
            PyErr_NoMemory();
            return -1;
        }

        self->ob_item = vec;
        self->ob_size = newsize;
        self->allocated = target;
        return 0;
    }

    插入的时候,Python分四种情况处理:

    1.      newsize > ob_size && newsize < allocated :简单调整ob_size值。

    2.      newsize < ob_size && newsize > allocated/2 :简单调整ob_size值。

    3.      newsize < ob_size && newsize < allocated/2 :调用realloc,重新分配空间。

    4.      newsize > ob_size && newsize > allocated :调用realloc,重新分配空间。

    调整完空间后开始插入元素,其中体现负值索引的特性:

    /* Excerpt of ins1(): the part that normalises the insert position and
     * stores v.  The declarations and the resize step are abridged here. */
    static int ins1(PyListObject *self, int where, PyObject *v)
    {
        ......
        /* A negative position counts from the end and is clamped to 0. */
        if (where < 0) {
            where += n;
            if (where < 0)
                where = 0;
        }
        /* A position past the end is clamped to n. */
        if (where > n)
            where = n;
        items = self->ob_item;
        /* Shift elements where..n-1 one slot to the right, last one first. */
        for (i = n; --i >= where; )
            items[i+1] = items[i];
        Py_INCREF(v);
        items[where] = v;
        return 0;
    }

    PyListObject 与 C++中的vector相似。

    再看看list中的append:

    [listobject.c]
    /*
     * Append newitem to the list op.
     *
     * Returns the result of app1(), or -1 after PyErr_BadInternalCall() when
     * op is not a list or newitem is NULL.
     */
    int PyList_Append(PyObject *op, PyObject *newitem)
    {
        /* Reject bad arguments first. */
        if (!PyList_Check(op) || newitem == NULL) {
            PyErr_BadInternalCall();
            return -1;
        }
        return app1((PyListObject *)op, newitem);
    }
     
    
    
    /* Append v to self via app1(); returns None on success and NULL when
     * app1() reports failure. */
    static PyObject* listappend(PyListObject *self, PyObject *v)
    {
        if (app1(self, v) != 0) {
            return NULL;
        }
        Py_RETURN_NONE;
    }
     
    
    
    /*
     * Append v to the end of self.
     *
     * Returns 0 on success and -1 when list_resize() fails.  Part of the body
     * is abridged in this excerpt.
     */
    static int app1(PyListObject *self, PyObject *v)
    {
        int n = PyList_GET_SIZE(self);
    ......
        /* Grow the logical size by one slot. */
        if (list_resize(self, n+1) == -1)
            return -1;
     
    
        /* The new element goes into slot n, the size before the resize. */
        Py_INCREF(v);
        PyList_SET_ITEM(self, n, v);
        return 0;
    } 

    添加的元素添加在ob_size位置,而不是allocated位置上。

    2.3、删除

    PyListObject的删除remove:

    [listobject.c] 
    
    /*
     * Remove the first element of self that compares equal to v.
     *
     * Returns None on success.  Returns NULL when a comparison fails, when the
     * slice deletion fails, or (with ValueError set) when no element matches.
     */
    static PyObject * listremove(PyListObject *self, PyObject *v)
    {
        int idx = 0;

        while (idx < self->ob_size) {
            int match = PyObject_RichCompareBool(self->ob_item[idx], v, Py_EQ);

            /* A negative result means the comparison itself failed. */
            if (match < 0)
                return NULL;
            if (match > 0) {
                /* Delete the one-element slice [idx, idx+1). */
                if (list_ass_slice(self, idx, idx + 1, (PyObject *)NULL) != 0)
                    return NULL;
                Py_RETURN_NONE;
            }
            idx++;
        }
        PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
        return NULL;
    }

    先用PyObject_RichCompareBool匹配元素是否在list上,在的话用list_ass_slice 删除:

    [listobject.c]
    /*
     * Replace the slice a[ilow:ihigh] with the contents of v, or delete the
     * slice when v is NULL.
     *
     * NOTE(review): this is an abridged excerpt.  The declarations of `s` and
     * `k`, the `Error` label, the non-NULL branch and the return statements
     * are not shown.
     */
    static int list_ass_slice(PyListObject *a, int ilow, int ihigh, PyObject *v)
    {
       PyObject *recycle_on_stack[8];
        PyObject **recycle = recycle_on_stack; /* will allocate more if needed */ 
    
        PyObject **item;
        int n; /* # of elements in replacement list */
        int norig; /* # of elements in list getting replaced */
        int d; /* Change in size */
    #define b ((PyListObject *)v)
        /* A NULL v means pure deletion: the replacement is empty. */
        if (v == NULL)
            n = 0;
    else {
    ……
    }
     
    
        norig = ihigh - ilow;
        d = n - norig;
    item = a->ob_item;
        //[1] Save the elements being replaced so they can be released at the end.
    
        s = norig * sizeof(PyObject *); 
    
        /* Use the on-stack buffer unless the slice is too big for it. */
        if (s > sizeof(recycle_on_stack)) { 
    
            recycle = (PyObject **)PyMem_MALLOC(s); 
    
            if (recycle == NULL) { 
    
                PyErr_NoMemory(); 
    
                goto Error; 
    
            } 
    
        } 
    
        memcpy(recycle, &item[ilow], s); 
    
     
    
    //[2] Close the gap when the list shrinks.
    
        if (d < 0) { /* Delete -d items */
            /* Move the tail (everything from ihigh on) down by -d slots. */
            memmove(&item[ihigh+d], &item[ihigh],
    (a->ob_size - ihigh)*sizeof(PyObject *));
            list_resize(a, a->ob_size + d);
            /* Re-read ob_item after list_resize(), which may have replaced it. */
            item = a->ob_item;
        }
    ……
    //[3] Release the saved elements, last one first.
    
    for (k = norig - 1; k >= 0; --k) 
    
            Py_XDECREF(recycle[k]);
    #undef b
    }

    当v为NULL时执行删除动作,删除个数为ihigh-ilow=1,最后通过memmove执行删除元素。PyListObject删除元素时会引起内存搬移动作。

    list_ass_slice不仅仅是用做删除元素,它还可以进行插入元素的动作:

    a[ilow:ihigh] = v if v != NULL.

    del a[ilow:ihigh] if v == NULL. 


    3、PyListObject 对象缓冲池

    刚刚提到的free_lists是在PyListObject销毁时产生的:

    [listobject.c] 
    
    /*
     * Destroy a list object: release every element and free the element
     * vector, then either park the object in the free_lists pool or hand it
     * to its type's tp_free.
     */
    static void list_dealloc(PyListObject *op)
    {
        PyObject_GC_UnTrack(op);
        Py_TRASHCAN_SAFE_BEGIN(op)
        if (op->ob_item != NULL) {
            int idx;

            /* Release the elements from the last one to the first.  Per the
             * original note (credited to Christian Tismer), doing it
             * backwards somehow reduces thrashing when a *very* large list is
             * created and immediately deleted. */
            for (idx = op->ob_size - 1; idx >= 0; idx--) {
                Py_XDECREF(op->ob_item[idx]);
            }
            PyMem_FREE(op->ob_item);
        }
        /* Only exact list objects are pooled, and only while the pool has a
         * free entry; everything else goes to tp_free. */
        if (num_free_lists >= MAXFREELISTS || !PyList_CheckExact(op))
            op->ob_type->tp_free((PyObject *)op);
        else
            free_lists[num_free_lists++] = op;
        Py_TRASHCAN_SAFE_END(op)
    }

    在销毁PyListObject时,先减少list中的每个元素的引用计数,然后判断free_lists是否满了,没满就加上要删除的PyListObject,而下次创建PyListObject时,会优先从free_lists获取内存。而删除对象原来拥有的PyObject*列表,会因引用计数减少各归各处。


    4、Hack PyListObject

    可在list_print中添加:

    printf("\nallocated=%d, ob_size=%d\n", op->allocated, op->ob_size); 

    观察PyListObject对内存的管理。

    也可打印num_free_lists观察增删元素时对num_free_lists影响。

  • 相关阅读:
    mmseg4j 中文分词 1.6 版发布
    中文分词 mmseg4j 在 lucene 中的使用示例
    Solr Multicore 试用小记
    学习Javascript和Jquery必备资料
    前端开发 IE 中的常用调试工具
    jQuery学习教程(九):动画
    【jquery插件】SELECT美化,下拉框美化
    Jquery 基础实例源码分享
    关于Jquery显示隐藏的分析
    jQuery判断一个元素是否为另一个元素的子元素(或者其本身)
  • 原文地址:https://www.cnblogs.com/GO-NO-1/p/6516017.html
Copyright © 2011-2022 走看看