zoukankan      html  css  js  c++  java
  • STL源码笔记1 —— allocators

    STL源码笔记1 —— allocators

    简述

    allocators是STL中很重要的一个幕后英雄的角色,STL中的容器在使用过程中需要不断的放元素进去和取元素出来,而在此过程中,如何更高效的申请和释放内存是十分影响STL容器的性能的。

    operator new() 和 malloc()

    首先内存的分配动作,一层层调用下去,最终到了CRT的层面上都是调用malloc()来分配,而malloc再根据所在的操作系统,调用不同的操作系统api才能真正的拿到内存。下面是vs2015和gcc 2.95的源码,里面的 operator new 最终调用 malloc 进行内存分配:

    //VS2015中,Microsoft Visual Studio 14.0\VC\crt\src\vcruntime\new_debug.cpp中的源码
    // Debug-mode overload: allocates through _malloc_dbg.
    //
    // Loops until _malloc_dbg returns a non-null block. block_use, file_name
    // and line_number are forwarded to _malloc_dbg unchanged. After every
    // failed attempt _callnewh(size) is consulted: when it returns 0 one of the
    // __scrt_throw_* helpers is invoked (the bad_array_new_length variant when
    // size == SIZE_MAX, the bad_alloc variant otherwise); when it returns
    // non-zero the allocation is simply retried.
    void* __CRTDECL operator new(
        size_t const size,
        int const    block_use,
        char const*  file_name,
        int const    line_number
        )
    {
        for (;;)
        {
            // Success path: hand the block straight back to the caller.
            if (void* const block = _malloc_dbg(size, block_use, file_name, line_number))
            {
                return block;
            }
    
            // Allocation failed: ask the new-handler machinery whether to retry.
            if (_callnewh(size) == 0)
            {
                if (size == SIZE_MAX)
                {
                    __scrt_throw_std_bad_array_new_length();
                }
                else
                {
                    __scrt_throw_std_bad_alloc();
                }
            }
        }
    }
    
    //VS2015中,Microsoft Visual Studio 14.0\VC\crt\src\linkopts\nothrownew.cpp中的源码
    // Non-throwing operator new: calls malloc directly.
    //
    // Retries malloc(size) for as long as _callnewh(size) returns non-zero.
    // Returns the allocated block, or nullptr once _callnewh returns 0.
    void* operator new(size_t size)
    {
        for (;;)
        {
            if (void* const block = malloc(size))
                return block;
    
            // No handler could help: report failure with a null pointer
            // instead of an exception.
            if (_callnewh(size) == 0)
                return nullptr;
    
            // The new handler was successful; try to allocate again...
        }
    }
    
    
    //在gcc 2.95里面 gcc-2.95.1\gcc\cp\new1.cc
    // Nothrow form of operator new (gcc 2.95).
    //
    // Allocates sz bytes with malloc, treating a zero-byte request as one byte.
    // While malloc fails, the currently installed __new_handler is called and
    // the allocation is retried. Returns 0 when no handler is installed or when
    // the handler throws bad_alloc; otherwise returns the allocated pointer.
    void * operator new (size_t sz, const std::nothrow_t&) throw()
    {
      void *p;
    
      /* malloc (0) is unpredictable; avoid it.  */
      if (sz == 0)
        sz = 1;
      p = (void *) malloc (sz);
      while (p == 0)
        {
          // Re-read the handler on every iteration before deciding to give up.
          new_handler handler = __new_handler;
          if (! handler)
    	return 0;
          try
    	{
    	  handler ();
    	}
          catch (bad_alloc &)
    	{
    	  // The nothrow form converts bad_alloc into a null return.
    	  return 0;
    	}
    
          p = (void *) malloc (sz);
        }
    
      return p;
    }
    
    // Throwing form of operator new (gcc 2.95).
    //
    // Allocates sz bytes with malloc, treating a zero-byte request as one byte.
    // While malloc fails, the currently installed __new_handler is called and
    // the allocation is retried. Throws bad_alloc when no handler is installed;
    // an exception thrown by the handler itself is not caught here.
    void * operator new (size_t sz) throw (std::bad_alloc)
    {
      void *p;
    
      /* malloc (0) is unpredictable; avoid it.  */
      if (sz == 0)
        sz = 1;
      p = (void *) malloc (sz);
      while (p == 0)
        {
          // Re-read the handler on every iteration before deciding to give up.
          new_handler handler = __new_handler;
          if (! handler)
    	throw bad_alloc ();
          handler ();
          p = (void *) malloc (sz);
        }
    
      return p;
    }
    

    然而malloc分配的内存如果在debug模式下,会有许多额外的信息(包括大小、前后块指针、使用情况等信息),而即使是在release模式下,也至少会有标识大小的字节被占用。那么每次申请的内存就会有额外的开销,如果申请的空间很小,额外的开销占比就会很大。因此,产生了一种使用内存管理,减少这种开销的想法,这也是STL的allocators分配器最核心的功能。

    VS2015 中的allocator

    在VS中,几个容器的设计是这样的:

    // vector as declared in the VS headers (body elided).
    // The second template parameter defaults to allocator<_Ty>.
    template<class _Ty,
    	class _Alloc = allocator<_Ty> >
    	class vector
    		: public _Vector_alloc<_Vec_base_types<_Ty, _Alloc> >
    	{	// varying size array of values
            ...
        };
    
    // list as declared in the VS headers (body elided).
    // The second template parameter defaults to allocator<_Ty>.
    template<class _Ty,
    	class _Alloc = allocator<_Ty> >
    	class list
    		: public _List_buy<_Ty, _Alloc>
    	{	// bidirectional linked list
            ...
        };
    

    可以看到在VS的容器里,默认使用的是allocator这个class,那么再去 Microsoft Visual Studio 14.0\VC\include\xmemory0 观察allocator的实现:

    // Excerpt of the VS allocator class template (other members elided).
    // Both allocate overloads shown end up in _Allocate(_Count, sizeof (_Ty)).
    template<class _Ty>
    	class allocator
    	{	// generic allocator for objects of class _Ty
    public:
        ...
        
    	// Returns storage for _Count objects of type _Ty, obtained from
    	// _Allocate and cast to pointer.
    	_DECLSPEC_ALLOCATOR pointer allocate(size_type _Count)
    		{	// allocate array of _Count elements
    		return (static_cast<pointer>(_Allocate(_Count, sizeof (_Ty))));
    		}
    
    	// Same as above; the hint argument is unnamed and unused.
    	_DECLSPEC_ALLOCATOR pointer allocate(size_type _Count, const void *)
    		{	// allocate array of _Count elements, ignore hint
    		return (allocate(_Count));
    		}
        };
    

    allocate调用了_Allocate,再去看_Allocate的实现:

    	// Allocates raw storage for _Count elements of _Sz bytes each.
    	//
    	// Returns a null pointer when _Count is 0. Calls _Xbad_alloc() when
    	// _Count * _Sz would overflow size_t. Otherwise the memory comes from
    	// ::operator new.
    	//
    	// On x86/x64 builds, when _Try_aligned_allocation is true and the request
    	// is at least _BIG_ALLOCATION_THRESHOLD bytes, the block is over-allocated
    	// by _NON_USER_SIZE bytes and the returned pointer is rounded down to a
    	// multiple of _BIG_ALLOCATION_ALIGNMENT. The address obtained from
    	// operator new is stored in the uintptr_t slot just before the returned
    	// pointer.
    	_DECLSPEC_ALLOCATOR void *_Allocate(size_t _Count, size_t _Sz,
    		bool _Try_aligned_allocation = true)
    	{	// allocate storage for _Count elements of size _Sz
    	void *_Ptr = 0;
    
    	if (_Count == 0)
    		return (_Ptr);
    
    	// check overflow of multiply
    	if ((size_t)(-1) / _Sz < _Count)
    		_Xbad_alloc();	// report no memory
    	const size_t _User_size = _Count * _Sz;
    
     #if defined(_M_IX86) || defined(_M_X64)
    	if (_Try_aligned_allocation
    		&& _BIG_ALLOCATION_THRESHOLD <= _User_size)
    		{	// allocate large block
    		static_assert(sizeof (void *) < _BIG_ALLOCATION_ALIGNMENT,
    			"Big allocations should at least match vector register size");
    		const size_t _Block_size = _NON_USER_SIZE + _User_size;
    		// A sum that is not larger than _User_size means the addition wrapped.
    		if (_Block_size <= _User_size)
    			_Xbad_alloc();	// report no memory
    		const uintptr_t _Ptr_container =
    			reinterpret_cast<uintptr_t>(::operator new(_Block_size));
    		_SCL_SECURE_ALWAYS_VALIDATE(_Ptr_container != 0);
    		// Skip the bookkeeping area, then clear the low bits to align down.
    		_Ptr = reinterpret_cast<void *>((_Ptr_container + _NON_USER_SIZE)
    			& ~(_BIG_ALLOCATION_ALIGNMENT - 1));
    		// Remember the real allocation address right before the user block.
    		static_cast<uintptr_t *>(_Ptr)[-1] = _Ptr_container;
    
     #ifdef _DEBUG
    		// Debug builds also write a sentinel one slot further back.
    		static_cast<uintptr_t *>(_Ptr)[-2] = _BIG_ALLOCATION_SENTINEL;
     #endif /* _DEBUG */
    		}
    	else
     #endif /* defined(_M_IX86) || defined(_M_X64) */
    
    		{	// allocate normal block
    		_Ptr = ::operator new(_User_size);
    		_SCL_SECURE_ALWAYS_VALIDATE(_Ptr != 0);
    		}
    	return (_Ptr);
    	}
    

    里面先检查乘法是否溢出(溢出时调用 _Xbad_alloc() 报告内存不足),然后就是调用 ::operator new 来实现内存的分配,所以实际上VS并没有对STL的allocator做特别的优化。
    同样的,通过查看 deallocate() 对应的源代码,也看出来VS在释放的时候也只是一个对 operator delete 的封装而已。因此,可以认为VS在这一方面并没有做特殊设计。

    GCC2.9 中的allocator

    同样,我们也去看GCC中容器的实现:

    // gcc-2.95.1\libstdc++\stl\stl_vector.h
    // vector declaration (body elided); the allocator parameter defaults to
    // whatever the __STL_DEFAULT_ALLOCATOR(_Tp) macro expands to.
    template <class _Tp, class _Alloc = __STL_DEFAULT_ALLOCATOR(_Tp) >
    class vector : protected _Vector_base<_Tp, _Alloc> 
    {
        ···
    };
    
    // gcc-2.95.1\libstdc++\stl\stl_list.h
    // list declaration (body elided); the allocator parameter defaults to
    // whatever the __STL_DEFAULT_ALLOCATOR(_Tp) macro expands to.
    template <class _Tp, class _Alloc = __STL_DEFAULT_ALLOCATOR(_Tp) >
    class list : protected _List_base<_Tp, _Alloc> {
        ...
    };
    
    // In stl_config.h.
    // Unless already defined, __STL_DEFAULT_ALLOCATOR(T) expands to
    // allocator<T> when __STL_USE_STD_ALLOCATORS is defined and to alloc
    // (ignoring T) otherwise.
    # ifndef __STL_DEFAULT_ALLOCATOR
    #   ifdef __STL_USE_STD_ALLOCATORS
    #     define __STL_DEFAULT_ALLOCATOR(T) allocator<T>
    #   else
    #     define __STL_DEFAULT_ALLOCATOR(T) alloc
    #   endif
    # endif
    

    可以看到,宏 __STL_DEFAULT_ALLOCATOR,如果没有特别说明的情况下,是使用alloc的,那么我们来看看alloc类的实现:

    // With _NOTHREADS defined, the lock/unlock macros and __VOLATILE expand to
    // nothing and __NODE_ALLOCATOR_THREADS is false, i.e. the single-threaded
    // configuration. The article treats this as the default case.
    // NOTE(review): these definitions are guarded by #ifdef _NOTHREADS, so
    // confirm which configuration your platform actually selects.
    # ifdef _NOTHREADS
    #   define __NODE_ALLOCATOR_LOCK
    #   define __NODE_ALLOCATOR_UNLOCK
    #   define __NODE_ALLOCATOR_THREADS false
    #   define __VOLATILE
    # endif
    // alloc names instance 0 of the pool allocator template, parameterized with
    // the thread flag chosen above.
    typedef __default_alloc_template<__NODE_ALLOCATOR_THREADS, 0> alloc;
    
    // Excerpt of the pool allocator template showing its constants and static
    // state (other members elided).
    template <bool threads, int inst>
    class __default_alloc_template {
        ...
    private:
        // Block sizes are multiples of _ALIGN up to _MAX_BYTES, which yields
        // _MAX_BYTES/_ALIGN = 16 free lists.
        enum {_ALIGN = 8};
        enum {_MAX_BYTES = 128};
        enum {_NFREELISTS = _MAX_BYTES/_ALIGN};
    
        // A block is viewed either as a free-list node (link to the next free
        // block) or as client data; both views share the same storage.
        union _Obj {
            union _Obj* _M_free_list_link;
            char _M_client_data[1];    /* The client sees this.        */
        };
    
        // One list head per block size.
        static _Obj* __VOLATILE _S_free_list[_NFREELISTS];
        // Chunk allocation state.
        static char* _S_start_free;
        static char* _S_end_free;
        static size_t _S_heap_size;
    
        ...
    };
    
    

    借用侯捷老师在课程中用的图,其实在gcc的alloc中,设计了一个16条链表的数组 _S_free_list ,每一条链表负责某个大小的特定区块的分配,分别负责从8个字节到128个字节的区块。当容器需要分配内存的时候,都是从这个分配器中去申请,然后大小向上调整到8的倍数(例如121字节会调整到128字节),然后到分配器里再去对应的链表中搜索是否有空闲的区块(例如128字节需要到 _S_free_list[15] 里面搜索),如果该链表中没有挂着空闲内存,会先尝试从内存池的剩余空间中切割,内存池也不够时才会通过malloc向系统申请一大块内存再切割。

    avatar

    同样,再往下看,看看该类中的 allocate 和 deallocate的实现:

    // Excerpt of the pool allocator template showing allocate and deallocate
    // (other members elided).
    template <bool threads, int inst>
    class __default_alloc_template {
        ...
    
          // Maps a byte count to its free-list slot: sizes 1.._ALIGN map to 0,
          // the next _ALIGN sizes map to 1, and so on.
          static  size_t _S_freelist_index(size_t __bytes) {
            return (((__bytes) + _ALIGN-1)/_ALIGN - 1);
          }
    
        public:
          // Returns a block of at least __n bytes.
          // Requests larger than _MAX_BYTES are forwarded to malloc_alloc.
          // Smaller requests pop the head of the matching free list, or call
          // _S_refill with the rounded-up size when that list is empty.
          /* __n must be > 0      */
          static void* allocate(size_t __n)
          {
            _Obj* __VOLATILE* __my_free_list;
            _Obj* __RESTRICT __result;
    
            if (__n > (size_t) _MAX_BYTES) {
                return(malloc_alloc::allocate(__n));
            }
            __my_free_list = _S_free_list + _S_freelist_index(__n);
    
            __result = *__my_free_list;
            if (__result == 0) {
                void* __r = _S_refill(_S_round_up(__n));
                return __r;
            }
            // Unlink the first free block and hand it out.
            *__my_free_list = __result -> _M_free_list_link;
            return (__result);
          };
    
          // Returns the block __p of size __n.
          // Blocks larger than _MAX_BYTES are forwarded to malloc_alloc; smaller
          // ones are pushed onto the front of the matching free list.
          /* __p may not be 0 */
          static void deallocate(void* __p, size_t __n)
          {
            _Obj* __q = (_Obj*)__p;
            _Obj* __VOLATILE* __my_free_list;
    
            if (__n > (size_t) _MAX_BYTES) {
                malloc_alloc::deallocate(__p, __n);
                return;
            }
            __my_free_list = _S_free_list + _S_freelist_index(__n);
    
            __q -> _M_free_list_link = *__my_free_list;
            *__my_free_list = __q;
            // lock is released here
            // NOTE(review): no lock object is visible in this excerpt;
            // presumably it was elided from the quoted source.
          }
    
      ...
    };
    
    

    _S_freelist_index(__n) 这个函数可以计算出需要的空间会落在 _S_free_list 这个数组的哪个index上,再从中取出一块空余内存分配。而如果没有空余内存,则会调用 _S_refill(_S_round_up(__n)) 该函数重新分配一段内存,源代码如下:

    // Refills the free list for blocks of __n bytes and returns one block for
    // the caller.
    //
    // Asks _S_chunk_alloc for 20 blocks; __nobjs is passed by reference and
    // holds the number actually obtained afterwards. When only one block was
    // obtained it is returned directly. Otherwise the first block is returned
    // and the remaining __nobjs - 1 blocks are chained together and installed
    // as the free list for this size.
    template <bool __threads, int __inst>
    void*
    __default_alloc_template<__threads, __inst>::_S_refill(size_t __n)
    {
        int __nobjs = 20;
        char* __chunk = _S_chunk_alloc(__n, __nobjs);
        _Obj* __VOLATILE* __my_free_list;
        _Obj* __result;
        _Obj* __current_obj;
        _Obj* __next_obj;
        int __i;
    
        if (1 == __nobjs) return(__chunk);
        __my_free_list = _S_free_list + _S_freelist_index(__n);
    
        /* Build free list in chunk */
          __result = (_Obj*)__chunk;
          // The list starts at the second block; the first one goes to the caller.
          *__my_free_list = __next_obj = (_Obj*)(__chunk + __n);
          for (__i = 1; ; __i++) {
            __current_obj = __next_obj;
            __next_obj = (_Obj*)((char*)__next_obj + __n);
            if (__nobjs - 1 == __i) {
                // Last block of the chunk terminates the list.
                __current_obj -> _M_free_list_link = 0;
                break;
            } else {
                __current_obj -> _M_free_list_link = __next_obj;
            }
          }
        return(__result);
    }
    

    其中,真正申请空间的函数就是 _S_chunk_alloc(__n, __nobjs) ,其中 __nobjs = 20。阅读下面的代码可以看到,如果内存池剩余空间不足而需要重新向系统申请内存,会申请 2 × 20 = 40 倍区块大小、再加上一个随已申请总量增长的附加量(_S_heap_size >> 4 向上调整到8的倍数)的内存:

    // Bytes needed to satisfy the current request: block size times block count.
    size_t __total_bytes = __size * __nobjs;
    // Bytes requested from the system: twice the current need plus 1/16 of the
    // heap size accumulated so far, passed through _S_round_up.
    size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
    

    然后再从该空间中分出一块作为当前分配,具体源码如下:

    // Carves storage for up to __nobjs blocks of __size bytes out of the pool
    // delimited by _S_start_free and _S_end_free.
    //
    // __nobjs is an in/out parameter: it is lowered when the pool can supply
    // at least one block but not all of the requested ones. When the pool
    // cannot supply even one block, the leftover bytes are pushed onto the
    // matching free list, a new region is obtained with malloc, and the
    // function calls itself again. If malloc fails, free lists for sizes from
    // __size upward are searched for a block to reuse as the pool; as a last
    // resort the request is forwarded to malloc_alloc::allocate.
    template <bool __threads, int __inst>
    char*
    __default_alloc_template<__threads, __inst>::_S_chunk_alloc(size_t __size,
                                                                int& __nobjs)
    {
        char* __result;
        size_t __total_bytes = __size * __nobjs;
        size_t __bytes_left = _S_end_free - _S_start_free;
    
        if (__bytes_left >= __total_bytes) {
            // The pool covers the whole request.
            __result = _S_start_free;
            _S_start_free += __total_bytes;
            return(__result);
        } else if (__bytes_left >= __size) {
            // The pool covers at least one block: hand out as many as fit.
            __nobjs = (int)(__bytes_left/__size);
            __total_bytes = __size * __nobjs;
            __result = _S_start_free;
            _S_start_free += __total_bytes;
            return(__result);
        } else {
            size_t __bytes_to_get =
    	  2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
            // Try to make use of the left-over piece.
            if (__bytes_left > 0) {
                _Obj* __VOLATILE* __my_free_list =
                            _S_free_list + _S_freelist_index(__bytes_left);
    
                ((_Obj*)_S_start_free) -> _M_free_list_link = *__my_free_list;
                *__my_free_list = (_Obj*)_S_start_free;
            }
            _S_start_free = (char*)malloc(__bytes_to_get);
            if (0 == _S_start_free) {
                size_t __i;
                _Obj* __VOLATILE* __my_free_list;
    	    _Obj* __p;
                // Try to make do with what we have.  That can't
                // hurt.  We do not try smaller requests, since that tends
                // to result in disaster on multi-process machines.
                for (__i = __size; __i <= _MAX_BYTES; __i += _ALIGN) {
                    __my_free_list = _S_free_list + _S_freelist_index(__i);
                    __p = *__my_free_list;
                    if (0 != __p) {
                        // Take one free block of size __i and use it as the pool.
                        *__my_free_list = __p -> _M_free_list_link;
                        _S_start_free = (char*)__p;
                        _S_end_free = _S_start_free + __i;
                        return(_S_chunk_alloc(__size, __nobjs));
                        // Any leftover piece will eventually make it to the
                        // right free list.
                    }
                }
    	    _S_end_free = 0;	// In case of exception.
                _S_start_free = (char*)malloc_alloc::allocate(__bytes_to_get);
                // This should either throw an
                // exception or remedy the situation.  Thus we assume it
                // succeeded.
            }
            // Record the new region as the pool, then retry against it.
            _S_heap_size += __bytes_to_get;
            _S_end_free = _S_start_free + __bytes_to_get;
            return(_S_chunk_alloc(__size, __nobjs));
        }
    }
    
  • 相关阅读:
    超级小白使用pip安装第三方库的正确姿势
    selenium+python自动化测试--解决无法启动IE浏览器及报错问题
    microsoft edge浏览器安装驱动
    超详细MySQL安装及基本使用教程
    Navicat15最新版本破解 亲测可用!!!
    Ubuntu 16.04安装JMeter测试工具
    JMeter_Ubuntu上安装jmeter
    韩国vps推荐-kdatacenter
    全栈之js入门篇
    Web前端之CSS_day5
  • 原文地址:https://www.cnblogs.com/zhqherm/p/12156450.html
Copyright © 2011-2022 走看看