zoukankan      html  css  js  c++  java
  • std::unordered_map

    std::unordered_map

      版本:libc++,XcodeDefault.xctoolchain/usr/include/c++/v1

    1:unordered_map typedef

       例子:typedef std::unordered_map<std::string, int> StringIntMap;

        模板参数:

     1 template <class _Key, class _Tp, class _Hash = hash<_Key>, class _Pred = equal_to<_Key>,
     2           class _Alloc = allocator<pair<const _Key, _Tp> > >
     3 class _LIBCPP_TEMPLATE_VIS unordered_map
     4 {
     5 public:
     6     // types
     7     typedef _Key                                           key_type;
     8     typedef _Tp                                            mapped_type;
     9     typedef _Hash                                          hasher;
    10     typedef _Pred                                          key_equal;
    11     typedef _Alloc                                         allocator_type;
    12     typedef pair<const key_type, mapped_type>              value_type;
    13     typedef value_type&                                    reference;
    14     typedef const value_type&                              const_reference;
    15     static_assert((is_same<value_type, typename allocator_type::value_type>::value),
    16                   "Invalid allocator::value_type");
    17 
    18 private:
    19     typedef __hash_value_type<key_type, mapped_type>                 __value_type;
    20     typedef __unordered_map_hasher<key_type, __value_type, hasher>   __hasher;
    21     typedef __unordered_map_equal<key_type, __value_type, key_equal> __key_equal;
    22     typedef typename __rebind_alloc_helper<allocator_traits<allocator_type>,
    23                                                  __value_type>::type __allocator_type;
    24 
    25     typedef __hash_table<__value_type, __hasher,
    26                          __key_equal,  __allocator_type>   __table;
    27 
    28     __table __table_;
    29 
    30     ......
    31       
    32 }
    • key_type -> _Key -> std::string
    • mapped_type -> _Tp -> int
    • hasher -> _Hash = hash<_Key> -> hash<std::string>
    • key_equal -> _Pred = equal_to<_Key>  -> equal_to<std::string>
    • allocator_type -> _Alloc = allocator<pair<const _Key, _Tp> > -> allocator<pair<const std::string, int> >

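      unordered_map内部持有__hash_table对象(成员__table_)。std::unordered_map<std::string, int>实例化后,实际的__hash_table模板参数是__hash_value_type、__unordered_map_hasher、__unordered_map_equal以及rebind之后的分配器;忽略这几层包装,概念上等价于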

      __hash_table<

        pair<const std::string, int>,

        hash<std::string>,

        equal_to<std::string>,

        allocator<pair<const std::string, int> >

      >

     1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     2 class __hash_table
     3 {
     4 public:
     5     typedef _Tp    value_type;
     6     typedef _Hash  hasher;
     7     typedef _Equal key_equal;
     8     typedef _Alloc allocator_type;
     9 
    10 private:
    11     typedef unique_ptr<__next_pointer[], __bucket_list_deleter> __bucket_list;
    12     // --- Member data begin ---
    13     __bucket_list                                         __bucket_list_;
    14     __compressed_pair<__first_node, __node_allocator>     __p1_;
    15     __compressed_pair<size_type, hasher>                  __p2_;
    16     __compressed_pair<float, key_equal>                   __p3_;
    17     // --- Member data end ---
    18     
    19    ......
    20   
    21 }

      __hash_table内部持有4个成员变量,

    • __bucket_list_,__next_pointer数组(bucket数组);所有node串在同一条单向链表上,每个bucket槽位保存的是该bucket第一个node的前驱节点指针,同一个bucket的node在链表中连续存放
    • __p1_,head node  --  node分配器;
    • __p2_,node总数量 -- hash key size_t计算器;每成功插入一个node,node总数量+1
    • __p3_,负载因子 -- 数据比较器;负载因子调整bucket的数量(rehash方法),数据比较器用于比较参数和bucket node中_Key是否相同(因为bucket是链式存储,由hash key size_t计算出bucket index之后,会从bucket的第一个node开始,逐一比较node的key是否和参数相同)

      模板推导出类型后,就可以得知unordered_map的几个关键要点

    • __p2_->second, hash<std::string>,提供string到hash key size_t的计算
    • __bucket_list_,unordered_map的存储区
    • __p3_->first, 负载因子, rehash,决定bucket数量
    • hash key size_t -> bucket index, __constrain_hash
    • __p3_ -> second, equal_to<std::string>,数据的比较器

    2: 散列计算器,string -> hash

      hash<std::string>, 在string头文件中以hash<basic_string<...>>特化的形式实现。提供operator()操作符,作为计算hash数值的入口方法

     1 template <class _CharT, class _Allocator>
     2 struct _LIBCPP_TEMPLATE_VIS
     3     hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> >
     4     : public unary_function<
     5           basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t>
     6 {
     7     size_t
     8     operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT
     9     { return __do_string_hash(__val.data(), __val.data() + __val.size()); }
    10 };

      hash<std::string>::operator()   调用 __do_string_hash

      __do_string_hash 调用 __murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len)

      __murmur2_or_cityhash<size_t>::operator()(const void* __key, _Size __len) 按照字符串长度__len分成若干区间(0~16、17~32、33~64、大于64字节)分别计算

     1 template <class _Size>
     2 _Size
     3 __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len)
     4 {
     5   const char* __s = static_cast<const char*>(__key);
     6   if (__len <= 32) {
     7     if (__len <= 16) {
     8       return __hash_len_0_to_16(__s, __len);
     9     } else {
    10       return __hash_len_17_to_32(__s, __len);
    11     }
    12   } else if (__len <= 64) {
    13     return __hash_len_33_to_64(__s, __len);
    14   }
    15 
    16   // For strings over 64 bytes we hash the end first, and then as we
    17   // loop we keep 56 bytes of state: v, w, x, y, and z.
    18   _Size __x = __loadword<_Size>(__s + __len - 40);
    19   _Size __y = __loadword<_Size>(__s + __len - 16) +
    20               __loadword<_Size>(__s + __len - 56);
    21   _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len,
    22                           __loadword<_Size>(__s + __len - 24));
    23   pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z);
    24   pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x);
    25   __x = __x * __k1 + __loadword<_Size>(__s);
    26 
    27   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
    28   __len = (__len - 1) & ~static_cast<_Size>(63);
    29   do {
    30     __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1;
    31     __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1;
    32     __x ^= __w.second;
    33     __y += __v.first + __loadword<_Size>(__s + 40);
    34     __z = __rotate(__z + __w.first, 33) * __k1;
    35     __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first);
    36     __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second,
    37                                         __y + __loadword<_Size>(__s + 16));
    38     std::swap(__z, __x);
    39     __s += 64;
    40     __len -= 64;
    41   } while (__len != 0);
    42   return __hash_len_16(
    43       __hash_len_16(__v.first, __w.first) + __shift_mix(__y) * __k1 + __z,
    44       __hash_len_16(__v.second, __w.second) + __x);
    45 }

      举例,__hash_len_0_to_16

     1 static _Size __hash_len_0_to_16(const char* __s, _Size __len)
     2      _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
     3   {
     4     if (__len > 8) {
     5       const _Size __a = __loadword<_Size>(__s);
     6       const _Size __b = __loadword<_Size>(__s + __len - 8);
     7       return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b;
     8     }
     9     if (__len >= 4) {
    10       const uint32_t __a = __loadword<uint32_t>(__s);
    11       const uint32_t __b = __loadword<uint32_t>(__s + __len - 4);
    12       return __hash_len_16(__len + (__a << 3), __b);
    13     }
    14     if (__len > 0) {
    15       const unsigned char __a = __s[0];
    16       const unsigned char __b = __s[__len >> 1];
    17       const unsigned char __c = __s[__len - 1];
    18       const uint32_t __y = static_cast<uint32_t>(__a) +
    19                            (static_cast<uint32_t>(__b) << 8);
    20       const uint32_t __z = __len + (static_cast<uint32_t>(__c) << 2);
    21       return __shift_mix(__y * __k2 ^ __z * __k3) * __k2;
    22     }
    23     return __k2;
    24   }

       同理,其余类型type均实现hash<type>::operator()方法

     

    3:构造bucket

      __p3_->first, 负载因子, rehash,决定bucket数量

     1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     2 _LIBCPP_INLINE_VISIBILITY
     3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
     4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(
     5     size_t __hash, value_type& __value)
     6 {
     7     size_type __bc = bucket_count();
     8 
     9     if (__bc != 0)
    10     {
    11         size_t __chash = __constrain_hash(__hash, __bc);
    12         __next_pointer __ndptr = __bucket_list_[__chash];
    13         if (__ndptr != nullptr)
    14         {
    15             for (__ndptr = __ndptr->__next_; __ndptr != nullptr &&
    16                                              __constrain_hash(__ndptr->__hash(), __bc) == __chash;
    17                                                      __ndptr = __ndptr->__next_)
    18             {
    19                 if (key_eq()(__ndptr->__upcast()->__value_, __value))
    20                     return __ndptr;
    21             }
    22         }
    23     }
    24     if (size()+1 > __bc * max_load_factor() || __bc == 0)
    25     {
    26         rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
    27                                      size_type(ceil(float(size() + 1) / max_load_factor()))));
    28     }
    29     return nullptr;
    30 }

      插入node时,如果满足公式

      size()+1 > __bc * max_load_factor() || __bc == 0,则调用rehash方法,传入的期望bucket数量为

      _VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc), size_type(ceil(float(size() + 1) / max_load_factor())))

      hash_table默认构造函数提供的最大负载因子(max_load_factor)是1;第一次插入时__bc == 0、size() == 0,上式两项均为1,因此rehash传入的参数为1

     1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     2 void
     3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n)
     4 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
     5 {
     6     if (__n == 1)
     7         __n = 2;
     8     else if (__n & (__n - 1))
     9         __n = __next_prime(__n);
    10     size_type __bc = bucket_count();
    11     if (__n > __bc)
    12         __rehash(__n);
    13     else if (__n < __bc)
    14     {
    15         __n = _VSTD::max<size_type>
    16               (
    17                   __n,
    18                   __is_hash_power2(__bc) ? __next_hash_pow2(size_t(ceil(float(size()) / max_load_factor()))) :
    19                                            __next_prime(size_t(ceil(float(size()) / max_load_factor())))
    20               );
    21         if (__n < __bc)
    22             __rehash(__n);
    23     }
    24 }

      rehash内部接收到__n == 1,调整__n = 2。然后调用__rehash方法创建2个bucket

     1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     2 void
     3 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc)
     4 {
     5 #if _LIBCPP_DEBUG_LEVEL >= 2
     6     __get_db()->__invalidate_all(this);
     7 #endif  // _LIBCPP_DEBUG_LEVEL >= 2
     8     __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc();
     9     __bucket_list_.reset(__nbc > 0 ?
    10                       __pointer_alloc_traits::allocate(__npa, __nbc) : nullptr);
    11     __bucket_list_.get_deleter().size() = __nbc;
    12     if (__nbc > 0)
    13     {
    14         for (size_type __i = 0; __i < __nbc; ++__i)
    15             __bucket_list_[__i] = nullptr;
    16         __next_pointer __pp = __p1_.first().__ptr();
    17         __next_pointer __cp = __pp->__next_;
    18         if (__cp != nullptr)
    19         {
    20             size_type __chash = __constrain_hash(__cp->__hash(), __nbc);
    21             __bucket_list_[__chash] = __pp;
    22             size_type __phash = __chash;
    23             for (__pp = __cp, __cp = __cp->__next_; __cp != nullptr;
    24                                                            __cp = __pp->__next_)
    25             {
    26                 __chash = __constrain_hash(__cp->__hash(), __nbc);
    27                 if (__chash == __phash)
    28                     __pp = __cp;
    29                 else
    30                 {
    31                     if (__bucket_list_[__chash] == nullptr)
    32                     {
    33                         __bucket_list_[__chash] = __pp;
    34                         __pp = __cp;
    35                         __phash = __chash;
    36                     }
    37                     else
    38                     {
    39                         __next_pointer __np = __cp;
    40                         for (; __np->__next_ != nullptr &&
    41                                key_eq()(__cp->__upcast()->__value_,
    42                                         __np->__next_->__upcast()->__value_);
    43                                                            __np = __np->__next_)
    44                             ;
    45                         __pp->__next_ = __np->__next_;
    46                         __np->__next_ = __bucket_list_[__chash]->__next_;
    47                         __bucket_list_[__chash]->__next_ = __cp;
    48 
    49                     }
    50                 }
    51             }
    52         }
    53     }
    54 }

    4:插入操作(哈希冲突,链地址法)

     hash key size_t定位到bucket index的计算方法  

    1 inline _LIBCPP_INLINE_VISIBILITY
    2 size_t
    3 __constrain_hash(size_t __h, size_t __bc)
    4 {
    5     return !(__bc & (__bc - 1)) ? __h & (__bc - 1) :
    6         (__h < __bc ? __h : __h % __bc);
    7 }

      第一个参数为hash值,第二个参数为bucket数量

      !(__bc & (__bc - 1))  ->  满足表达式为true,则__bc为2的N次方

      __h & (__bc - 1) -> __bc为2的N次方时,__bc - 1的二进制形如0b111(低N位全为1),按位与即取__h的低N位作为bucket index,等价于__h % __bc

      (__h < __bc ? __h : __h % __bc) -> 如果__h < __bc,则直接取__h作为bucket index;否则取模,以__h % __bc作为bucket index(除留余数法)

      插入node之前,先在对应bucket内查找是否已存在相同的key。

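      bucket内部采用链表存储node,从bucket的第一个node开始顺序遍历,只要node仍属于该bucket(__constrain_hash(__ndptr->__hash(), __bc) == __chash),就使用key_eq比较key;找到相同的key则返回该node(find方法还会先比较完整hash值,再调用key_eq)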

     1 // Prepare the container for an insertion of the value __value with the hash
     2 // __hash. This does a lookup into the container to see if __value is already
     3 // present, and performs a rehash if necessary. Returns a pointer to the
     4 // existing element if it exists, otherwise nullptr.
     5 //
     6 // Note that this function does forward exceptions if key_eq() throws, and never
     7 // mutates __value or actually inserts into the map.
     8 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     9 _LIBCPP_INLINE_VISIBILITY
    10 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__next_pointer
    11 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(
    12     size_t __hash, value_type& __value)
    13 {
    14     size_type __bc = bucket_count();
    15 
    16     if (__bc != 0)
    17     {
    18         size_t __chash = __constrain_hash(__hash, __bc);
    19         __next_pointer __ndptr = __bucket_list_[__chash];
    20         if (__ndptr != nullptr)
    21         {
    22             for (__ndptr = __ndptr->__next_; __ndptr != nullptr &&
    23                                              __constrain_hash(__ndptr->__hash(), __bc) == __chash;
    24                                                      __ndptr = __ndptr->__next_)
    25             {
    26                 if (key_eq()(__ndptr->__upcast()->__value_, __value))
    27                     return __ndptr;
    28             }
    29         }
    30     }
    31     if (size()+1 > __bc * max_load_factor() || __bc == 0)
    32     {
    33         rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
    34                                      size_type(ceil(float(size() + 1) / max_load_factor()))));
    35     }
    36     return nullptr;
    37 }

       如果未发现相同的key(__node_insert_unique_prepare返回nullptr),则插入节点

     1 // Insert the node __nd into the container by pushing it into the right bucket,
     2 // and updating size(). Assumes that __nd->__hash is up-to-date, and that
     3 // rehashing has already occurred and that no element with the same key exists
     4 // in the map.
     5 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     6 _LIBCPP_INLINE_VISIBILITY
     7 void
     8 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_perform(
     9     __node_pointer __nd) _NOEXCEPT
    10 {
    11     size_type __bc = bucket_count();
    12     size_t __chash = __constrain_hash(__nd->__hash(), __bc);
    13     // insert_after __bucket_list_[__chash], or __first_node if bucket is null
    14     __next_pointer __pn = __bucket_list_[__chash];
    15     if (__pn == nullptr)
    16     {
    17         __pn =__p1_.first().__ptr();
    18         __nd->__next_ = __pn->__next_;
    19         __pn->__next_ = __nd->__ptr();
    20         // fix up __bucket_list_
    21         __bucket_list_[__chash] = __pn;
    22         if (__nd->__next_ != nullptr)
    23             __bucket_list_[__constrain_hash(__nd->__next_->__hash(), __bc)] = __nd->__ptr();
    24     }
    25     else
    26     {
    27         __nd->__next_ = __pn->__next_;
    28         __pn->__next_ = __nd->__ptr();
    29     }
    30     ++size();
    31 }

      将新建节点插入bucket头部

      __nd->__next_ = __pn->__next_;

      __pn->__next_ = __nd->__ptr();   

    5:查找操作

      __p3_ -> second, equal_to<std::string>,数据的比较器 

     1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
     2 template <class _Key>
     3 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
     4 __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k)
     5 {
     6     size_t __hash = hash_function()(__k);
     7     size_type __bc = bucket_count();
     8     if (__bc != 0)
     9     {
    10         size_t __chash = __constrain_hash(__hash, __bc);
    11         __next_pointer __nd = __bucket_list_[__chash];
    12         if (__nd != nullptr)
    13         {
    14             for (__nd = __nd->__next_; __nd != nullptr &&
    15                 (__nd->__hash() == __hash
    16                   || __constrain_hash(__nd->__hash(), __bc) == __chash);
    17                                                            __nd = __nd->__next_)
    18             {
    19                 if ((__nd->__hash() == __hash)
    20                     && key_eq()(__nd->__upcast()->__value_, __k))
    21 #if _LIBCPP_DEBUG_LEVEL >= 2
    22                     return iterator(__nd, this);
    23 #else
    24                     return iterator(__nd);
    25 #endif
    26             }
    27         }
    28     }
    29     return end();
    30 }

      查找方法:

    • 生成入参hash key size_t : size_t __hash= hash_function()(__k); 
    • 获取bucket数量:size_type __bc = bucket_count();
    • 生成bucket index:size_t __chash = __constrain_hash(__hash, __bc);
    • 获取bucket第一个node的前驱节点指针:__next_pointer __nd = __bucket_list_[__chash];遍历从__nd->__next_开始
    • 循环比较node hash key size_t 和 入参hash key size_t
    • 比较入参 和 node key:key_eq()(__nd->__upcast()->__value_, __k)
    • 返回结果
  • 相关阅读:
    docker 单kafka ,多分区
    spring data jpa + mysql使用json 类型
    C++ Multithread Tutorial
    GDB 调试程序
    C++ Project 积累(四)
    GDB 调试 C/C++ Project
    makefile 学习(一)
    Ubuntu 下配置 boost + eclipse
    C++ Project 积累(3)
    Leetcode Sudoku Solver
  • 原文地址:https://www.cnblogs.com/hgwang/p/13493045.html
Copyright © 2011-2022 走看看