STL源码分析之hash表(gnu-c++ 2.9)

1、基本概念
关于hash表的概念这里就不再多说。各种hash表实现的差别一般体现在哈希函数和冲突解决方法上。STL采用的是开链法：每个hash桶维护一条链表，哈希函数计算出桶的位置后，就把节点插入该桶的链表中。因此，对于底层实现为hash表的容器，其迭代器的实现与deque的迭代器类似：不仅要能在一条链表上移动，还要在到达一条链表的末端时，下一次移动能进入下一个hash桶里面的链表。
2、 STL哈希表结构
开链法：

// One node of a bucket's singly linked chain (separate chaining).
template <typename _Val>
struct _Hashtable_node
{
  _Hashtable_node* _M_next;  // next node in the same chain; null at the end
  _Val             _M_val;   // the value stored in this node
};

哈希表定义时要指定数组大小n，不过实际分配的桶数组长度并不是n，而是由_M_next_size(n)根据n计算出来的一个质数（见下面的代码）。

// Sets up the bucket vector for a table created with a size hint of __n.
// The bucket count is whatever _M_next_size(__n) returns; every bucket
// starts out as a null chain head and the element count is reset to zero.
void _M_initialize_buckets(size_type __n)
  {
    const size_type __bucket_count = _M_next_size(__n);

    // Reserve first so the fill-insert below does not need to reallocate.
    _M_buckets.reserve(__bucket_count);
    _M_buckets.insert(_M_buckets.end(), __bucket_count,
                      static_cast<_Node*>(0));

    _M_num_elements = 0;
  }
 // Returns the first entry of the prime table that is not less than __n.
 // If __n is larger than every entry, the last entry of the table is
 // returned instead.
 inline unsigned long
  __stl_next_prime(unsigned long __n)
  {
    const unsigned long* const __begin = _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
    const unsigned long* const __end = __begin + (int)_S_num_primes;

    // The binary search relies on the table being sorted in ascending order.
    const unsigned long* __it = std::lower_bound(__begin, __end, __n);
    if (__it == __end)
      --__it;  // nothing >= __n: clamp to the final table entry
    return *__it;
  }

从质数表（__stl_prime_list）中用lower_bound找到第一个不小于n的质数；如果n比表中所有质数都大，则返回表中最后一个（最大的）质数。该表是已经计算好的静态数组，包含了29个质数（之后版本有所改变）。

// Definition of the static table of candidate bucket counts: 29 ascending
// values, ending at 4294967291 (2^32 - 5).  It is searched with
// std::lower_bound, so the ascending order must be preserved.
template<typename _PrimeType>
const _PrimeType
_Hashtable_prime_list<_PrimeType>::__stl_prime_list[_S_num_primes] =
  {
    5UL,          53UL,         97UL,         193UL,
    389UL,        769UL,        1543UL,       3079UL,
    6151UL,       12289UL,      24593UL,      49157UL,
    98317UL,      196613UL,     393241UL,     786433UL,
    1572869UL,    3145739UL,    6291469UL,    12582917UL,
    25165843UL,   50331653UL,   100663319UL,  201326611UL,
    402653189UL,  805306457UL,  1610612741UL, 3221225473UL,
    4294967291UL
  };

__stl_prime_list数组中，后一个数大约是前一个数的两倍（首尾两项除外）。插入数据时，如果传给resize的预期元素个数大于哈希表的桶数组长度，resize就会重新分配桶数组并把所有节点重新散列，从而使哈希表的负载因子（元素个数和hash桶个数之比）不超过1。增长速度跟vector差不多，每次分配的数组长度差不多翻倍。

// Grows the bucket array when __num_elements_hint exceeds the current
// number of buckets, relinking every existing node into the new array.
// Does nothing if the hint fits in the current bucket count, or if
// _M_next_size() does not yield a larger size.
//
// If an exception escapes while nodes are being moved, the nodes already
// moved into the temporary array are deleted and the exception is rethrown.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
void
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
resize(size_type __num_elements_hint)
{
  const size_type __old_n = _M_buckets.size();
  // Only rehash when the hint is larger than the current bucket count.
  if (__num_elements_hint > __old_n)
    {
      // NOTE(review): _M_next_size is not shown here; presumably it picks
      // the next table size from the prime list -- confirm.
      const size_type __n = _M_next_size(__num_elements_hint);
      if (__n > __old_n)
        {
          // New array of __n null chain heads, using the old array's allocator.
          _Vector_type __tmp(__n, (_Node*)(0), _M_buckets.get_allocator());
          __try
            {
              for (size_type __bucket = 0; __bucket < __old_n; ++__bucket)
                {
                  _Node* __first = _M_buckets[__bucket];
                  // Pop nodes off the front of the old chain one at a time.
                  while (__first)
                    {
                      // Bucket index of this node's value for a table of
                      // __n buckets.
                      size_type __new_bucket = _M_bkt_num(__first->_M_val,
                                                          __n);
                      // Unlink from the old chain ...
                      _M_buckets[__bucket] = __first->_M_next;
                      // ... and push onto the front of the new chain.
                      __first->_M_next = __tmp[__new_bucket];
                      __tmp[__new_bucket] = __first;
                      __first = _M_buckets[__bucket];
                    }
                }
              // All nodes moved: the new array becomes the table's buckets.
              _M_buckets.swap(__tmp);
            }
          __catch(...)
            {
              // Cleanup on failure: delete every node that had already been
              // moved into __tmp, then propagate the exception.
              for (size_type __bucket = 0; __bucket < __tmp.size();
                ++__bucket)
                {
                  while (__tmp[__bucket])
                    {
                      _Node* __next = __tmp[__bucket]->_M_next;
                      _M_delete_node(__tmp[__bucket]);
                      __tmp[__bucket] = __next;
                    }
                }
              __throw_exception_again;
            }
        }
    }
}

insert_unique_noresize先在对应桶的链表中查找键相同的节点，找到则不插入并返回false；否则把新节点插到该桶链表的头部，也就是每次新插入的元素都放在链表的第一个节点前面。

// Inserts __obj without resizing the bucket array, keeping keys unique.
//
// Returns a pair of (iterator, bool).  If a node with an equal key is
// already in the target bucket, the iterator refers to that node and the
// bool is false.  Otherwise a new node is linked in at the head of the
// bucket's chain, the element count is incremented, and the bool is true.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
pair<typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::iterator, bool>
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
insert_unique_noresize(const value_type& __obj)
{
  typedef pair<iterator, bool> _Result;

  const size_type __bkt = _M_bkt_num(__obj);
  _Node* const __head = _M_buckets[__bkt];

  // Scan the chain for an existing element with an equal key.
  _Node* __p = __head;
  while (__p)
    {
      if (_M_equals(_M_get_key(__p->_M_val), _M_get_key(__obj)))
        return _Result(iterator(__p, this), false);
      __p = __p->_M_next;
    }

  // No match: the new node becomes the first node of the chain.
  _Node* __fresh = _M_new_node(__obj);
  __fresh->_M_next = __head;
  _M_buckets[__bkt] = __fresh;
  ++_M_num_elements;
  return _Result(iterator(__fresh, this), true);
}

3、哈希函数

size_type
  _M_bkt_num_key(const key_type& __key, size_t __n) const
  {
    // Bucket index for __key in a table of __n buckets: hash the key with
    // the table's hash functor and reduce the result modulo __n.
    return _M_hash(__key) % __n;
  }

（下面是字符串哈希函数，以及hash模板针对若干内置类型的全特化，即template<>显式特化，并非偏特化）

// Hashes a null-terminated C string: starting from 0, each character
// updates the running value as h = 5 * h + c.  The result is returned
// converted to size_t.  __s must not be null (it is dereferenced
// unconditionally).
inline size_t
  __stl_hash_string(const char* __s)
  {
    unsigned long __acc = 0;
    while (*__s)
      {
        __acc = 5 * __acc + *__s;
        ++__s;
      }
    return size_t(__acc);
  }



// Explicit specialization of hash for char*: the hash is computed from the
// characters of the null-terminated string by __stl_hash_string.
template<>
struct hash<char*>
{
  size_t operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};



// Explicit specialization of hash for const char*: the hash is computed
// from the characters of the null-terminated string by __stl_hash_string.
template<>
struct hash<const char*>
{
  size_t operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};



// Explicit specialization of hash for char: the hash is the character's
// own value converted to size_t.
template<>
struct hash<char>
{
  size_t operator()(char __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization of hash for int: the hash is the integer's own
// value converted to size_t.
template<>
struct hash<int>
{
  size_t operator()(int __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization of hash for unsigned int: the hash is the
// integer's own value converted to size_t.
template<>
struct hash<unsigned int>
{
  size_t operator()(unsigned int __x) const
  {
    return static_cast<size_t>(__x);
  }
};


// Explicit specialization of hash for long: the hash is the integer's own
// value converted to size_t.
template<>
struct hash<long>
{
  size_t operator()(long __x) const
  {
    return static_cast<size_t>(__x);
  }
};

STL源码分析之hash表(gnu-c++ 2.9)

猜你喜欢