1、基本概念
关于hash表的概念这里就不再多说,不同hash表实现之间的差异一般都在哈希函数和冲突处理(退避)方法上。STL采用的是开链法,即每个hash桶里面维持一个链表,hash函数计算出位置后,就将节点插入该位置的链表上。因此,底层实现为hash表的容器,其迭代器的实现思路都类似于deque的迭代器:不仅要能在一条链表上移动,还要在到达一条链表的末端时,让下一次移动能够进入到下一个hash桶里面的链表。
2、 STL哈希表结构
开链法:
// One node of a bucket's singly linked chain: the stored value plus a link
// to the following node.
template <typename _Val>
struct _Hashtable_node
{
  _Hashtable_node* _M_next;  // next node in the chain (null when there is none)
  _Val             _M_val;   // the element stored in this node
};
哈希表定义时要指定数组大小n,不过实际分配的数组长度并不一定等于n,而是一个根据n计算而来的质数(计算方法见下面的代码)。
// Builds the initial, empty bucket array for a requested size of __n.
// The actual number of buckets is whatever _M_next_size(__n) returns; every
// bucket starts out as a null chain head and the element count is reset to 0.
void _M_initialize_buckets(size_type __n)
{
  const size_type __bucket_count = _M_next_size(__n);

  // Reserve first so the bulk insert below does not need to reallocate.
  _M_buckets.reserve(__bucket_count);
  _M_buckets.insert(_M_buckets.end(), __bucket_count,
                    static_cast<_Node*>(0));

  _M_num_elements = 0;
}
// Looks up __n in the precomputed prime table and returns the first entry
// that is not less than __n. If __n is larger than every entry, the last
// (largest) entry is returned instead.
// The search uses std::lower_bound, so the table returned by
// _S_get_prime_list() must be sorted in ascending order.
inline unsigned long
__stl_next_prime(unsigned long __n)
{
  const unsigned long* const __begin =
    _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
  const unsigned long* const __end =
    __begin + static_cast<int>(_S_num_primes);

  const unsigned long* const __hit = std::lower_bound(__begin, __end, __n);
  if (__hit == __end)
    return *(__end - 1);  // __n exceeds every table entry: clamp to the largest
  return *__hit;
}
从 prime_list 中找到第一个不小于n(即大于等于n)的质数;如果n比表中所有质数都大,则返回表中最大的那个质数。list是已经计算好的静态数组,包含了29个质数(之后版本有所改变)。
// Definition of the static table of _S_num_primes precomputed values, listed
// in ascending order. Apart from the first and last entries, each value is
// roughly twice the one before it.
template<typename _PrimeType> const _PrimeType
_Hashtable_prime_list<_PrimeType>::__stl_prime_list[_S_num_primes] =
{
  5ul,         53ul,        97ul,         193ul,
  389ul,       769ul,       1543ul,       3079ul,
  6151ul,      12289ul,     24593ul,      49157ul,
  98317ul,     196613ul,    393241ul,     786433ul,
  1572869ul,   3145739ul,   6291469ul,    12582917ul,
  25165843ul,  50331653ul,  100663319ul,  201326611ul,
  402653189ul, 805306457ul, 1610612741ul, 3221225473ul,
  4294967291ul
};
__stl_prime_list数组中,后一个数大多约等于前一个数的两倍。当插入数据时,如果元素总个数将要大于哈希表数组长度,为了使哈希表的负载因子(元素个数和hash桶个数之比)不超过1,就必须调用resize重新分配。增长速度跟vector差不多,每次分配的数组长度差不多翻倍。
// Grows the bucket array so that it can hold __num_elements_hint elements.
//
// Nothing happens when the hint does not exceed the current bucket count, or
// when _M_next_size() does not yield a larger bucket count. Otherwise a new
// bucket vector of the new size is built and every existing node is unlinked
// from its old chain and pushed onto the front of its chain in the new
// vector; nodes are relinked, not copied. The new vector is swapped in only
// after all nodes have been moved.
//
// If anything throws while rehashing, the nodes already moved into the
// temporary vector are destroyed and the exception is rethrown.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
void
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
resize(size_type __num_elements_hint)
{
  const size_type __old_n = _M_buckets.size();
  if (__num_elements_hint <= __old_n)
    return;

  const size_type __n = _M_next_size(__num_elements_hint);
  if (__n <= __old_n)
    return;

  _Vector_type __fresh(__n, static_cast<_Node*>(0),
                       _M_buckets.get_allocator());
  __try
    {
      for (size_type __i = 0; __i < __old_n; ++__i)
        {
          // Repeatedly pop the head of old chain __i until it is empty.
          for (_Node* __node = _M_buckets[__i]; __node;
               __node = _M_buckets[__i])
            {
              // Bucket index of this node for a table of __n buckets.
              const size_type __dest = _M_bkt_num(__node->_M_val, __n);
              _M_buckets[__i] = __node->_M_next;   // unlink from old chain
              __node->_M_next = __fresh[__dest];   // push front on new chain
              __fresh[__dest] = __node;
            }
        }
      _M_buckets.swap(__fresh);
    }
  __catch(...)
    {
      // Destroy every node that had already been moved to the new vector.
      // NOTE(review): _M_num_elements is not adjusted anywhere in this
      // function even though nodes are deleted here; confirm whether that
      // is accounted for elsewhere.
      for (size_type __i = 0; __i < __fresh.size(); ++__i)
        {
          while (__fresh[__i])
            {
              _Node* __rest = __fresh[__i]->_M_next;
              _M_delete_node(__fresh[__i]);
              __fresh[__i] = __rest;
            }
        }
      __throw_exception_again;
    }
}
每次新插入的元素都放在链表的第一个节点前面。
// Inserts __obj unless an element with an equal key is already present.
// The bucket array is never resized by this function.
//
// Returns (iterator to the existing element, false) when a node whose key
// compares equal via _M_equals is found in the target bucket; otherwise a
// new node is created, linked in as the new head of that bucket's chain,
// the element count is incremented, and (iterator to the new node, true)
// is returned.
template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
pair<typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::iterator, bool>
hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
insert_unique_noresize(const value_type& __obj)
{
  typedef pair<iterator, bool> __result_type;

  const size_type __bucket = _M_bkt_num(__obj);
  _Node* const __head = _M_buckets[__bucket];

  // Scan the chain for an element with the same key.
  _Node* __cur = __head;
  while (__cur)
    {
      if (_M_equals(_M_get_key(__cur->_M_val), _M_get_key(__obj)))
        return __result_type(iterator(__cur, this), false);
      __cur = __cur->_M_next;
    }

  // Not found: the new node becomes the head of the chain.
  _Node* __fresh = _M_new_node(__obj);
  __fresh->_M_next = __head;
  _M_buckets[__bucket] = __fresh;
  ++_M_num_elements;
  return __result_type(iterator(__fresh, this), true);
}
3、 哈希函数
// Maps a key to a bucket index for a table with __n buckets: the value
// returned by _M_hash for the key, reduced modulo __n.
// NOTE(review): __n == 0 would be a modulo by zero; callers presumably
// always pass a non-zero bucket count -- confirm.
size_type
_M_bkt_num_key(const key_type& __key, size_t __n) const
{ return _M_hash(__key) % __n; }
(全特化:下面的 template<> 写法是显式特化,即全特化,而不是偏特化)
// Hashes a NUL-terminated C string: starting from 0, each character updates
// the running value as h = 5 * h + c. The empty string hashes to 0.
// __s must point to a valid NUL-terminated string (it is dereferenced
// unconditionally).
inline size_t
__stl_hash_string(const char* __s)
{
  unsigned long __acc = 0;
  while (*__s != '\0')
    {
      __acc = __acc * 5 + *__s;
      ++__s;
    }
  return static_cast<size_t>(__acc);
}
// Explicit specialization of hash for char*: forwards the pointer to
// __stl_hash_string and returns its result.
template<>
struct hash<char*>
{
  size_t operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};
// Explicit specialization of hash for const char*: forwards the pointer to
// __stl_hash_string and returns its result.
template<>
struct hash<const char*>
{
  size_t operator()(const char* __s) const
  {
    return __stl_hash_string(__s);
  }
};
// Explicit specialization of hash for char: the hash is the character's own
// value converted to size_t.
template<>
struct hash<char>
{
  size_t operator()(char __x) const
  {
    return static_cast<size_t>(__x);
  }
};
// Explicit specialization of hash for int: the hash is the integer itself
// converted to size_t (negative values wrap through the usual
// signed-to-unsigned conversion).
template<>
struct hash<int>
{
  size_t operator()(int __x) const
  {
    return static_cast<size_t>(__x);
  }
};
// Explicit specialization of hash for unsigned int: the hash is the value
// itself converted to size_t.
template<>
struct hash<unsigned int>
{
  size_t operator()(unsigned int __x) const
  {
    return static_cast<size_t>(__x);
  }
};
// Explicit specialization of hash for long: the hash is the value itself
// converted to size_t (negative values wrap through the usual
// signed-to-unsigned conversion).
template<>
struct hash<long>
{
  size_t operator()(long __x) const
  {
    return static_cast<size_t>(__x);
  }
};