在上一篇博客中,我们简单地介绍了哈希表,以及解决哈希冲突的办法;今天我们介绍解决哈希冲突的另一种方法——开散列法。开散列法又叫链地址法,首先对关键码集合用散列函数计算散列地址,具有相同地址的关键码归于同一子集合,每一个子集合称为一个桶,各个桶中的元素通过一个单链表链接起来,各链表的头结点组成一个向量。说起来可能很复杂,其实很简单,看一个图就明白了:假设哈希函数为Hash(key)=key%10
- 假如哈希函数被破解了,可能会对哈希桶进行攻击,从而将哈希桶退化成单链表,此时查找效率将会大大降低,我们可以把单链表换成红黑树,提高查找效率。
- 当哈希桶的个数与插入的结点个数相等时,我们就增加容量。
以下是我实现的哈希桶:
// 获得一个适合的素数
const int _PrimeSize = 28;//素数表
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
size_t GetNextPrime(size_t num)
{
for (size_t i = 0; i < _PrimeSize; i++)
{
if (_PrimeList[i]>num)
return _PrimeList[i];
}
return _PrimeList[_PrimeSize - 1];
}
template <class K>
class KeyToIntDef
{
public:
size_t operator()(const K& key)
{
return key;
}
};
// 将字符串转换成整型
static size_t BKDRHash(const char * str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
size_t ret = (hash & 0x7FFFFFFF);
return ret;
}
class StringToInt
{
public:
size_t operator()(const string& key)
{
return BKDRHash(key.c_str());
}
};
#include <iostream>
#include <vector>
#include <string>
using namespace std;
template <class K, class V>
struct HashNode//结点
{
pair<K, V> _kv;
HashNode<K, V>* _pNext;
HashNode(const pair<K, V>& kv)
: _kv(kv)
, _pNext(NULL)
{}
};
template <class K, class V, class KeyToInt>
class HashTable;//前置声明
//迭代器
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
struct HashTableIterator
{
typedef HashNode<K, V> Node;
typedef Node* PNode;
typedef HashTableIterator<K, V, KeyToInt> Self;
PNode _pCur;
HashTable<K, V, KeyToInt>* _ht;
public:
HashTableIterator()
: _pCur(NULL)
, _ht(NULL)
{}
HashTableIterator(const PNode pCur, HashTable<K, V, KeyToInt>* ht)
: _pCur(pCur)
, _ht(ht)
{}
HashTableIterator(Self& s)
: _pCur(s._pCur)
, _ht(s._ht)
{}
Self& operator++()
{
Next();
return *this;
}
Self operator++(int)
{
HashTableIterator temp(*this);
Next();
return temp;
}
pair<K, V>& operator*()
{
return _pCur->_kv;
}
pair<K, V>* operator->()
{
return &(operator*());
}
bool operator==(const Self& s)
{
return _pCur == s._pCur;
}
bool operator!=(const Self& s)
{
return _pCur != s._pCur;
}
private:
void Next()
{
if (_pCur->_pNext)
_pCur = _pCur->_pNext;
else
{//找下一个非空桶
size_t bucketNo = _ht->HashFunc(_pCur->_kv.first) + 1;
for (; bucketNo < _ht->_hashTable.capacity(); bucketNo++)
{
if (_ht->_hashTable[bucketNo])
{
_pCur = _ht->_hashTable[bucketNo];
return;
}
}
_pCur = NULL;//没有找到
}
return;
}
};
//哈希桶
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
class HashTable
{
typedef HashNode<K, V> Node;
typedef Node* PNode;
friend HashTableIterator<K, V, KeyToInt>;
public:
typedef HashTableIterator<K, V, KeyToInt> Iterator;
public:
HashTable(size_t capacity = 10)
{
capacity = GetNextPrimer(capacity);
_hashTable.resize(capacity);
_size = 0;
}
Iterator Begin()
{
for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
{
if (_hashTable[bucketNo])
return Iterator(_hashTable[bucketNo], this);
}
return Iterator(NULL, this);
}
Iterator End()
{
return Iterator(NULL, this);
}
//////////////////////////插入,查找及删除/////////////////////////////////
pair<Iterator, bool> InsertEqual(const pair<K, V>& kv)//允许重复
{
CheckCapacity();
size_t bucketNo = HashFunc(kv.first);
PNode pNewNode = new Node(kv);
pNewNode->_pNext = _hashTable[bucketNo];//头插
_hashTable[bucketNo] = pNewNode;
_size++;
return make_pair(Iterator(pNewNode, this), true);
}
pair<Iterator, bool> InsertUnique(const pair<K, V>& kv)//key值唯一
{
CheckCapacity();
size_t bucketNo = HashFunc(kv.first);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (kv.first == pCur->_kv.first)
return make_pair(Iterator(pCur, this), false);
pCur = pCur->_pNext;
}
pCur = new Node(kv);
pCur->_pNext = _hashTable[bucketNo];
_hashTable[bucketNo] = pCur;
_size++;
return make_pair(Iterator(pCur, this), true);
}
Iterator Find(const K& key)
{
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (pCur->_kv.first == key)
return Iterator(pCur, this);
pCur = pCur->_pNext;
}
return Iterator(NULL, this);
}
Iterator Erase(Iterator pos)//key值唯一
{
if (pos._pCur == NULL)
return Iterator(NULL, this);
size_t key = pos._pCur->_kv.first;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
PNode pPre = NULL;
while (pCur)
{
if (pCur->_kv.first == key)
{
if (_hashTable[bucketNo] == pCur)//pCur是首元素结点
_hashTable[bucketNo] = pCur->_pNext;
else
pPre->_pNext = pCur->_pNext;
delete pCur;
pCur = NULL;
_size--;
return Iterator(pPre, this);
}
else
{
pPre = pCur;
pCur = pCur->_pNext;
}
}
return Iterator(NULL, this);
}
size_t Erase(const K& key)//key值可以重复
{
size_t oldsize = _size;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
PNode pPre = NULL;
while (pCur)
{
if (pCur->_kv.first == key)
{
if (pCur == _hashTable[bucketNo])
{
_hashTable[bucketNo] = pCur->_pNext;
delete pCur;
pCur = _hashTable[bucketNo];
}
else
{
pPre->_pNext = pCur->_pNext;
delete pCur;
pCur = pPre->_pNext;
}
_size--;
}
else
{
pPre = pCur;
pCur = pPre->_pNext;
}
}
if (oldsize == _size)
return 0;
else
return oldsize - _size;
}
///////////////////////////其它常用函数///////////////////////////////
size_t Count(const K key)//值为key的元素个数
{
size_t count = 0;
size_t bucketNo = HashFunc(key);
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
if (pCur->_kv.first == key)
count++;
pCur = pCur->_pNext;
}
return count;
}
size_t BucketCount()const//桶的个数
{
return _hashTable.capacity();
}
size_t BucketSize(size_t bucketNo)const//桶内元素个数
{
size_t count = 0;
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
count++;
pCur = pCur->_pNext;
}
return count;
}
V& FindORInsert(const K& key)//查找值为key,如果找到了,返回对应的value,
//如果没有找到插入该结点,返回缺省的value
{
Iterator ret = InsertUnique(make_pair(key, V())).first;
return (*ret).second;
}
bool Empty()const//是否为空
{
return _size == 0;
}
size_t Size()const//插入的总数
{
return _size;
}
void Clear()//清空
{
for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
{
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
_hashTable[bucketNo] = pCur->_pNext;
delete pCur;
pCur = _hashTable[bucketNo];
_size--;
}
}
}
~HashTable()
{
Clear();
}
private:
size_t HashFunc(const K& key)//哈希函数
{
return KeyToInt()(key) % _hashTable.capacity();
}
void CheckCapacity()//扩容
{
size_t capacity = _hashTable.capacity();
if (_size == capacity)
{
HashTable<K, V, KeyToInt> newHt(GetNextPrime(capacity));
for (size_t bucketNo = 0; bucketNo < capacity; bucketNo++)
{
PNode pCur = _hashTable[bucketNo];
while (pCur)
{
newHt.InsertEqual(pCur->_kv);
pCur = pCur->_pNext;
}
}
_hashTable.swap(newHt._hashTable);
}
}
private:
vector<PNode> _hashTable;
size_t _size;
};
void test()
{
HashTable<int, int> ht;
ht.InsertEqual(make_pair(1, 1));
ht.InsertEqual(make_pair(5, 5));
ht.InsertEqual(make_pair(15, 15));
ht.InsertEqual(make_pair(15, 15));
ht.InsertEqual(make_pair(35, 35));
ht.InsertEqual(make_pair(9, 9));
HashTable<int, int>::Iterator it = ht.Begin();
if (!ht.Empty())
{
while (it != ht.End())
{
cout << it->first << " " ;
cout << (*it).second << endl;
it++;
}
cout << endl;
cout << ht.BucketSize(5) << endl;
cout << ht.BucketCount() << endl;
cout << ht.Count(15) << endl;
}
it = ht.Begin();
cout << ht.Erase(15) << endl;
HashTable<int, int>::Iterator ret = ht.Find(1);
ht.Erase(ret);
cout << ht.Size() << endl;
ht.Clear();
HashTable<string, string, StringToInt> ht1;
ht1.InsertUnique(make_pair("111", "111"));
ht1.InsertUnique(make_pair("111", "111"));
cout << ht1.FindORInsert("111") << endl;
}