C++中用哈希实现map、set详解

因为本人之前一直写的是电子笔记，对自己学会的东西作一个总结，所以基本都是文字，本来想全发成博客的形式，发现全发成博客比较花费时间，而且一直发博客质量不是很好，而且通过发博客学到的东西也会变少，所以准备先把笔记发出来，后续再将它们改成博客的形式，争取2天至少改一篇博客，觉得我总结的还行的可以先关注我，后续会发成博客形式，内容也会更加完善

**实现map和set的哈希版：**
它们的实现都是用哈希桶做的，逻辑并不是很难主要是结构
以数组挂链表为例。首先它作为一个容器，一个哈希桶要实现4个容器，所以是用模板做的。因为参数最多的是map型的，有pair对象，而set只用存一个类型的数据，所以在设计模板的时候，至少要传两个模板参数（pair对象就需要两个）；但因为set只用一个，所以set的那两个模板参数是相同的。因为传进来的可能是字符串，字符串的话要用哈希函数来计算哈希值，所以要分情况。因为只有两种情况，一种是数字，一种是字符串，所以我们使用了特化的模板来做一个自动的类型选取，这里就需要第三个参数。但第三个参数不需要我们来传，直接给它一个默认值，也就是哈希函数这个结构体类型，然后再给它传进我们的first类型；如果是字符串，因为我们使用了模板特化，所以可以自动地调用字符串类型的模板函数。在哈希表中要有一个计算哈希值的函数，它用到的就是我们传入的第三个参数，也就是哈希函数的结构体，之后我们可以通过实例化的对象调用函数来进行计算。（我觉得不直接把哈希值计算好再传入的原因是：这样需要在哈希表中再加一个参数，并且要有一个构造来初始化，而set和map中都必须要写一个这样的函数；既然都要弄，而且方法一样，还不如在哈希表中做。）
在给底层哈希表传参时，不仅要传刚才的那三个参数，还要再加一个参数，因为set是一个类型的数据map是pair型的数据，所以在进行存储，计算哈希值时，只能用一个，set当然是用它的唯一一个值了，但pair是用它的first值的，所以用一个结构体来做一个括号操作符的重载，让不同类型传进去的比较方式不同，这样就可以一个哈希表实现不同的容器
然后因为是容器，我们访问它的元素时就得用迭代器，所以必须手动实现一个迭代器。这个迭代器没有--操作，因为我们的数据本来就是无序的，没必要提供--，只提供一个++操作。++的实现肯定是一个操作符重载；因为++需要访问下一个元素，只给迭代器一个节点是不能完成任务的，还需要把那张表的指针也传过来，所以这个迭代器至少要传两个参数。++的具体实现方法是：拿到当前节点后，先判断它的next是否存在，如果存在，直接走到next节点就好了；如果不存在，就要计算当前节点在这张表中的位置。因为刚才已经判断过它没有下一个节点，说明这个单元（桶）中的数据已经全部走过了，所以要去下一个单元看：如果下一个单元不为空，就把它的头节点作为结果；如果为空，就继续去再下一个单元中找；如果一直找到数组的最后一个单元都没有找到，就返回空。其它操作符的重载都和红黑树差不多。
然后就是在哈希表中提供begin和end的接口。虽然写在迭代器中也可以，但我们在map中一定会有一个哈希表的对象，所以写在哈希表中更方便。另外map还有一个[]操作，具体做法和红黑树中的做法相同。

#pragma once
#include <iostream>
#include <vector>
#include <string>
#include <time.h>
using namespace std;
//哈希桶

template <class V>
struct HashNode
{
	V _data;
	HashNode<V>* _next;
	HashNode(const V& data)
		:_data(data)
		, _next(nullptr)
	{}
};
template <class K, class V, class KeyOfValue, class HashFun> //这里做前置声明的的原因是送代器要用到哈希表
class HashTable;                                             //并且哈希表也要用到送代器，因为编译器是从上往下执行的
                                                             //如果我们要用到下边的代码，它就不知道是什么，所以要有一个前置声明
template <class K, class V, class KeyOfValue, class HashFun>
struct __HIerator
{
	typedef HashNode<V> Node;
	typedef Node* pNode;
	typedef __HIerator<K, V, KeyOfValue, HashFun> Self;
	typedef HashTable<K, V, KeyOfValue,HashFun> HashT;
	HashT* _ht;
	pNode _node;

	__HIerator(pNode node, HashT* ht)
		:_node(node)
		,_ht(ht)
	{}

	V& operator * ()
	{
		return _node->_data;
	}

	V* operator -> ()
	{
		return &operator*();
	}

	bool operator != (const Self& it)
	{
		return _node != it._node;
	}
	Self& operator ++ ()
	{
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else
		{
			KeyOfValue kov;
			size_t index = _ht->HashIdx(kov(_node->_data), _ht->_table.size());

			++index;
			for (; index < _ht->_table.size(); ++index)
			{
				if (_ht->_table[index])
				{
					_node = _ht->_table[index];
					break;
				}
			}
			if (index == _ht->_table.size())
			{
				_node = nullptr;
			}
		}
		return *this;
	}
};
template <class K, class V, class KeyOfValue, class HashFun>
class HashTable
{
public:
	template <class K, class V, class KeyOfValue, class HashFun>
	friend struct __HIerator;

	typedef HashNode<V> Node;
	typedef Node* pNode;
	KeyOfValue kov;
	typedef __HIerator<K, V, KeyOfValue, HashFun> iterator;
	
	iterator end()
	{
		return iterator(nullptr, this);
	}

	iterator begin()
	{
		for (size_t i = 0; i < _table.size(); ++i)
		{
			if (_table[i])
			{
				return iterator(_table[i], this);
			}
		}
		return iterator(nullptr, this);
	}

	pair<iterator, bool> Insert(const V& data)
	{
		CheckCapacity();
		size_t index = HashIdx(kov(data), _table.size());
		pNode cur = _table[index];

		while (cur)
		{
			if (kov(cur->_data) == kov(data))
			{
				return make_pair(iterator(cur, this), false);
			}
			cur = cur->_next;
		}
		cur = new Node(data);//直接进行头插
		cur->_next = _table[index];
		_table[index] = cur;

		++_size;
		return make_pair(iterator(cur, this), true);
	}
	size_t getPrime(size_t prime)
	{
		const int PRIMECOUNT = 28;
		const size_t primeList[PRIMECOUNT] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};

		for (size_t i = 0; i < PRIMECOUNT; ++i)
		{
			if (primeList[i] > prime)
			{
				return primeList[i];
			}
		}
		return primeList[PRIMECOUNT - 1];
	}
	//要进行扩容，是因为原来数组的长度不够用了
	//但是如果长度改变了，原来数据所处的位置的也就不同了，所以必须重新再搞一个数组
	//弄好后，就是数据的迁移了，怎么找
	//从原来数组的第一个非空的的位置开始找
	//先把数据拿出来计算一下新的位置，找到位置
	//因为我们要将cur这个结点，放到新数组的一个位置，所以cur的next必须改变
	//因为原来发生了哈希冲突，但重新计算的长度不一样 ，所以不一定会发生哈希冲突
	//所以我们让它的next指向它自己
	//但是我们还要计算下一个cur的位置也就是原来cur的next的那个结点，但是我们提前改变了next
	//因为如果先把cur的next给到cur那就找不到原来的cur了，就不能改变新数组里面cur所指向的位置
	//因为我们必须改变新数组cur所指向的位置，所以我们要提前保存一下最原始cur的next的节点
	void CheckCapacity()
	{
		if (_size == _table.size())
		{
			size_t newC = getPrime(_table.size());
			vector<pNode> newT;
			newT.resize(newC);
			for (size_t i = 0; i < _table.size(); ++i)
			{
				pNode cur = _table[i];//cur是一个结构体指针
				while (cur)
				{
					pNode next = cur->_next;

					size_t index = HashIdx(kov(cur->_data), newT.size());
					cur->_next = newT[index];//这样搬动，省去创建新节点的开销
					newT[index] = cur;       //因为本来就有这个节点，所以直接可以把它的值拿来用
					                         //因为是个链表，所以我们可以把它链好后，直接覆盖，这样
					cur = next;              //既不用浪费空间申请节点，也能重新排好位置
				}                            //这就相当于一个搬运的过程，因为每个节点都是独立的，只是通过一层链接关系来进行相互关联
				_table[i] = nullptr;         //它们并不是连续的，只是有指针的存在，让它们看起来是连续的
			}                                //所以我们只需要链表的头，就能按顺序访问
			_table.swap(newT);               //所以到最后我们复制过来的只是链表头，其它都是改变链接顺序
		}                                    //所以最后我们在销毁旧链表时，只是将所有的链表头的地址给清空了，并不是将节点给释放了
	}                                        //所以我们不用担心，我们的数据是从旧链表中来了，一但清空这边也就用不了了   
                                               
	size_t HashIdx(const K& key, size_t sz) //用来处理是字符串的情况
	{
		HashFun hfun;
		return hfun(key) % sz;
	}
private:
	size_t _size = 0;
	vector<pNode> _table;
};

#include "哈希桶.hpp"
//模板特化

template <class K>
struct HFun
{
	const K& operator () (const K& key)
	{
		return key;
	}
};

template <>
struct HFun<string>
{
	size_t operator () (const string& str)
	{
			size_t hash = 0;
			for (const auto& e : str)
			{
				hash = hash * 131 + e;
			}
			return hash;
	}
};


template <class K, class V,class HashFun = HFun<K>>
class UnorderedMap
{
	struct MapKeyOfValue
	{
		const K& operator () (const pair<K, V>& data)
		{
			return data.first;
		}
	};
public:
	typedef typename HashTable<K, pair<K, V>, MapKeyOfValue, HashFun>::iterator iterator;
	V& operator [] (const K& key)
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

	pair<iterator, bool> Insert(const pair<K, V>& data)
	{
		return _ht.Insert(data);
	}
	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}
private:
	HashTable<K, pair<K, V>, MapKeyOfValue, HashFun> _ht;
};

void TestUnordered()
{
	//UnorderedMap<int, int> Umap;
	//srand(time(nullptr));
	//int n;
	//cin >> n;
	//for (size_t i = 0; i < n; i++)
	//{
	//	Umap.Insert(make_pair(rand() % 50, i));
	//}

	//Umap[100] = 100;
	//Umap[200] = 200;
	//Umap[300] = 300;

	//UnorderedMap<int, int>::iterator uit = Umap.begin();
	//while (uit != Umap.end())
	//{
	//	cout << uit->first << "--->" << uit->second << endl;
	//	++uit;
	//}
	UnorderedMap<string, string> uMap;
	uMap.Insert(make_pair("123", "123"));
	uMap.Insert(make_pair("12", "123"));
	uMap.Insert(make_pair("13", "123"));
	uMap.Insert(make_pair("3", "123"));
	uMap.Insert(make_pair("133", "123"));
	uMap.Insert(make_pair("153", "123"));
	UnorderedMap<string, string>::iterator uit = uMap.begin();
	while (uit != uMap.end())
	{
		cout << uit->first << "--->" << uit->second << endl;
		++uit;
	}
	
}

template <class K, class HashFun = HFun<K>>
class UnorderedSet
{
	struct SetKeyOfValue
	{
		const K& operator () (const K& data)
		{
			return data;
		}
	};
public:
	typedef typename HashTable<K, K, SetKeyOfValue, HashFun >::iterator iterator;
	pair<iterator, bool> Insert(const K& data)
	{
		return _ht.Insert(data);
	}
	iterator begin()
	{
		return _ht.begin();
	}
	iterator end()
	{
		return _ht.end();
	}

private:
	HashTable<K, K, SetKeyOfValue, HashFun> _ht;
};

void testSet()
{
	UnorderedSet<string> uMap;
	srand(time(nullptr));
	//int n;
	//cin >> n;
	//for (size_t i = 0; i < n; i++)
	//{
	//	uSet.Insert(rand() % 50));
	//}
	uMap.Insert("123");
	uMap.Insert("12");
	uMap.Insert("13");
	uMap.Insert("3");
	uMap.Insert("133");
	uMap.Insert("153");
	UnorderedSet<string>::iterator sit = uMap.begin();
	while (sit != uMap.end())
	{
		cout << *sit << endl;
		++sit;
	}


}

int main()
{
	//TestUnordered();
	testSet();
	return 0;
}

Charles·

发布了90 篇原创文章 · 获赞 3 · 访问量 1万+

私信关注

C++中用哈希实现map、set详解

猜你喜欢