// Abstract dictionary interface mapping keys of type K to values of type V.
template <typename K, typename V> struct Dictionary {
	// Virtual so that an implementation can be destroyed through a Dictionary pointer.
	virtual ~Dictionary() {}
	virtual int size() const = 0; // current number of entries
	virtual bool put(K, V) = 0; // insert an entry (may fail when duplicate keys are forbidden)
	virtual V* get(K k) = 0; // read the entry stored under key k
	virtual bool remove(K k) = 0; // delete the entry stored under key k
};

1.2 词条头文件

// Dictionary entry: a (key, value) pair whose comparison operators look at the key only.
template <typename K, typename V> struct Entry {
	K key; V value; // key and its associated value
	Entry(K k = K(), V v = V()) : key(k), value(v) {} // default / (key, value) constructor
	Entry(Entry<K, V> const& e) : key(e.key), value(e.value) {} // copy constructor
	// The operators are const-qualified so that const entries can be compared too.
	bool operator< (Entry<K, V> const& e) const { return key < e.key; }  // less-than, by key
	bool operator> (Entry<K, V> const& e) const { return key > e.key; }  // greater-than, by key
	bool operator== (Entry<K, V> const& e) const { return key == e.key; } // equality, by key
	bool operator!= (Entry<K, V> const& e) const { return key != e.key; } // inequality, by key
}; // thanks to these operators an entry can be compared as if it were its key

1.3 位图头文件

#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <memory.h>


// Growable bit set stored MSB-first in a byte array.
// NOTE: the class owns M through a raw pointer and defines no copy constructor or
// copy assignment, so a copied Bitmap would share (and double-delete) the buffer.
class Bitmap {
private:
    unsigned char* M; int N; // bit storage M[] of N bytes, i.e. N * 8 bits
protected:
    // Allocates a zero-filled buffer large enough for n bits.
    void init(int n) { M = new unsigned char[N = (n + 7) / 8]; memset(M, 0, N); }
public:
    // Creates an all-zero bitmap of n bits (a small default is used for testing).
    Bitmap(int n = 8) { init(n); }
    // Creates a bitmap of n bits and fills it from the given file.
    // If the file cannot be opened, or holds fewer than N bytes, the missing part stays zero.
    // NOTE(review): the file is opened in text mode ("r"); kept as-is so files written by
    // dump() keep round-tripping — confirm whether binary mode is wanted.
    Bitmap(const char* file, int n = 8)
    {
        init(n);
        FILE* fp = fopen(file, "r");
        if (!fp) return; // unreadable file: keep the zero-initialised bitmap
        size_t got = fread(M, sizeof(char), N, fp); (void)got;
        fclose(fp);
    }
    ~Bitmap() { delete[] M; M = NULL; } // releases the bit storage

    // Bit k lives in byte k / 8 under mask 0x80 >> (k % 8); every accessor grows the map first.
    void set(int k) { expand(k);        M[k >> 3] |= (0x80 >> (k & 0x07)); }
    void clear(int k) { expand(k);        M[k >> 3] &= ~(0x80 >> (k & 0x07)); }
    bool test(int k) { expand(k); return M[k >> 3] & (0x80 >> (k & 0x07)); }

    // Writes the whole byte buffer to the given file so that later bitmaps can be
    // initialised from it; does nothing if the file cannot be opened.
    void dump(const char* file)
    {
        FILE* fp = fopen(file, "w");
        if (!fp) return;
        fwrite(M, sizeof(char), N, fp); fclose(fp);
    }
    // Returns the first n bits as a NUL-terminated string of '0'/'1'.
    // The string is allocated with new[]; the caller must delete[] it.
    char* bits2string(int n) {
        expand(n - 1); // the highest bit that may be read is bit n - 1
        char* s = new char[n + 1]; s[n] = '\0';
        for (int i = 0; i < n; i++) s[i] = test(i) ? '1' : '0';
        return s;
    }
    // Grows the storage when bit k lies beyond the current capacity.
    void expand(int k) {
        if (k < 8 * N) return; // still within bounds, nothing to do
        int oldN = N; unsigned char* oldM = M;
        // Doubling strategy (as for a vector); a zero-capacity map asked for bit 0
        // would otherwise be re-allocated with zero bytes again.
        init(k > 0 ? 2 * k : 8);
        memcpy(M, oldM, oldN); delete[] oldM; // move the old contents into the new buffer
    }
    // Prints the first n bits as '0'/'1' characters (debug aid, not an essential interface).
    void print(int n)
    {
        expand(n - 1); for (int i = 0; i < n; i++) putchar(test(i) ? '1' : '0');
    }
};

1.4 散列表头文件

#include "Dictionary.h"
#include "Entry.h"
#include "Bitmap.h"

// Hash code of a character: its value converted to size_t.
static size_t hashCode(char c) {
	return static_cast<size_t>(c);
}
// Hash code of an int: its value converted to size_t.
static size_t hashCode(int k) {
	return static_cast<size_t>(k);
}
// Hash code of a long long: the bits above bit 31 added to the low part taken as an int.
static size_t hashCode(long long i) {
	const long long high = i >> 32;
	const int low = static_cast<int>(i);
	return static_cast<size_t>(high + low);
}
// Cyclic-shift hash code of a NUL-terminated string.
// The string is only read, so it is taken as const; this also accepts const strings and literals.
static size_t hashCode(const char s[]) {
	unsigned int h = 0; // running hash code
	for (size_t n = strlen(s), i = 0; i < n; i++) // left to right, one character at a time
	{
		// rotate the hash left by 5 bits (27 = 32 - 5), then add the current character
		h = (h << 5) | (h >> 27); h += (int)s[i];
	}
	return (size_t)h; // the result behaves like an approximate polynomial hash code
} // per the original author, a 5-bit rotation was found empirically to work best for English words

// Hashtable template class implementing the Dictionary interface.
template <typename K, typename V> class Hashtable : public Dictionary<K, V> { // K: key type, V: value type
	friend class UniPrint;
private:
	Entry <K, V>** ht; // bucket array; each bucket holds a pointer to an entry (or null)
	int M, N, L; // number of buckets, number of entries, number of lazy-removal marks (N + L <= M)
	Bitmap* removed; // lazy-removal marks, one bit per bucket
protected:
	int probe4Hit(const K& k); // follow the probe chain of key k to the bucket matching it
	int probe4Free(const K& k); // follow the probe chain of key k to the first usable empty bucket
	void rehash(); // rehashing: rebuild the bucket array to keep the load factor below the threshold
public:
	Hashtable(int c = 5); // create a table with capacity no less than c (small default chosen for testing)
	~Hashtable(); // release the bucket array and the entries its non-empty buckets point to
	int size() const { return N; } // current number of entries
	bool put(K, V); // insert (duplicate keys are forbidden, so this may fail)
	V* get(K k); // look up
	bool remove(K k); // delete
};


// Resets e by assigning NULL to it.
// NOTE(review): despite the name, nothing is deleted here — when e is an owning
// pointer the allocation it referred to is not freed by this call.
template <typename T>void releases(T& e) {
	e = NULL;
}

1.5 Dice头文件


#include <ctime>
#include <Stdlib.h>

// Random helpers built on rand(). Each overload guards against a zero modulus,
// which would otherwise be a division by zero; in that case rand() is not called.

// Random integer in [0, range); returns 0 when range is 0.
static int dice(int range) { return range != 0 ? rand() % range : 0; }
// Random integer in [lo, hi); returns lo when the interval is empty (hi == lo).
static int dice(int lo, int hi) { return hi != lo ? lo + rand() % (hi - lo) : lo; }
// Random float in steps of 0.001 below the integer part of range; returns 0 when that integer part is 0.
static float dice(float range) {
	const int steps = 1000 * (int)range;
	return steps != 0 ? rand() % steps / (float)1000. : 0;
}
// Random double in steps of 0.001 below the integer part of range; returns 0 when that integer part is 0.
static double dice(double range) {
	const int steps = 1000 * (int)range;
	return steps != 0 ? rand() % steps / (double)1000. : 0;
}
// Random character with code in [32, 128).
static char dice() { return (char)(32 + rand() % 96); }

2.相关函数

2.1 取素数

 //根据file文件中的记录，在[c, n)内取最小的素数
int primeNLT(int c, int n, char* file) {
	Bitmap B(file, n); //file已经按位图格式记录了n以内的所有素数，因此只要
	while (c < n) //从c开始，逐位地
		if (B.test(c)) c++; //测试，即可
		else return c; //返回首个发现的素数
	return c; //若没有这样的素数，返回n（实用中不能如此简化处理）
}

2.2 构造函数

// Constructor: builds an empty table whose bucket count M is the value primeNLT()
// returns for c, using the prime bitmap file below with an upper bound of 1048576.
template <typename K, typename V> Hashtable<K, V>::Hashtable ( int c ) {
   char primeTable[] = "../_input/prime-1048576-bitmap.txt";

   M = primeNLT ( c, 1048576, primeTable ); // bucket count
   N = 0; // no entries yet
   ht = new Entry<K, V>*[M]; // bucket array (allocation assumed to succeed)
   for ( int i = 0; i < M; i++ ) // start with every bucket empty
      ht[i] = NULL;
   removed = new Bitmap ( M ); // one lazy-removal bit per bucket
   L = 0; // no lazy-removal marks yet
}

2.3 析构函数

// Destructor: frees every stored entry, then the bucket array and the lazy-removal bitmap.
// The memory is freed directly here because releases() only nulls the pointer it is given.
template <typename K, typename V> Hashtable<K, V>::~Hashtable() {
	for (int i = 0; i < M; i++) // visit every bucket
		delete ht[i]; // frees the entry; deleting an empty (null) bucket is a no-op
	delete[] ht; ht = NULL; // the bucket array was allocated with new[]
	delete removed; removed = NULL; // the lazy-removal bitmap
}

2.4 查找函数

// Lookup: returns a pointer to the value stored under key k, or NULL when the
// bucket found by probe4Hit() is empty.
template <typename K, typename V> V* Hashtable<K, V>::get(K k)
{
	const int bucket = probe4Hit(k);
	if (!ht[bucket])
		return NULL; // the probe ended on an empty bucket: no such key
	return &(ht[bucket]->value);
} // duplicate keys are forbidden

2.5 插入函数

// Insertion: stores (k, v) unless key k is already present.
// Returns false for a duplicate key, true after a successful insertion.
template <typename K, typename V> bool Hashtable<K, V>::put(K k, V v) {
	const int hit = probe4Hit(k);
	if (ht[hit] != NULL)
		return false; // the key already exists; do not insert it again
	const int slot = probe4Free(k); // an empty bucket for the new entry
	ht[slot] = new Entry<K, V>(k, v);
	N += 1;
	if (removed->test(slot)) { // the bucket is reused, so drop its lazy-removal mark
		removed->clear(slot);
		L -= 1;
	}
	const bool tooFull = (N + L) * 2 > M; // entries plus marks exceed 50% of the buckets
	if (tooFull)
		rehash();
	return true;
}

2.6 删除函数

// Removal: deletes the entry stored under key k.
// Returns false when no such entry exists, true after a successful removal.
template <typename K, typename V> bool Hashtable<K, V>::remove(K k) {
	int r = probe4Hit(k); if (!ht[r]) return false; // make sure the target entry exists
	// Free the entry itself (releases() would only null the pointer), then empty the bucket.
	delete ht[r]; ht[r] = NULL; --N;
	removed->set(r); ++L; // leave a lazy-removal mark and update the counter
	if (3 * N < L) rehash(); // too many lazy-removal marks: rehash
	return true;
}

2.7 重散列函数

/******************************************************************************************
 * Rehashing: rebuilds the table with a new bucket count and re-inserts every entry.
 * Bucket addresses depend on the table length M, so the old bucket array cannot simply
 * be copied over.
 ******************************************************************************************/
template <typename K, typename V> void Hashtable<K, V>::rehash() {

	char text[] = "../_input/prime-1048576-bitmap.txt";
	char* PRIME_TABLE = text;
	int oldM = M; Entry<K, V>** oldHt = ht;
	M = primeNLT(4 * N, 1048576, PRIME_TABLE); // new bucket count, derived from 4 * N (may shrink when many marks were pending)
	ht = new Entry<K, V>*[M]; N = 0; memset(ht, 0, sizeof(Entry<K, V>*) * M); // fresh, empty bucket array
	delete removed; removed = new Bitmap(M); L = 0; // replace the old lazy-removal bitmap with a fresh one
	for (int i = 0; i < oldM; i++) // scan the old table
		if (oldHt[i]) { // for every non-empty bucket ...
			put(oldHt[i]->key, oldHt[i]->value); // ... copy its entry into the new table
			delete oldHt[i]; // put() allocated a new Entry, so the old one must be freed
		}
	delete[] oldHt; // finally free the old bucket array itself
}

2.8 试探函数

/******************************************************************************************
 * Follows the probe chain of key k to the first empty bucket. Several probing strategies
 * are possible in practice; linear probing is used here as an example.
 ******************************************************************************************/
template <typename K, typename V> int Hashtable<K, V>::probe4Free(const K& k) {
	int slot = hashCode(k) % M; // the chain starts at the hash code modulo the table length
	for (; ht[slot]; slot = (slot + 1) % M) {
		// occupied: step to the next bucket, wrapping around at the end
	}
	return slot; // empty bucket, whether or not it carries a lazy-removal mark
}


/******************************************************************************************
 * Follows the probe chain of key k to the bucket matching it. Several probing strategies
 * are possible in practice; linear probing is used here as an example.
 ******************************************************************************************/
template <typename K, typename V> int Hashtable<K, V>::probe4Hit(const K& k) {
	int slot = hashCode(k) % M; // the chain starts at the hash code modulo the table length
	for (;;) {
		const bool otherKey = ht[slot] && (k != ht[slot]->key); // occupied by a different key
		if (!otherKey && !removed->test(slot))
			break; // a matching entry, or an empty bucket without a lazy-removal mark
		slot = (slot + 1) % M; // keep probing, stepping over lazily removed buckets
	}
	return slot; // the caller checks ht[slot] to tell whether the search succeeded
}

3.完整代码

#include <iostream>
#include "Hashtable .h"
#include "Dice.h"
using namespace std;

 // 测试散列表
                //key、value
// Randomised exercise of Hashtable<K, V>.
// Phase 1 mixes lookups, removals and insertions until the table holds 4 * n entries;
// phase 2 removes random keys until the table is empty.
// Keys are drawn with dice((K)n * 12); values are (V)'A' + dice(26).
template <typename K, typename V> void testHashtable(int n) {
    Hashtable<K, V> ht(n);

    // Phase 1: pick one of three operations at random on every round.
    while (ht.size() < 4 * n) {
        cout << endl;
        const int op = dice(3);
        if (op == 0) { // lookup
            K key = dice((K)n * 12);
            cout << "搜索：" << key << " " << hashCode(key) << endl;
            V* pValue = ht.get(key);
            if (pValue) cout << "搜索成功：" << *pValue << endl;
            else cout << "搜索失败！" << endl;
        }
        else if (op == 1) { // removal
            K key = dice((K)n * 12);
            cout << "删除：" << key << " " << hashCode(key) << endl;
            const bool removedOk = ht.remove(key);
            cout << (removedOk ? "删除成功：" : "词条不存在！") << endl;
        }
        else { // insertion
            K key = dice((K)n * 12);
            V v = (V)'A' + dice(26);
            cout << "插入：<" << key << " " << hashCode(key) << "," << v << ">" << endl;
            const bool inserted = ht.put(key, v);
            cout << (inserted ? "插入成功！" : "主键重复！") << endl;
        }
    }

    // Phase 2: keep removing random keys until nothing is left.
    while (ht.size() > 0) {
        K key = dice((K)n * 12);
        cout << "删除：" << key << endl;
        const bool removedOk = ht.remove(key);
        cout << (removedOk ? "删除成功：" : "词条不存在！") << endl;
    }
}





// Entry point: seeds rand() from the clock, runs the hash table test with a random
// size in [0, 50), then waits via the "pause" shell command before exiting.
int main() {
    const unsigned int seed = (unsigned int)time(NULL);
    srand(seed); // seed the random number generator
    const int n = rand() % 50;
    testHashtable<int, char>(n); // the key and value types can be chosen freely here
    system("pause");
    return 0;
}

C++ 数据结构学习 ---- 散列表

1. 头文件

1.1 字典头文件

1.2 词条头文件

1.3 位图头文件

1.4 散列表头文件

1.5 Dice头文件

2.相关函数

2.1 取素数

2.2 构造函数

2.3 析构函数

2.4 查找函数

2.5 插入函数

2.6 删除函数

2.7 重散列函数

2.8 试探函数

3.完整代码

4.运行结果及截图

猜你喜欢