HashTable实现基因检测

用 HashTable（哈希表）实现基因检测：在某些场景下，哈希表的查找效率比红黑树还要高。
下面先来看模拟实现的代码。
#include <assert.h>
#include <cstring>
#include <iostream>
#include <new>
#include <string>

#include <Windows.h>

// NOTE(review): file-scope using-directive; the unqualified cout/cerr/string/endl below rely on it.
using namespace std;
// Verbosity switch: main() emits its extra diagnostic output only while this is non-zero.
static int DeBug = 1;

#include <cstdlib>   //*
#include <crtdbg.h>  //*

#ifdef  _DEBUG         //*
#ifndef DBG_NEW           //*
#define DBG_NEW new (_NORMAL_BLOCK, __FILE__, __LINE__)//*
#define new DBG_NEW        //*
#endif               //*
#endif 

// Hash function used to turn a key string into a number before it is reduced
// to a bucket index.
#define hash_func	SDBMHash
// Default bucket count; initHashTable never creates fewer buckets than this.
#define DEFAULT_LEN		4

// Evaluates to true when a and b point at NUL-terminated strings with equal
// contents. Both arguments are parenthesised so that an expression argument
// is cast as a whole rather than only its first operand.
#define CAMPARE(a, b) (strcmp((const char*)(a), (const char*)(b)) == 0)

// One node of a bucket's singly linked chain.
struct listNode_t {
	void* key;               // key pointer (opaque to the node itself)
	void* date;              // payload pointer
	struct listNode_t* next; // following node in the chain, or null at the end
};

using listNode = listNode_t;   // the node type itself
using listP    = listNode_t*;  // pointer to a node

using ELEM = listP;	// one stored element, always handled through its node pointer
using LIST = listP;	// one bucket slot; the table keeps an array of these

// Table header: the bucket count plus the array of bucket slots.
struct hashtable_t {
	int table_len;	// number of buckets (valid indices are 0 .. table_len - 1)
	LIST* str;		// array of table_len bucket pointers
};

using hashNode = hashtable_t;   // the table type itself
using hashP    = hashtable_t*;  // pointer to a table

// Creates a table and stores it in ht. The bucket count is max_len, but never
// less than DEFAULT_LEN. Returns true on success.
bool initHashTable(hashP& ht, int max_len = DEFAULT_LEN);

/*
 * SDBM string hash: folds the NUL-terminated string at key into an integer
 * while trying to keep different strings from colliding. The result is
 * masked to 31 bits.
 */
unsigned int SDBMHash(void* key);

// Bucket index for insert's key; yields -1 (an invalid index) when insert is null.
static int HASH(hashP ht, ELEM insert);

// Links insert at the front of its bucket's chain (head insertion, no
// traversal needed). Returns false when nothing was inserted.
bool insertHashTable(hashP ht, ELEM insert);

// Prints every bucket's chain.
void travelHashTable(hashP ht);

// Gene matching: selects the bucket from key, then looks for a node whose
// payload string equals date. Returns true when one is found.
bool findHashTable(hashP ht, void* key, void* date);

// Unlinks the node whose payload string equals date and hands it back to the
// caller (nullptr when nothing was removed). A negative key makes the
// function ask for the bucket number on standard input.
ELEM deleteHashTable(hashP ht, const char* date = nullptr, int key = -1);

// Releases the nodes, the bucket array and the table itself.
void destroyHashTable(hashP& ht);


// Demo driver: loads a fixed set of 4-letter gene fragments into a hash table
// and reports whether one window of a test sequence matches any of them.
int main(void)
{
	// Fragments to search for. Nodes only borrow these buffers (key and date
	// point into the strings), so the array must outlive the table.
	string name[] = { "BADC", "CCDB", "ABCD", "ADDA", "CDAD", "BDAA"};
	string test = "ABADCDABCCDDD";

	// Probe: the 4-character window of the test sequence starting at offset 1.
	char child[16];
	strncpy_s(child, test.c_str() + 1, 4);
	cout << test << endl;
	if (DeBug) cout << child << endl;

	hashP ht = nullptr;
	if (initHashTable(ht)) {
		cout << "init success! " << endl;
	}
	else {
		cout << "init falure!" << endl;
		// Without a usable table every later step would dereference a missing
		// bucket array, so stop here.
		return 1;
	}

	// Insert one node per fragment; the fragment text is both key and payload.
	const size_t name_count = sizeof(name) / sizeof(name[0]);
	for (size_t i = 0; i < name_count; i++) {
		ELEM insert = new listNode;
		insert->key = const_cast<char*>(name[i].c_str());
		insert->date = const_cast<char*>(name[i].c_str());
		insert->next = nullptr;

		const int ordinal = static_cast<int>(i) + 1;
		// The key is a C string, so it is printed with %s. Reading it through
		// an int pointer would be type punning on a char buffer.
		if (insertHashTable(ht, insert)) {
			if (DeBug) printf_s("第 %d 个元素插入成功!, 它的key: %s date: %s \n", ordinal, (const char*)insert->key, (const char*)insert->date);
		}
		else {
			if (DeBug) printf_s("第 %d 个元素插入失败!, 它的key: %s date: %s \n", ordinal, (const char*)insert->key, (const char*)insert->date);
			delete insert; // the table did not take ownership
		}
	}

	// Deletion demo (disabled). To try it:
	//   if (ELEM removed = deleteHashTable(ht, "BADC")) delete removed;

	// Dump the table contents.
	if (DeBug) travelHashTable(ht);

	cout << "\n\n****************************************\n\n";
	if (findHashTable(ht, (void*)child, (void*)child)) {
		cout << "你的基因有先天性艾滋病基因段！！！！你完了！！" << endl;
	}
	else {
		cout << "你的基因没有问题" << endl;
	}
	cout << "\n\n****************************************\n\n";

	if (DeBug) cout << "destroyHashTable!" << endl;
	destroyHashTable(ht);
	if (DeBug) cout << "destroyHashTable success! " << endl;
	system("pause");
	_CrtDumpMemoryLeaks();
	return 0;
}


// Builds a new hash table and stores it in ht.
//
// ht      : receives the new table on success; set to nullptr on failure so
//           the caller never sees a half-built table.
// max_len : requested bucket count; values below DEFAULT_LEN are raised to
//           DEFAULT_LEN.
// Returns true on success, false when any allocation fails (everything
// allocated so far is released first).
//
// Every bucket gets a sentinel head node whose key/date are unused, so the
// insert and delete code never has to special-case an empty bucket. The
// bucket array comes from calloc and must be released with free.
bool
initHashTable(hashP& ht, int max_len)
{
	ht = nullptr;
	const int bucket_count = max_len > DEFAULT_LEN ? max_len : DEFAULT_LEN;

	// `new` reports failure by throwing, not by returning null.
	hashP table = nullptr;
	try {
		table = new hashNode;
	}
	catch (const std::bad_alloc&) {
		cerr << "the hashtable create error!" << endl;
		return false;
	}
	table->table_len = bucket_count;
	table->str = static_cast<LIST*>(calloc(static_cast<size_t>(bucket_count), sizeof(LIST)));

	if (!table->str) {
		cerr << "the ht->str create error!" << endl;
		delete table;
		return false;
	}

	int built = 0; // number of sentinel heads created so far
	try {
		for (; built < bucket_count; ++built) {
			ELEM head = new listNode;
			head->key = nullptr;	// sentinel: key is never read
			head->date = nullptr;	// sentinel: no payload
			head->next = nullptr;	// empty chain
			table->str[built] = head;
		}
	}
	catch (const std::bad_alloc&) {
		// Roll back instead of retrying: a failed allocation is unlikely to
		// succeed on an immediate retry, and retrying could loop forever.
		cerr << "the bucket head create error!" << endl;
		while (built > 0) {
			delete table->str[--built];
		}
		free(table->str);
		delete table;
		return false;
	}

	ht = table;
	return true;
}

// SDBM string hash.
//
// key : pointer to a NUL-terminated byte string; a null pointer hashes to 0.
// Returns the hash masked to 31 bits, so it is also non-negative when stored
// in an int.
//
// Bytes are read as unsigned char so that bytes >= 0x80 contribute the same
// value whether plain char is signed or unsigned on the target.
unsigned int
SDBMHash(void* key)
{
	if (!key) {
		return 0;
	}

	unsigned int hash = 0;
	for (const unsigned char* cursor = static_cast<const unsigned char*>(key); *cursor; ++cursor) {
		// Same as hash = 65599 * hash + byte, since 64 + 65536 - 1 == 65599.
		hash = *cursor + (hash << 6) + (hash << 16) - hash;
	}
	return (hash & 0x7FFFFFFF);
}

// Maps a node to the index of the bucket it belongs to: the hash of its key
// reduced modulo the table's bucket count. A null node yields -1, which is
// not a valid index. ht itself is not checked here.
static inline int
HASH(hashP ht, ELEM insert)
{
	if (!insert) {
		return -1;
	}

	const unsigned int bucket = hash_func(insert->key) % ht->table_len;
	return static_cast<int>(bucket);
}

// Inserts a node at the front of its bucket's chain.
//
// ht     : target table.
// insert : caller-allocated node with key set; on success the table links it
//          in, on failure the caller keeps ownership.
// Returns true when the node was linked in. Returns false when an argument is
// unusable (null table, missing bucket array, non-positive bucket count, null
// node or null key) or when a node with an equal key is already stored.
//
// Keys are NUL-terminated strings, so duplicates are detected by comparing
// string contents. Comparing the pointers alone would miss an equal key held
// in a different buffer.
bool
insertHashTable(hashP ht, ELEM insert)
{
	if (!ht || !ht->str || ht->table_len <= 0) return false;
	if (!insert || !insert->key) return false;

	const int index = HASH(ht, insert);
	if (index < 0) return false;

	ELEM head = ht->str[index]; // sentinel head of the bucket
	if (!head) return false;

	const char* new_key = static_cast<const char*>(insert->key);
	for (ELEM cur = head->next; cur; cur = cur->next) {
		if (cur->key && strcmp(static_cast<const char*>(cur->key), new_key) == 0) {
			// Remove this check to allow several elements with the same key.
			cout << "the key of insert is exist!" << endl;
			return false;
		}
	}

	// Head insertion: constant time, no walk to the tail.
	insert->next = head->next;
	head->next = insert;
	return true;
}

// Prints the whole table: one output line per bucket, listing each node's key
// and payload in chain order, framed by two separator lines.
//
// Key and payload are both printed as C strings. The key is not reinterpreted
// as an int, because that would read a char buffer through an int pointer.
void
travelHashTable(hashP ht)
{
	if (!ht || !ht->str) {
		cout << "hashtable is no exist!" << endl;
		return;
	}

	cout << "********************************" << endl;
	for (int bucket = 0; bucket < ht->table_len; ++bucket) {
		ELEM head = ht->str[bucket];
		// Skip the sentinel head; a missing head just prints an empty line.
		for (ELEM cur = head ? head->next : nullptr; cur; cur = cur->next) {
			const char* key_text = cur->key ? static_cast<const char*>(cur->key) : "(null)";
			const char* date_text = cur->date ? static_cast<const char*>(cur->date) : "(null)";
			printf_s("key:%s, date:%s  ", key_text, date_text);
		}
		cout << endl;
	}
	cout << "********************************" << endl;
}

// Looks for a stored node whose payload string equals date.
//
// ht   : table to search.
// key  : NUL-terminated string hashed to choose the bucket to search.
// date : NUL-terminated string compared against each node's payload.
// Returns true (after printing the matching node) when a match exists in the
// selected bucket, false otherwise or when an argument is unusable.
bool
findHashTable(hashP ht, void* key, void* date)
{
	if (!key || !date) {
		cout << "key = NULL or date = NULL!" << endl;
		return false;
	}

	// A non-positive bucket count would make the modulo below undefined.
	if (!ht || !ht->str || ht->table_len <= 0) {
		cout << "the hashtable is no exist!" << endl;
		return false;
	}

	const int index = static_cast<int>(hash_func(key) % ht->table_len);
	ELEM head = ht->str[index];

	for (ELEM cur = head ? head->next : nullptr; cur; cur = cur->next) {
		if (cur->date && CAMPARE(date, cur->date)) {
			printf_s(" date[ %s ] \n ", (const char*)cur->date);
			// The key is a C string; print it as text, not through an int pointer.
			printf_s("key[ %s ] \n\n", cur->key ? (const char*)cur->key : "(null)");
			return true;
		}
	}

	return false; // chain exhausted without a match
}

// Unlinks one node from the table and returns it.
//
// ht   : table to modify.
// date : NUL-terminated payload string of the node to remove.
// key  : selects the bucket (key % table_len). When negative, the bucket
//        number is requested interactively on standard input instead.
// Returns the unlinked node, or nullptr when nothing was removed (unusable
// table, input aborted or unreadable, null date, or no matching node).
//
// The node is handed back rather than freed here: the caller allocated it,
// so the caller is the one to delete it.
ELEM
deleteHashTable(hashP ht, const char* date, int key)
{
	if (!ht) {
		cout << "the hashtable is no exist !" << endl;
		return nullptr;
	}
	if (!ht->str || ht->table_len <= 0) {
		return nullptr; // no buckets to search; also keeps the modulo defined
	}

	if (key < 0) {
		int bucket = 0;
		for (;;) {
			printf_s("请输入要删除结点的键值 范围在 %d ~ %d 之间 \n", 0, ht->table_len - 1);
			if (!(cin >> bucket)) {
				return nullptr; // end of input or not a number: give up instead of guessing
			}
			if (bucket >= 0 && bucket <= ht->table_len - 1) {
				break;
			}

			cerr << "该键值是非法的" << endl;
			char answer = 'f';
			do {
				cout << "是否继续删除？y/n" << endl;
				if (!(cin >> answer)) {
					return nullptr; // a dead stream would otherwise repeat the question forever
				}
			} while (answer != 'y' && answer != 'n');

			if (answer == 'n') {
				return nullptr; // user abandoned the deletion
			}
		}
		key = bucket;
	}

	if (!date) {
		return nullptr; // nothing to compare against
	}

	// Walk the bucket, remembering the predecessor so the match can be unlinked.
	const int index = key % ht->table_len;
	ELEM last = ht->str[index];
	if (!last) {
		return nullptr;
	}
	ELEM cur = last->next;
	while (cur) {
		// Whole-string equality.
		if (cur->date && strcmp((const char*)(cur->date), date) == 0) {
			break;
		}
		last = cur;
		cur = cur->next;
	}

	if (!cur) return nullptr;

	last->next = cur->next;
	return cur;
}

// Releases a table created by initHashTable and resets the caller's pointer.
//
// ht : table to destroy; a null table is a no-op. Set to nullptr on return
//      so the caller is not left with a dangling pointer.
//
// Every node of every chain, sentinel heads included, is deleted. The strings
// that key/date point at are not freed here, since this file never allocates
// them. The bucket array was obtained with calloc, so it is released with
// free, not delete[].
void
destroyHashTable(hashP& ht)
{
	if (!ht) return;

	if (ht->str) {
		for (int i = 0; i < ht->table_len; i++) {
			ELEM cur = ht->str[i];
			while (cur) {
				ELEM doomed = cur;
				cur = cur->next; // step forward before the node goes away
				delete doomed;
			}
		}
		free(ht->str);
	}

	delete ht;
	ht = nullptr;
}
HashTable实现基因检测

猜你喜欢