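/* Copyright notice: this is an original article by the blog author and may not be reproduced without the author's permission. https://blog.csdn.net/u013139008/article/details/79645105 */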
/* Hash Tables Implementation.
*
* This file implements in-memory hash tables with insert/del/replace/find/
* get-random-element operations. Hash tables will auto-resize if needed;
* tables of power of two in size are used, and collisions are handled by
* chaining. See the source code for more information... :)
*
* Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef _WIN32
#include "Win32_Interop/Win32_Portability.h"
#include "Win32_Interop/Win32_Time.h"
#include "Win32_Interop/win32fixes.h"
extern BOOL g_IsForkedProcess;
#endif
#include "fmacros.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#ifndef _WIN32
#include <sys/time.h>
#endif
#include <ctype.h>
#include "dict.h"
#include "zmalloc.h"
#include "redisassert.h"
/* Using dictEnableResize() / dictDisableResize() it is possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: a hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
/* Non-zero while resizing is allowed; dictResize() returns DICT_ERR when
 * this is 0. */
static int dict_can_resize = 1;
/* Elements/buckets ratio above which growth is still allowed even when
 * dict_can_resize is 0 (see the note above). */
static unsigned int dict_force_resize_ratio = 5;
/* -------------------------- private prototypes ---------------------------- */
/* Forward declarations of file-local helpers so they can be used before
 * their definitions. */
static int _dictExpandIfNeeded(dict *ht);
static PORT_ULONG _dictNextPower(PORT_ULONG size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
/* -------------------------- hash functions -------------------------------- */
/* Thomas Wang's 32 bit Mix Function.
 *
 * Scrambles an unsigned int key into an unsigned int hash value through a
 * fixed sequence of shift/add/xor rounds. The same key always produces the
 * same result; no seed is involved. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);

    return mixed;
}
/* Both seeded hash functions in this file (the MurmurHash2 one and the
 * case-insensitive "times 33" one) start from this seed.
 * Below are its default value and the accessors used to change and read it. */
static uint32_t dict_hash_function_seed = 5381;

/* Replace the seed used by the seeded hash functions. */
void dictSetHashFunctionSeed(uint32_t seed)
{
    dict_hash_function_seed = seed;
}

/* Return the seed currently used by the seeded hash functions. */
uint32_t dictGetHashFunctionSeed(void)
{
    return dict_hash_function_seed;
}
/* MurmurHash2, by Austin Appleby.
 *
 * Hashes the 'len' bytes starting at 'key' into an unsigned int, starting
 * from the file-level seed dict_hash_function_seed.
 *
 * Assumption: sizeof(int) == 4.
 *
 * Limitations:
 * 1. It will not work incrementally.
 * 2. It will not produce the same results on little-endian and big-endian
 *    machines (each 4-byte group is read in native byte order).
 *
 * Each 4-byte group is loaded with memcpy() rather than by dereferencing a
 * casted uint32_t pointer, so 'key' may have any alignment and no
 * incompatible-type access is performed. On platforms where the casted load
 * worked, the result is the same. */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
       They're not really 'magic', they just happen to work well. */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k;

        /* Alignment-safe native-endian load of the next 4 bytes. */
        memcpy(&k, data, sizeof(k));

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array */
    switch(len) {
    case 3: h ^= (uint32_t)data[2] << 16; /* fall through */
    case 2: h ^= (uint32_t)data[1] << 8;  /* fall through */
    case 1: h ^= data[0];
            h *= m;
            break;
    default:
            break;
    }

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}
/* And a case insensitive hash function (based on djb hash).
 *
 * Hashes the 'len' bytes at 'buf' into an unsigned int. Every byte is passed
 * through tolower() first, so inputs that differ only in letter case hash to
 * the same value. The running hash starts from dict_hash_function_seed. */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;

    for (; len != 0; len--) {
        unsigned int c = (unsigned int)tolower(*buf);

        buf++;
        /* hash * 33 + c: the multiply is the same as (hash << 5) + hash. */
        hash = hash * 33 + c;
    }
    return hash;
}
/* ----------------------------- API implementation ------------------------- */
// The following three functions together implement the creation and initialization of a dictionary.
/* Put a single hash table back into its empty state: no bucket array, zero
 * size, zero size mask, zero stored elements.
 * NOTE: the bucket array is NOT freed here, only the pointer is cleared;
 * this function should only be called by ht_destroy() or by code that has
 * already released or handed over the table. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}
/* Create a new dictionary.
 *
 * 'type' supplies the callbacks used to operate on this dictionary's keys
 * and values, and 'privDataPtr' is stored as-is in the new dictionary.
 * The dictionary is allocated with zmalloc() and initialized by _dictInit();
 * no bucket array exists yet.
 * NOTE(review): the zmalloc() result is used unchecked -- presumably
 * zmalloc() does not return on allocation failure; confirm in zmalloc.c. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}
/* Initialize an already allocated dictionary.
 *
 * Stores 'type' and 'privDataPtr', resets both internal hash tables to the
 * empty state and marks the dictionary as not rehashing. Always returns
 * DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    d->type = type;
    d->privdata = privDataPtr;

    /* -1 means no rehash is in progress: dictExpand() sets this to 0 when it
     * installs a second table and dictRehash() puts it back to -1 once the
     * migration has finished. */
    d->rehashidx = -1;

    /* Iterator counter; _dictRehashStep() only rehashes while this is 0. */
    d->iterators = 0;

    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    return DICT_OK;
}
/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USED/BUCKETS ratio near to <= 1.
 *
 * Returns DICT_ERR without touching the dictionary when resizing is disabled
 * (dict_can_resize == 0) or when a rehash is already in progress. Otherwise
 * returns the result of dictExpand().
 *
 * The requested size is the number of stored elements, but never less than
 * DICT_HT_INITIAL_SIZE. */
int dictResize(dict *d)
{
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

    /* Small dictionaries are sized to the minimum table size. */
    if (d->ht[0].used < DICT_HT_INITIAL_SIZE)
        return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Do not narrow the element count to int before handing it over: a count
     * above INT_MAX would be mangled by the conversion and the resize
     * request would then be rejected by dictExpand(). */
    return dictExpand(d, d->ht[0].used);
}
/* Expand or create the hash table.
 *
 * 'size' is the minimum number of buckets wanted; the real bucket count is
 * what _dictNextPower(size) returns (intended to be a power of two that is
 * at least 'size').
 *
 * When the dictionary has no table yet, the new table simply becomes ht[0]
 * and the dictionary is not put into the rehashing state. Otherwise the new
 * table becomes ht[1] and rehashidx is set to 0, which starts an incremental
 * rehash from ht[0] to ht[1].
 *
 * Returns DICT_OK on success, or DICT_ERR when:
 *   - a rehash is already in progress,
 *   - 'size' is smaller than the number of elements already stored,
 *   - the resulting bucket count equals the current one,
 *   - the bucket count cannot satisfy 'size', or the byte size of the bucket
 *     array does not fit in size_t. */
int dictExpand(dict *d,PORT_ULONG size)
{
    dictht n; /* the new hash table */
    PORT_ULONG realsize = _dictNextPower(size);
    size_t bytes;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Detect overflows: refuse a bucket count that came back smaller than
     * requested, and refuse an allocation size that wrapped around, instead
     * of allocating an array that is too small for n.size buckets. */
    bytes = realsize*sizeof(dictEntry*);
    if (realsize < size || bytes/sizeof(dictEntry*) != realsize)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    /* With a power-of-two size, size-1 has all the low bits set, so
     * (hash & sizemask) yields a valid bucket index. */
    n.sizemask = realsize-1;
    /* The table is an array of realsize bucket head pointers. */
    n.table = zcalloc(bytes);
    n.used = (size_t) 0; WIN_PORT_FIX /* cast (size_t) */

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * A step moves one whole bucket (a chain that may hold several keys) from
 * ht[0] to ht[1]. Because the old table may contain long runs of empty
 * buckets, at most N*10 empty buckets are skipped per call; once that budget
 * is used up the function returns 1 even if no bucket was moved, so that the
 * amount of work per call stays bounded.
 *
 * When the last element has left ht[0], its bucket array is freed, ht[1]
 * takes the place of ht[0], and the dictionary leaves the rehashing state. */
int dictRehash(dict *d, int n) {
    /* In the Windows build nothing is rehashed when g_IsForkedProcess is
     * set: it is not necessary there and may have a performance impact. */
    WIN32_ONLY(if (g_IsForkedProcess) return 0;)
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    dictht *src = &d->ht[0]; /* table being emptied */
    dictht *dst = &d->ht[1]; /* table being filled */

    /* Nothing to do unless a rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    while (n-- && src->used != 0) {
        dictEntry *entry;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(src->size > (PORT_ULONG)d->rehashidx);

        /* Skip empty buckets, giving up once the visit budget is spent. */
        while (src->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }

        /* Move all the keys in this bucket from the old to the new table. */
        entry = src->table[d->rehashidx];
        while (entry != NULL) {
            dictEntry *following = entry->next;
            /* Bucket index in the new table; both tables use the same hash
             * function, only the mask differs. */
            unsigned int slot = dictHashKey(d, entry->key) & dst->sizemask;

            /* Push the entry at the head of the destination chain. */
            entry->next = dst->table[slot];
            dst->table[slot] = entry;
            src->used--;
            dst->used++;

            entry = following;
        }

        /* The source bucket is now empty; continue with the next one. */
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (src->used == 0) {
        zfree(src->table);
        *src = *dst;
        _dictReset(dst);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}
// 获取当前时间(毫秒)
PORT_LONGLONG timeInMilliseconds(void) {
#ifdef _WIN32
return GetHighResRelativeTime(1000);
#else
struct timeval tv;
gettimeofday(&tv,NULL);
return (((PORT_LONGLONG)tv.tv_sec)*1000)+(tv.tv_usec/1000);
#endif
}
/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds.
 *
 * Calls dictRehash(d,100) repeatedly until it reports that nothing is left
 * to move, or until more than 'ms' milliseconds have elapsed since entry.
 * The return value is 100 for every batch that reported more work remaining,
 * so it is a coarse step count, not an exact number of moved keys. */
int dictRehashMilliseconds(dict *d, int ms) {
    PORT_LONGLONG start = timeInMilliseconds();
    int rehashes = 0;

    for (;;) {
        /* One batch of up to 100 bucket steps; stop when rehashing is done. */
        if (!dictRehash(d,100)) break;

        rehashes += 100;

        /* Time budget exhausted. */
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}
/* Perform a single step of rehashing, but only when no iterators are
 * registered on the dictionary (d->iterators == 0). While iterators are
 * active in the middle of a rehash the two hash tables must not be changed,
 * otherwise some elements could be missed or returned twice.
 *
 * This function is called by common lookup or update operations in the
 * dictionary so that the hash table automatically migrates from H1 to H2
 * while it is actively used. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;

    dictRehash(d,1); /* a single bucket step */
}