类型介绍
dict部分是redis的内存核心,其实质就是一个哈希表结构,这点和memcached有着异曲同工之妙,只不过memcached在查找和分配内存上做了分离,将slabs和assoc建立了联系,而redis更加简洁,直接就包含了两者,这是因为redis没有固定内存的格式大小,不需要提前去预分配那些内存空间,另外可以提到一点,redis不使用memcached的内存分配方式的另一个原因应该就是支持类型的问题,后面可以看到,redis支持的数据类型要比memcached丰富得多,之所以两者都采用哈希表结构作为存储核心,这是由缓存的特性和要求决定的,缓存就是用来提高访问速度的,哈希表能够提供接近于O(1)的时间复杂度的增删改查操作;
上面提到了数据操作,dict部分作为底层数据结构,同样提供了增加、删除、修改等操作以及迭代器部分,同时和memcached一样,dict部分同样存在两张哈希表,这两个表是用来调整内存大小的,当现在的哈希表大小不满足需求时就要动态调整大小,注意,这里面是动态调整,也就是说redis的rehash过程不是一次性完成,这点和memcached不一样,这是因为memcached的架构是多线程的,可以通过加锁的方式不影响数据访问,而redis是单线程的,如果不是增量式的调整大小,会导致程序阻塞。
代码分析
自定义类型
/* One key/value node of the hash table. Nodes that land in the same
 * bucket are linked together through 'next'. */
typedef struct dictEntry {
void *key; // key of this entry
union {
void *val; // pointer to a caller-defined value
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next; // next entry in the same bucket chain
} dictEntry;
/* Table of callbacks that customizes a dict for a particular key/value
 * kind. Every callback except hashFunction receives a 'privdata' pointer
 * as its first argument. */
typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
void (*keyDestructor)(void *privdata, void *key);
void (*valDestructor)(void *privdata, void *obj);
} dictType;
/* A single hash table: an array of bucket heads plus its bookkeeping. */
typedef struct dictht {
dictEntry **table; // bucket array; each slot heads a chain of entries
unsigned long size; // number of buckets in 'table'
unsigned long sizemask; // size-1, ANDed with a hash to get a bucket index
unsigned long used; // number of entries currently stored
} dictht; // hash table
/* The dictionary itself: a callback table, caller private data and two
 * hash tables used for incremental rehashing. */
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2]; // two tables: ht[1] is the rehash target (comparable to memcached's primary_hashtable / old_hashtable pair)
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
unsigned long iterators; /* number of iterators currently running */
} dict; // dictionary
/* Iteration state over a dict. 'index' is -1 and 'table' is 0 until the
 * first call to dictNext(). 'safe' selects between counting the iterator
 * in dict.iterators (safe) and fingerprint checking (unsafe). */
typedef struct dictIterator {
dict *d;
long index;
int table, safe;
dictEntry *entry, *nextEntry;
/* unsafe iterator fingerprint for misuse detection. */
long long fingerprint;
} dictIterator;
定义变量
dict_can_resize,是否能够调整大小;
dict_force_resize_ratio,强制调整大小的比率;
代码分解
_dictReset,重置字典结构内部变量;
/* Put a hash table back into its empty state: no bucket array and all
 * counters at zero. The bucket array that was referenced before the call,
 * if any, is not freed here. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}
_dictInit,初始化字典;
/* Initialize an already allocated dict: record the callback table and the
 * private data, empty both hash tables, mark that no rehash is running
 * and that no iterator is active. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int t;

    d->type = type;
    d->privdata = privDataPtr;
    for (t = 0; t < 2; t++)
        _dictReset(&d->ht[t]);
    d->iterators = 0;
    d->rehashidx = -1;
    return DICT_OK;
}
dictCreate,创建一个字典结构;
/* Allocate a new dict with zmalloc() and initialize it with the given
 * callback table and private data. Returns the new dict.
 * NOTE(review): the result of zmalloc() is used unchecked; presumably
 * zmalloc() does not return NULL -- confirm against the allocator. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}
dictResize,调整字典大小;
/* Shrink or grow the dict so that its size request equals the number of
 * stored entries (but never less than DICT_HT_INITIAL_SIZE).
 *
 * Returns DICT_ERR when resizing is disabled (dict_can_resize == 0) or a
 * rehash is already in progress; otherwise returns whatever dictExpand()
 * returns for the computed size. */
int dictResize(dict *d)
{
    /* Same type as ht[0].used: storing the count in an 'int' would
     * truncate it for tables holding more than INT_MAX entries. */
    unsigned long minimal;

    /* Only resize when allowed and when no rehash is running. */
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

    minimal = d->ht[0].used; /* number of entries in use */
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE; /* never go below the initial size */
    return dictExpand(d, minimal);
}
dictExpand,扩展字典;
/* Create the hash table (first use) or start an incremental rehash into a
 * bigger/smaller table able to hold 'size' entries.
 *
 * The bucket count actually used is _dictNextPower(size).
 *
 * Returns DICT_ERR when:
 *   - a rehash is already in progress,
 *   - 'size' is smaller than the number of entries already stored,
 *   - the computed bucket count is smaller than 'size' or its byte size
 *     does not fit in a size_t (overflow),
 *   - the computed bucket count equals the current one.
 * Returns DICT_OK otherwise. */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    /* Bucket count chosen by _dictNextPower() for the requested size. */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Reject sizes whose allocation request would wrap around: a wrapped
     * multiplication would allocate a bucket array smaller than
     * 'sizemask' assumes, and later bucket accesses would go out of
     * bounds. Also reject a bucket count that cannot hold 'size'. */
    if (realsize < size || realsize > SIZE_MAX / sizeof(dictEntry*))
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1; /* mask applied to hashes to get an index */
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
dictRehash,增量rehash过程,每次调用最多迁移n个非空桶(bucket),并且最多访问n*10个空桶;
/* Perform up to 'n' steps of incremental rehashing. One step moves every
 * entry of one non-empty bucket of ht[0] into ht[1]. At most n*10 empty
 * buckets are skipped per call.
 *
 * Returns 0 when no rehash is in progress or when the whole table has
 * been migrated; returns 1 when there are still entries left in ht[0]. */
int dictRehash(dict *d, int n) {
// budget of empty buckets that may be skipped during this call
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0; // no rehash in progress: nothing to do
// stop when the step budget is used up or ht[0] holds no more entries
while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
 * elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
// advance until a non-empty bucket is found
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++; // empty bucket: move to the next index
if (--empty_visits == 0) return 1; // empty-bucket budget exhausted: return 1, rehash not finished
}
de = d->ht[0].table[d->rehashidx]; // head of the non-empty bucket that was found
/* Move all the keys in this bucket from the old to the new hash HT */
// the chain is not moved as a whole: each entry is re-indexed on its own
// key, so entries of one old bucket may end up in different new buckets
while(de) {
unsigned int h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask; // bucket index in the new table
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL; // the old bucket is now empty
d->rehashidx++; // continue with the next bucket
}
/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) { // every entry has been migrated
zfree(d->ht[0].table); // free the old bucket array
d->ht[0] = d->ht[1]; // the new table becomes table 0
_dictReset(&d->ht[1]); // table 1 goes back to the empty state
d->rehashidx = -1; // mark that no rehash is in progress
return 0;
}
/* More to rehash... */
return 1; // not finished: the caller has to invoke the rehash again
}
dictRehashMilliseconds, rehash过程,设定单次rehash时间;
/* Rehash in batches of 100 steps until either dictRehash() reports that
 * nothing is left, or more than 'ms' milliseconds have elapsed since the
 * call started. Returns 100 times the number of batches after which
 * dictRehash() reported more work to do. */
int dictRehashMilliseconds(dict *d, int ms) {
    long long began = timeInMilliseconds();
    int done = 0;

    for (;;) {
        if (!dictRehash(d,100)) break;
        done += 100;
        if (timeInMilliseconds()-began > ms) break;
    }
    return done;
}
dictAdd,添加元素;
/* Add a key/value pair to the dict. Returns DICT_OK when the entry was
 * created, DICT_ERR when dictAddRaw() returned NULL. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *slot = dictAddRaw(d,key); /* entry created for the key */

    if (slot != NULL) {
        dictSetVal(d, slot, val);
        return DICT_OK;
    }
    return DICT_ERR;
}
dictAddRaw,底层添加函数:只为key创建entry并插入到对应桶的链表头部,value由调用者自行设置;如果key已存在则返回NULL;
/* Low level add: create an entry for 'key', link it at the head of its
 * bucket and return it without setting any value. Returns NULL when
 * _dictKeyIndex() reports -1 for the key.
 *
 * While a rehash is in progress one rehash step is performed first and
 * the new entry goes into ht[1]; otherwise it goes into ht[0]. */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *fresh;
    dictht *target;

    /* Contribute one step to a running rehash. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Bucket index for the new element, -1 means "do not insert". */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];

    /* Insert at the head of the chain, on the assumption that recently
     * added entries are the ones more likely to be accessed. */
    fresh = zmalloc(sizeof(*fresh));
    fresh->next = target->table[slot];
    target->table[slot] = fresh;
    target->used++;

    /* Finally store the key in the entry. */
    dictSetKey(d, fresh, key);
    return fresh;
}
dictReplace,替换
/* Add the pair, or overwrite the value when dictAdd() fails.
 * Returns 1 when dictAdd() succeeded (a new entry was created) and 0
 * when the value of the entry returned by dictFind() was replaced. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing, previous;

    /* Plain insertion succeeds when dictAdd() accepts the key. */
    if (dictAdd(d, key, val) == DICT_OK)
        return 1;

    /* Insertion was refused: look the entry up. */
    existing = dictFind(d, key);

    /* Keep a copy of the entry so the old value can be released AFTER the
     * new one is stored. The order matters when old and new value are the
     * same object (think reference counting: increment first, then
     * decrement). */
    previous = *existing;
    dictSetVal(d, existing, val);
    dictFreeVal(d, &previous);
    return 0;
}
dictReplaceRaw,这个函数实际上是一个简化版的dictAddRaw,这里肯定会返回一个给定key的存储空间;
/* Return the entry dictFind() yields for 'key'; when there is none,
 * return the result of dictAddRaw() for that key instead. */
dictEntry *dictReplaceRaw(dict *d, void *key) {
    dictEntry *found = dictFind(d,key);

    if (found) return found;      /* already present: hand it back */
    return dictAddRaw(d,key);     /* absent: create it */
}
dictGenericDelete,通用删除方式,先查找,后删除
/* Search for 'key' and unlink its entry. When 'nofree' is zero the key
 * and value are released through dictFreeKey()/dictFreeVal() before the
 * entry itself is freed; when it is non-zero only the entry is freed.
 *
 * Returns DICT_OK when an entry was removed, DICT_ERR when ht[0] has no
 * buckets or the key was not found. Performs one rehash step first when
 * a rehash is in progress, and then searches both tables. */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    unsigned int hash, slot;
    dictEntry *cur, *prev;
    int t;

    if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    hash = dictHashKey(d, key);

    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;
        prev = NULL;
        for (cur = d->ht[t].table[slot]; cur; prev = cur, cur = cur->next) {
            if (!(key==cur->key || dictCompareKeys(d, key, cur->key)))
                continue;

            /* Match: unlink the entry from its chain. */
            if (prev)
                prev->next = cur->next;
            else
                d->ht[t].table[slot] = cur->next;

            if (!nofree) {
                dictFreeKey(d, cur);
                dictFreeVal(d, cur);
            }
            zfree(cur);
            d->ht[t].used--;
            return DICT_OK;
        }
        /* Table 1 is only worth searching while a rehash is running. */
        if (!dictIsRehashing(d)) break;
    }
    return DICT_ERR; /* not found */
}
dictDelete,删除某个key对应的entry;
/* Remove the entry for 'key', releasing its key and value as well.
 * Returns the result of dictGenericDelete(). */
int dictDelete(dict *ht, const void *key) {
    const int nofree = 0; /* key and value are released too */

    return dictGenericDelete(ht,key,nofree);
}
dictDeleteNoFree,删除某个key对应的entry,不释放空间;
/* Remove the entry for 'key' without releasing its key and value.
 * Returns the result of dictGenericDelete(). */
int dictDeleteNoFree(dict *ht, const void *key) {
    const int nofree = 1; /* key and value are left untouched */

    return dictGenericDelete(ht,key,nofree);
}
_dictClear,释放整个哈希表;
/* Destroy every entry of one hash table, free its bucket array and reset
 * the table to the empty state. When 'callback' is non-NULL it is invoked
 * with d->privdata each time the bucket index is a multiple of 65536.
 * Always returns DICT_OK. */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    unsigned long slot = 0;

    /* Walk the buckets; stop early once no entry is left. */
    while (slot < ht->size && ht->used > 0) {
        dictEntry *cur, *following;

        if (callback && (slot & 65535) == 0) callback(d->privdata);

        for (cur = ht->table[slot]; cur != NULL; cur = following) {
            following = cur->next; /* saved before 'cur' is freed */
            dictFreeKey(d, cur);
            dictFreeVal(d, cur);
            zfree(cur);
            ht->used--;
        }
        slot++;
    }

    /* Release the bucket array, then re-initialize the table. */
    zfree(ht->table);
    _dictReset(ht);
    return DICT_OK; /* never fails */
}
dictRelease,释放字典;
/* Clear both hash tables of the dict (no progress callback) and then
 * free the dict structure itself. */
void dictRelease(dict *d)
{
    int t;

    for (t = 0; t < 2; t++)
        _dictClear(d,&d->ht[t],NULL);
    zfree(d);
}
dictFind,查找某个key对应的entry;
/* Look up 'key' and return its entry, or NULL when the dict holds no
 * entries or the key is not present. Performs one rehash step first when
 * a rehash is in progress; table 1 is searched only while rehashing. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *cur;
    unsigned int hash, slot, t;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    hash = dictHashKey(d, key);

    for (t = 0; t <= 1; t++) {
        slot = hash & d->ht[t].sizemask;
        for (cur = d->ht[t].table[slot]; cur; cur = cur->next) {
            if (key==cur->key || dictCompareKeys(d, key, cur->key))
                return cur;
        }
        /* Without a running rehash there is nothing in table 1. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}
dictFetchValue,获取该key对应的value;
/* Return the value stored for 'key' (via dictGetVal), or NULL when
 * dictFind() does not find the key. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d,key);

    if (!found) return NULL;
    return dictGetVal(found);
}
dictGetIterator,获取一个dict字典的迭代器;
/* Allocate an unsafe (safe == 0) iterator positioned before the first
 * bucket of table 0. The 'fingerprint' field is left unset here. */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->d = d;
    it->safe = 0;
    /* index -1 in table 0 is the "not started yet" position */
    it->index = -1;
    it->table = 0;
    it->nextEntry = NULL;
    it->entry = NULL;
    return it;
}
dictGetSafeIterator,获取一个安全迭代器;
/* Same as dictGetIterator(), but the returned iterator has its 'safe'
 * flag set to 1. */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *it = dictGetIterator(d);

    it->safe = 1;
    return it;
}
dictNext,获取下一个entry,这个函数有点意思,意思在于这个迭代器和之前链表的迭代器有所不同,这里不仅需要判断是在哪个bucket里面,也需要判断是在哪个table中,最后还要考虑下是在哪个链表中的哪个entry;
/* Advance the iterator and return the next entry, or NULL when the
 * iteration is over. The walk goes bucket by bucket through table 0 and,
 * if a rehash is in progress when table 0 is exhausted, continues with
 * table 1. Within a bucket the chain is followed through the 'next'
 * pointer saved on the previous call. */
dictEntry *dictNext(dictIterator *iter)
{
while (1) {
if (iter->entry == NULL) { // no current entry: either the iteration has not started or the previous chain is exhausted
dictht *ht = &iter->d->ht[iter->table];
if (iter->index == -1 && iter->table == 0) {
// very first call on this iterator
if (iter->safe)
iter->d->iterators++; // register one more running safe iterator
else
iter->fingerprint = dictFingerprint(iter->d); // remember the dict state for the check done at release time
}
iter->index++; // move to the next bucket
if (iter->index >= (long) ht->size) {
// ran past the last bucket of the current table
if (dictIsRehashing(iter->d) && iter->table == 0) { // rehash in progress and still on table 0
iter->table++; // switch to table 1
iter->index = 0; // restart from its first bucket
ht = &iter->d->ht[1]; // table 1 is now the current table
} else {
break; // nothing left to visit: leave the loop and return NULL
}
}
iter->entry = ht->table[iter->index]; // head of the bucket (may be NULL)
} else {
iter->entry = iter->nextEntry; // continue along the current chain
}
if (iter->entry) {
/* We need to save the 'next' here, the iterator user
 * may delete the entry we are returning. */
iter->nextEntry = iter->entry->next;
return iter->entry;
}
}
return NULL;
}
dictReleaseIterator,释放迭代器;
/* Free an iterator. If the iterator was advanced at least once (it is no
 * longer at index -1 of table 0), undo what the first dictNext() did:
 * a safe iterator decrements the dict's iterator count, an unsafe one
 * asserts that the dict fingerprint is unchanged. */
void dictReleaseIterator(dictIterator *iter)
{
    int started = (iter->index != -1 || iter->table != 0);

    if (started) {
        if (iter->safe)
            iter->d->iterators--;
        else
            assert(iter->fingerprint == dictFingerprint(iter->d));
    }
    zfree(iter);
}