rehash的作用是迁移redis中的数据。redis在内存中的存储结构类似于java的HashMap,相当于一个链表数组。数据存放在dict里面(也就是下面rehash函数的入口参数),dict里面有两张hash表,rehash就是把数据从其中一张表(旧表)迁移到另一张表(新表)。
rehash的源码如下,入口参数为 dict *d(要进行rehash的字典)和 int n(本次最多执行的步数)。
dictRehash返回1代表仍然有key需要从老表迁移到新表,返回0代表已经迁移完了(或者当前并没有在进行rehash)。函数开头的dictIsRehashing用来判断字典当前是否处于rehash过程中,如果不是则直接返回0。
rehash是以桶(bucket)为单位进行迁移的,n是代表步数,每步完成一个bucket的迁移。一个bucket对应的是hash数组中的一个链表。
/* Incrementally rehash the dictionary for at most N steps.
 *
 * One step migrates a single non-empty bucket (the whole chain hanging off
 * it, since collisions are chained) from ht[0] to ht[1]. Empty buckets met
 * along the way do not count as steps, but at most N*10 of them are skipped
 * per call so the amount of work stays bounded; once that budget is used up
 * the function returns early, possibly without having moved a single key.
 *
 * Returns 1 while keys remain in the old table, and 0 once rehashing is
 * complete (or when no rehash was in progress to begin with). */
int dictRehash(dict *d, int n) {
    int skip_budget = n*10; /* Empty buckets we are still allowed to skip. */

    if (!dictIsRehashing(d)) return 0;

    for (; n != 0 && d->ht[0].used != 0; n--) {
        dictEntry *entry, *following;

        /* ht[0].used != 0 means a populated bucket still lies ahead, so the
         * rehash cursor must be inside the old table. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);

        /* Advance the cursor past empty buckets, charging each one to the
         * skip budget. */
        while (d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            skip_budget--;
            if (skip_budget == 0) return 1;
        }

        /* Relink every entry of this bucket at the head of its chain in
         * the new table. */
        for (entry = d->ht[0].table[d->rehashidx]; entry != NULL; entry = following) {
            uint64_t slot;

            following = entry->next; /* Saved before entry->next is rewritten. */
            slot = dictHashKey(d, entry->key) & d->ht[1].sizemask;
            entry->next = d->ht[1].table[slot];
            d->ht[1].table[slot] = entry;
            d->ht[0].used--;
            d->ht[1].used++;
        }

        /* The bucket is now empty in the old table; move the cursor on. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Keys are still left in the old table: more to rehash. */
    if (d->ht[0].used != 0) return 1;

    /* Old table fully drained: free its bucket array, make the new table
     * the primary one, reset ht[1] and mark rehashing as not in progress. */
    zfree(d->ht[0].table);
    d->ht[0] = d->ht[1];
    _dictReset(&d->ht[1]);
    d->rehashidx = -1;
    return 0;
}
dict里面有两个hash表,用来实现渐进式rehash:把节点从老的hash表(ht[0])逐步迁移(而不是复制)到新的hash表(ht[1])。
/* Dictionary: the top-level container that owns the two hash tables used
 * for incremental rehashing. */
typedef struct dict {
// Type-specific functions
dictType *type;
// Private data
void *privdata;
// Hash tables. dictRehash moves entries from ht[0] (old table) to ht[1]
// (new table) and, once ht[0] is empty, replaces ht[0] with ht[1].
dictht ht[2];
// Rehash index: dictRehash uses it as the index of the next ht[0] bucket
// to migrate. The value is -1 when no rehash is in progress.
int rehashidx; /* rehashing not in progress if rehashidx == -1 */
// Number of safe iterators currently running
int iterators; /* number of iterators currently running */
} dict;
hash表可以看做是一个链表数组:落在同一个桶(bucket)里的节点通过链表串在一起,以此解决hash冲突(链地址法)。
/* A single hash table: an array of buckets, each bucket being the head of a
 * chain of entries. */
typedef struct dictht {
// Bucket array.
// Each slot holds the head node of a linked list of entries (separate
// chaining is used to resolve hash collisions); an empty bucket is NULL.
dictEntry **table;
// Size of the hash table (number of buckets)
unsigned long size;
// Size mask, ANDed with a key's hash to compute the bucket index.
// Always equal to size - 1.
unsigned long sizemask;
// Number of entries currently stored in this hash table
unsigned long used;
} dictht;
/* Hash table entry: one key/value node in a bucket's chain. */
typedef struct dictEntry {
// Key
void *key;
// Value: either a pointer or an inline 64-bit unsigned/signed integer
union {
void *val;
uint64_t u64;
int64_t s64;
} v;
// Pointer to the next entry in the same bucket, forming a linked list
struct dictEntry *next;
} dictEntry;