【源码-5】rehash

rehash的作用是迁移redis中的数据。redis在内存中以类似于java中HashMap的形式存储数据，相当于一个链表数组。数据存放在dict里面，也就是下面rehash函数的入口参数；dict里面有两张hash表，rehash就是把数据从其中一张表（ht[0]）迁移到另一张表（ht[1]）。
rehash的源码如下，入口参数为字典指针 dict *d 和迁移步数 int n。
dictRehash返回1代表仍然有key需要从老表迁移到新表，返回0代表已经迁移完了。函数开头先用dictIsRehashing判断字典当前是否正在rehash，如果没有在rehash则直接返回0。
rehash是以桶（bucket）为单位进行迁移的，n是代表步数，每步完成一个bucket的迁移。一个bucket对应的是hash数组中的一个链表。

/* Run up to `n` incremental rehash steps on dictionary `d`.
 *
 * One step moves a single non-empty bucket (the whole chain stored in that
 * slot) from ht[0] into ht[1]. Empty slots met along the way are skipped, but
 * at most n*10 of them are visited per call so that the amount of work stays
 * bounded; because of that budget a call may return without having moved any
 * bucket at all.
 *
 * Returns 1 when keys still remain in the old table, 0 when the dictionary is
 * not rehashing or the migration has just completed. */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* How many empty slots we may still skip. */

    if (!dictIsRehashing(d)) return 0;

    /* Spend one unit of `n` per migrated bucket, for as long as the old
     * table still holds entries. */
    while (n != 0 && d->ht[0].used != 0) {
        dictEntry *entry;

        n--;

        /* ht[0].used != 0 means a non-empty bucket still lies at or after
         * rehashidx, so the cursor must be inside the old table. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);

        /* Advance the cursor over empty slots, giving up once the budget
         * is exhausted. */
        for (; d->ht[0].table[d->rehashidx] == NULL; ) {
            d->rehashidx++;
            empty_budget--;
            if (empty_budget == 0) return 1;
        }

        /* Relink every entry of this chain at the head of its bucket in the
         * new table. */
        entry = d->ht[0].table[d->rehashidx];
        while (entry != NULL) {
            dictEntry *following = entry->next;
            /* Slot of this key in the new table. */
            uint64_t slot = dictHashKey(d, entry->key) & d->ht[1].sizemask;

            entry->next = d->ht[1].table[slot];
            d->ht[1].table[slot] = entry;
            d->ht[0].used--;
            d->ht[1].used++;
            entry = following;
        }

        /* The old slot is now drained: clear it and move the cursor on. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Old table still populated: the caller has to invoke us again. */
    if (d->ht[0].used != 0) return 1;

    /* Everything has been moved: release the old bucket array, promote ht[1]
     * to ht[0], reset ht[1] and mark rehashing as finished. */
    zfree(d->ht[0].table);
    d->ht[0] = d->ht[1];
    _dictReset(&d->ht[1]);
    d->rehashidx = -1;
    return 0;
}

dict里面有两个hash表，用来实现渐进式rehash：rehash过程中，节点会从老的hash表（ht[0]）逐步迁移到新的hash表（ht[1]）。

/* Dictionary. It embeds two hash tables (ht[0] and ht[1]); dictRehash moves
 * entries from ht[0] to ht[1] and uses rehashidx as its bucket cursor. */
typedef struct dict {
    // Type-specific functions
    dictType *type;
    // Private data
    void *privdata;
    // The two hash tables
    dictht ht[2];
    // Rehash index: the next ht[0] bucket to examine.
    // Holds -1 while no rehash is in progress.
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    // Number of safe iterators currently running
    int iterators; /* number of iterators currently running */
} dict;

hash表可以看做是一个链表数组：数组的每一项是一条链表，hash到同一个位置的节点挂在同一条链表上，以此（链地址法）解决hash冲突。

/* Hash table: an array of buckets plus its bookkeeping counters. */
typedef struct dictht {
    // Bucket array.
    // Each slot is the head of a linked list of entries (separate chaining
    // is used to resolve hash collisions).
    dictEntry **table;
    // Size of the hash table (number of slots)
    unsigned long size;
    // Size mask, ANDed with a key's hash to compute its slot index.
    // Always equal to size - 1.
    unsigned long sizemask;
    // Number of entries currently stored in this table
    unsigned long used;
} dictht;

/* Hash table entry: one key/value node inside a bucket's linked list. */

typedef struct dictEntry {
    // Key
    void *key;
    // Value: a pointer, an unsigned 64-bit integer or a signed 64-bit integer
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;
    // Next entry in the same bucket, forming the chain
    struct dictEntry *next;
} dictEntry;

猜你喜欢