Redis rehash过程中的数据一致性保证

Redis 的哈希表结构

Redis 是一种基于内存的高性能键值对存储数据库，其内部使用哈希表（hashtable）来管理键值对。在 Redis 中，哈希表由 dict 结构表示，而每个 dict 包含两个 dictht 哈希表（ht[0] 和 ht[1]）。

dict 结构定义如下（简化版，省略了一些与本文无关的字段）：

typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;

dictht 结构定义如下：

typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

其中，table 是一个指向 dictEntry 指针数组的指针，size 表示哈希表的大小，sizemask 用于计算哈希值在 table 数组中的索引位置（sizemask = size - 1），used 记录哈希表中已使用的槽位数量。

dictEntry 结构表示哈希表中的一个键值对：

typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

Redis 的哈希算法

Redis 使用 MurmurHash2 算法来计算键的哈希值。MurmurHash2 是一种快速且性能良好的哈希算法，它能在不同的输入下产生较为均匀的哈希分布，减少哈希冲突的概率。

在 Redis 中，计算哈希值的函数如下（简化版）：

unsigned long dictGenHashFunction(const void *key, int len) {
    return dictSdbmHash(key, len);
}

unsigned long dictSdbmHash(const void *key, int len) {
    unsigned long hash = 0;
    const unsigned char *p;

    for(p = key; len > 0; len--) {
        hash = (*p++) + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}

Redis 的 rehash 机制

随着数据的不断插入和删除，哈希表的负载因子（load factor = used / size）会发生变化。当负载因子过高（默认情况下，当 ht[0].used / ht[0].size >= 1 时），Redis 会进行 rehash 操作，以提高哈希表的性能。

rehash 的过程分为以下几个步骤：

分配新的哈希表：在 ht[1] 分配一个大小为 ht[0].size * 2 的新哈希表。
逐步迁移数据：将 ht[0] 中的键值对逐步迁移到 ht[1] 中。这个过程不是一次性完成的，而是在每次执行哈希表相关操作（如插入、删除、查找）时，顺便迁移少量的键值对，从而避免在 rehash 过程中长时间阻塞主线程。
完成迁移：当 ht[0] 中的所有键值对都迁移到 ht[1] 后，释放 ht[0]，将 ht[1] 赋值给 ht[0]，并重新初始化 ht[1]，等待下一次 rehash。

rehash 过程中的数据一致性挑战

在 rehash 过程中，由于数据是逐步迁移的，可能会出现以下数据一致性问题：

读写不一致：在 rehash 过程中，可能会出现读操作从旧的哈希表 ht[0] 中读取数据，而写操作却在新的哈希表 ht[1] 中进行，导致读写数据不一致。
并发操作问题：如果多个客户端同时进行读写操作，并且在 rehash 过程中，可能会出现数据丢失或数据错误的情况。

Redis 如何保证数据一致性

1. 单线程模型

Redis 采用单线程模型来处理客户端请求。这意味着在同一时刻，只会有一个操作在执行，从而避免了多线程环境下的并发问题。在 rehash 过程中，虽然数据是逐步迁移的，但由于单线程的特性，不会出现多个操作同时修改哈希表的情况，保证了数据的一致性。

2. 读写操作的协调

在 rehash 过程中，Redis 会根据 rehashidx 的值来决定从哪个哈希表中读取或写入数据。

读操作：当 rehashidx != -1 时，读操作会先尝试从 ht[1] 中读取数据，如果在 ht[1] 中未找到，则再从 ht[0] 中读取。这样可以保证在 rehash 过程中，读取到的数据是最新的。
写操作：写操作（插入、删除）会同时在 ht[0] 和 ht[1] 中进行。当插入新的键值对时，先将其插入到 ht[1] 中，然后再从 ht[0] 中删除（如果 ht[0] 中存在相同键的键值对）。删除操作则在 ht[0] 和 ht[1] 中同时进行。这样可以确保在 rehash 过程中，写操作的一致性。

3. 渐进式 rehash

Redis 的渐进式 rehash 机制是保证数据一致性的关键。通过在每次哈希表相关操作中迁移少量的键值对，避免了在 rehash 过程中长时间阻塞主线程。在每次操作中，Redis 会根据 rehashidx 的值，从 ht[0] 中迁移一个桶（bucket）中的所有键值对到 ht[1] 中，并将 rehashidx 加 1。这样，随着时间的推移，ht[0] 中的所有键值对会逐步迁移到 ht[1] 中，而在这个过程中，不会影响其他操作对哈希表的正常使用。

代码示例

以下是一个简化的 Redis rehash 过程的代码示例（使用 C 语言），用于演示 rehash 过程中的数据一致性保证。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define INITIAL_SIZE 4

typedef struct dictEntry {
    char *key;
    char *value;
    struct dictEntry *next;
} dictEntry;

typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long used;
} dictht;

typedef struct dict {
    dictht ht[2];
    int rehashidx;
} dict;

unsigned long dictGenHashFunction(const char *key, int len) {
    unsigned long hash = 0;
    const unsigned char *p;

    for(p = (const unsigned char *)key; len > 0; len--) {
        hash = (*p++) + (hash << 6) + (hash << 16) - hash;
    }
    return hash;
}

dict* createDict() {
    dict *d = (dict*)malloc(sizeof(dict));
    d->ht[0].size = INITIAL_SIZE;
    d->ht[0].sizemask = INITIAL_SIZE - 1;
    d->ht[0].used = 0;
    d->ht[0].table = (dictEntry**)calloc(INITIAL_SIZE, sizeof(dictEntry*));

    d->ht[1].size = 0;
    d->ht[1].sizemask = 0;
    d->ht[1].used = 0;
    d->ht[1].table = NULL;

    d->rehashidx = -1;
    return d;
}

void rehash(dict *d) {
    if (d->rehashidx != -1) return;

    d->ht[1].size = d->ht[0].size * 2;
    d->ht[1].sizemask = d->ht[1].size - 1;
    d->ht[1].used = 0;
    d->ht[1].table = (dictEntry**)calloc(d->ht[1].size, sizeof(dictEntry*));

    d->rehashidx = 0;
}

void rehashStep(dict *d) {
    if (d->rehashidx == -1) return;

    while(d->ht[0].used > 0) {
        dictEntry *e, *next;
        unsigned long index;

        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (d->rehashidx >= d->ht[0].size) {
                free(d->ht[0].table);
                d->ht[0] = d->ht[1];
                d->ht[1].size = 0;
                d->ht[1].sizemask = 0;
                d->ht[1].used = 0;
                d->ht[1].table = NULL;
                d->rehashidx = -1;
                return;
            }
        }

        e = d->ht[0].table[d->rehashidx];
        while(e) {
            next = e->next;
            index = dictGenHashFunction(e->key, strlen(e->key)) & d->ht[1].sizemask;
            e->next = d->ht[1].table[index];
            d->ht[1].table[index] = e;
            d->ht[0].used--;
            d->ht[1].used++;
            e = next;
        }

        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    free(d->ht[0].table);
    d->ht[0] = d->ht[1];
    d->ht[1].size = 0;
    d->ht[1].sizemask = 0;
    d->ht[1].used = 0;
    d->ht[1].table = NULL;
    d->rehashidx = -1;
}

void set(dict *d, const char *key, const char *value) {
    unsigned long index;
    dictEntry *e;

    if (d->rehashidx != -1) {
        index = dictGenHashFunction(key, strlen(key)) & d->ht[1].sizemask;
        e = d->ht[1].table[index];
        while(e) {
            if (strcmp(e->key, key) == 0) {
                free(e->value);
                e->value = strdup(value);
                return;
            }
            e = e->next;
        }

        e = (dictEntry*)malloc(sizeof(dictEntry));
        e->key = strdup(key);
        e->value = strdup(value);
        e->next = d->ht[1].table[index];
        d->ht[1].table[index] = e;
        d->ht[1].used++;
    }

    index = dictGenHashFunction(key, strlen(key)) & d->ht[0].sizemask;
    e = d->ht[0].table[index];
    while(e) {
        if (strcmp(e->key, key) == 0) {
            free(e->value);
            e->value = strdup(value);
            return;
        }
        e = e->next;
    }

    e = (dictEntry*)malloc(sizeof(dictEntry));
    e->key = strdup(key);
    e->value = strdup(value);
    e->next = d->ht[0].table[index];
    d->ht[0].table[index] = e;
    d->ht[0].used++;

    if (d->ht[0].used / d->ht[0].size >= 1) {
        rehash(d);
    }
}

char* get(dict *d, const char *key) {
    unsigned long index;
    dictEntry *e;

    if (d->rehashidx != -1) {
        index = dictGenHashFunction(key, strlen(key)) & d->ht[1].sizemask;
        e = d->ht[1].table[index];
        while(e) {
            if (strcmp(e->key, key) == 0) {
                return e->value;
            }
            e = e->next;
        }
    }

    index = dictGenHashFunction(key, strlen(key)) & d->ht[0].sizemask;
    e = d->ht[0].table[index];
    while(e) {
        if (strcmp(e->key, key) == 0) {
            return e->value;
        }
        e = e->next;
    }

    return NULL;
}

void freeDict(dict *d) {
    int i;
    dictEntry *e, *next;

    for(i = 0; i < d->ht[0].size; i++) {
        e = d->ht[0].table[i];
        while(e) {
            next = e->next;
            free(e->key);
            free(e->value);
            free(e);
            e = next;
        }
    }

    free(d->ht[0].table);

    if (d->ht[1].table) {
        for(i = 0; i < d->ht[1].size; i++) {
            e = d->ht[1].table[i];
            while(e) {
                next = e->next;
                free(e->key);
                free(e->value);
                free(e);
                e = next;
            }
        }
        free(d->ht[1].table);
    }

    free(d);
}

int main() {
    dict *d = createDict();

    set(d, "key1", "value1");
    set(d, "key2", "value2");
    set(d, "key3", "value3");
    set(d, "key4", "value4");

    printf("get key1: %s\n", get(d, "key1"));

    rehashStep(d);

    printf("get key2: %s\n", get(d, "key2"));

    freeDict(d);
    return 0;
}

在上述代码中，createDict 函数创建一个新的哈希表，rehash 函数开始 rehash 过程，rehashStep 函数执行一次 rehash 步骤，set 函数用于设置键值对，get 函数用于获取键对应的值，freeDict 函数用于释放哈希表占用的内存。

总结

Redis 在 rehash 过程中通过单线程模型、读写操作的协调以及渐进式 rehash 机制，有效地保证了数据的一致性。这种设计使得 Redis 在处理大量数据时，既能保持高性能，又能确保数据的完整性和一致性。了解 Redis 的 rehash 机制和数据一致性保证，对于深入理解 Redis 的工作原理以及在实际应用中优化和调优 Redis 具有重要意义。