author     sundb <sundbcn@gmail.com>     2021-11-16 19:12:25 +0800
committer  GitHub <noreply@github.com>   2021-11-16 13:12:25 +0200
commit     985430b4fca5ac55b121a98ac0407909c6767530 (patch)
tree       57c09d97f4efe22e1d7f380f6ff8ce637840b6ab
parent     e725d737fb2ee492fbcd04bb7deb1696d7e182d1 (diff)
download   redis-985430b4fca5ac55b121a98ac0407909c6767530.tar.gz
Change lzf to handle values larger than UINT32_MAX (#9776)
Redis supports inserting data over 4GB into a string (and recently into lists too, see #9357), but the LZF compression used in RDB files (see the `rdbcompression` config) and in quicklist (see the `list-compress-depth` config) cannot compress or decompress data over UINT32_MAX, which corrupts the RDB after compression.

Internal changes:
1. Change the `unsigned int` parameters of `lzf_compress`/`lzf_decompress` to `size_t`.
2. Change the variables in `lzf_compress` that hold offsets and lengths to `size_t`.
3. Set `LZF_USE_OFFSETS` to 0. When `LZF_USE_OFFSETS` is 1, lzf stores offsets in `LZF_HSLOT` (32 bit). It defaults to 1 even on 64-bit builds, because lzf assumes it only compresses and decompresses data smaller than UINT32_MAX; now that lzf must support 64-bit sizes, keeping it enabled would make it impossible to store 64-bit offsets or pointers. As a side effect, disabling `LZF_USE_OFFSETS` also brings a small performance improvement.

Tests:
1. Add a test for compressing/decompressing a string larger than UINT32_MAX.
2. Add a unit test for compressing/decompressing a quicklistNode.
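For illustration, a minimal caller-side sketch of the widened API (the `roundtrip` helper and its buffer handling are hypothetical; only the `size_t`-based `lzf_compress`/`lzf_decompress` signatures come from this change):

    #include <stdlib.h>
    #include <string.h>
    #include "lzf.h"

    /* Round-trip a buffer whose length may exceed UINT32_MAX.
     * Both length parameters are size_t now, so nothing is truncated
     * to 32 bits on the way in or out. */
    static int roundtrip(const void *in, size_t in_len) {
        size_t out_len = in_len - 1;        /* require at least one byte of savings */
        void *compressed = malloc(out_len);
        void *restored = malloc(in_len);
        int ok = 0;

        if (compressed && restored) {
            size_t clen = lzf_compress(in, in_len, compressed, out_len);
            if (clen > 0) {                 /* 0 means it did not fit in out_len */
                size_t dlen = lzf_decompress(compressed, clen, restored, in_len);
                ok = (dlen == in_len) && memcmp(in, restored, in_len) == 0;
            }
        }
        free(compressed);
        free(restored);
        return ok;
    }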
-rw-r--r--  src/lzf.h               12
-rw-r--r--  src/lzfP.h               5
-rw-r--r--  src/lzf_c.c             13
-rw-r--r--  src/lzf_d.c              6
-rw-r--r--  src/quicklist.c         76
-rw-r--r--  tests/unit/bitops.tcl   14
6 files changed, 109 insertions, 17 deletions
diff --git a/src/lzf.h b/src/lzf.h
index 98e038f31..45ddfa8da 100644
--- a/src/lzf.h
+++ b/src/lzf.h
@@ -73,9 +73,9 @@
* and lzf_c.c.
*
*/
-unsigned int
-lzf_compress (const void *const in_data, unsigned int in_len,
- void *out_data, unsigned int out_len);
+size_t
+lzf_compress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len);
/*
* Decompress data compressed with some version of the lzf_compress
@@ -92,9 +92,9 @@ lzf_compress (const void *const in_data, unsigned int in_len,
*
* This function is very fast, about as fast as a copying loop.
*/
-unsigned int
-lzf_decompress (const void *const in_data, unsigned int in_len,
- void *out_data, unsigned int out_len);
+size_t
+lzf_decompress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len);
#endif
diff --git a/src/lzfP.h b/src/lzfP.h
index 78c858fad..567f5a201 100644
--- a/src/lzfP.h
+++ b/src/lzfP.h
@@ -129,8 +129,9 @@
* Whether to store pointers or offsets inside the hash table. On
* 64 bit architectures, pointers take up twice as much space,
* and might also be slower. Default is to autodetect.
- */
-/*#define LZF_USER_OFFSETS autodetect */
+ * Notice: Don't set this value to 1; on 64-bit builds 'LZF_HSLOT'
+ * would then be unable to store offsets above UINT32_MAX. */
+#define LZF_USE_OFFSETS 0
/*****************************************************************************/
/* nothing should be changed below */
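For reference, the switch decides what each hash-table slot stores; paraphrased from lzfP.h (the exact wording in the header may differ slightly):

    #if LZF_USE_OFFSETS
    # define LZF_HSLOT_BIAS ((const u8 *)in_data)
      typedef unsigned int LZF_HSLOT;   /* 32-bit offset relative to in_data */
    #else
    # define LZF_HSLOT_BIAS 0
      typedef const u8 *LZF_HSLOT;      /* full-width pointer */
    #endif

    typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];

With the value forced to 0, the hash table holds plain pointers, so match references are no longer squeezed into 32 bits and inputs larger than UINT32_MAX can be addressed.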
diff --git a/src/lzf_c.c b/src/lzf_c.c
index 6c117dab1..7cbbc82fa 100644
--- a/src/lzf_c.c
+++ b/src/lzf_c.c
@@ -105,9 +105,9 @@
*
*/
NO_SANITIZE("alignment")
-unsigned int
-lzf_compress (const void *const in_data, unsigned int in_len,
- void *out_data, unsigned int out_len
+size_t
+lzf_compress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len
#if LZF_STATE_ARG
, LZF_STATE htab
#endif
@@ -132,7 +132,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
#if defined (WIN32) && defined (_M_X64)
unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
- unsigned long off;
+ size_t off;
#endif
unsigned int hval;
int lit;
@@ -153,7 +153,8 @@ lzf_compress (const void *const in_data, unsigned int in_len,
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
- ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
+ ref = *hslot ? (*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */
+ *hslot = ip - LZF_HSLOT_BIAS;
if (1
#if INIT_HTAB
@@ -171,7 +172,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
{
/* match found at *ref++ */
unsigned int len = 2;
- unsigned int maxlen = in_end - ip - len;
+ size_t maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
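The new ternary above is needed because, with LZF_USE_OFFSETS now 0, LZF_HSLOT_BIAS is 0 and an empty hash slot is a null pointer; evaluating NULL + 0 is undefined behavior in C and is what UBSan reports as "applying zero offset to null pointer". A self-contained sketch of the guarded pattern (the names below are stand-ins, not lzf's):

    #include <stddef.h>

    #define HSLOT_BIAS 0   /* plays the role of LZF_HSLOT_BIAS when LZF_USE_OFFSETS == 0 */

    /* Apply the bias only when the slot actually holds a pointer;
     * NULL + 0 would be undefined behavior. */
    static const unsigned char *slot_to_ref(const unsigned char *slot) {
        return slot ? slot + HSLOT_BIAS : NULL;
    }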
diff --git a/src/lzf_d.c b/src/lzf_d.c
index dbf1ae772..ff32be892 100644
--- a/src/lzf_d.c
+++ b/src/lzf_d.c
@@ -56,9 +56,9 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif
-unsigned int
-lzf_decompress (const void *const in_data, unsigned int in_len,
- void *out_data, unsigned int out_len)
+size_t
+lzf_decompress (const void *const in_data, size_t in_len,
+ void *out_data, size_t out_len)
{
u8 const *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
diff --git a/src/quicklist.c b/src/quicklist.c
index b1051bd9b..22bd9b516 100644
--- a/src/quicklist.c
+++ b/src/quicklist.c
@@ -1721,6 +1721,7 @@ void quicklistBookmarksClear(quicklist *ql) {
#include <stdint.h>
#include <sys/time.h>
#include "testhelp.h"
+#include <stdlib.h>
#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
@@ -1902,6 +1903,30 @@ static char *genstr(char *prefix, int i) {
return result;
}
+static void randstring(unsigned char *target, size_t sz) {
+ size_t p = 0;
+ int minval, maxval;
+ switch(rand() % 3) {
+ case 0:
+ minval = 'a';
+ maxval = 'z';
+ break;
+ case 1:
+ minval = '0';
+ maxval = '9';
+ break;
+ case 2:
+ minval = 'A';
+ maxval = 'Z';
+ break;
+ default:
+ assert(NULL);
+ }
+
+ while(p < sz)
+ target[p++] = minval+rand()%(maxval-minval+1);
+}
+
/* main test, but callable from other files */
int quicklistTest(int argc, char *argv[], int flags) {
UNUSED(argc);
@@ -3125,6 +3150,57 @@ int quicklistTest(int argc, char *argv[], int flags) {
quicklistRelease(ql);
}
+ if (flags & REDIS_TEST_LARGE_MEMORY) {
+ TEST("compress and decompress quicklist ziplist node") {
+ quicklistNode *node = quicklistCreateNode();
+ node->entry = ziplistNew();
+
+ /* Create a rand string */
+ size_t sz = (1 << 25); /* 32MB per one entry */
+ unsigned char *s = zmalloc(sz);
+ randstring(s, sz);
+
+ /* Keep filling the node, until it reaches 1GB */
+ for (int i = 0; i < 32; i++) {
+ node->entry = ziplistPush(node->entry, s, sz, ZIPLIST_TAIL);
+ quicklistNodeUpdateSz(node);
+
+ long long start = mstime();
+ assert(__quicklistCompressNode(node));
+ assert(__quicklistDecompressNode(node));
+ printf("Compress and decompress: %zu MB in %.2f seconds.\n",
+ node->sz/1024/1024, (float)(mstime() - start) / 1000);
+ }
+
+ zfree(s);
+ zfree(node->entry);
+ zfree(node);
+ }
+
+#if ULONG_MAX >= 0xffffffffffffffff
+ TEST("compress and decomress quicklist plain node large than UINT32_MAX") {
+ size_t sz = (1ull << 32);
+ unsigned char *s = zmalloc(sz);
+ randstring(s, sz);
+ memcpy(s, "helloworld", 10);
+ memcpy(s + sz - 10, "1234567890", 10);
+
+ quicklistNode *node = __quicklistCreatePlainNode(s, sz);
+
+ long long start = mstime();
+ assert(__quicklistCompressNode(node));
+ assert(__quicklistDecompressNode(node));
+ printf("Compress and decompress: %zu MB in %.2f seconds.\n",
+ node->sz/1024/1024, (float)(mstime() - start) / 1000);
+
+ assert(memcmp(node->entry, "helloworld", 10) == 0);
+ assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0);
+ zfree(node->entry);
+ zfree(node);
+ }
+#endif
+ }
+
if (!err)
printf("ALL TESTS PASSED!\n");
else
diff --git a/tests/unit/bitops.tcl b/tests/unit/bitops.tcl
index 1a3c3a54e..89431c81f 100644
--- a/tests/unit/bitops.tcl
+++ b/tests/unit/bitops.tcl
@@ -566,4 +566,18 @@ start_server {tags {"bitops"}} {
r config set proto-max-bulk-len $oldval
r del mykey
} {1} {large-memory}
+
+ test "SETBIT values larger than UINT32_MAX and lzf_compress/lzf_decompress correctly" {
+ set bytes [expr (1 << 32) + 1]
+ set bitpos [expr (1 << 35)]
+ set oldval [lindex [r config get proto-max-bulk-len] 1]
+ r config set proto-max-bulk-len $bytes
+ r setbit mykey $bitpos 1
+ assert_equal $bytes [r strlen mykey]
+ assert_equal 1 [r getbit mykey $bitpos]
+ r debug reload ;# lzf_compress/lzf_decompress when RDB saving/loading.
+ assert_equal 1 [r getbit mykey $bitpos]
+ r config set proto-max-bulk-len $oldval
+ r del mykey
+ } {1} {large-memory needs:debug}
}
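The numbers in this test line up as follows (a small worked check, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        /* Setting bit 1<<35 touches byte (1<<35)/8 = 1<<32, so the key
         * grows to (1<<32)+1 bytes, just past UINT32_MAX. DEBUG RELOAD
         * then forces an RDB save and load, which is what runs
         * lzf_compress/lzf_decompress over that value. */
        uint64_t bitpos = 1ULL << 35;
        uint64_t bytes = bitpos / 8 + 1;
        assert(bytes == (1ULL << 32) + 1);
        assert(bytes > UINT32_MAX);
        return 0;
    }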