author    yoav-steinberg <yoav@monfort.co.il>  2021-12-13 20:16:25 +0100
committer GitHub <noreply@github.com>          2021-12-13 21:16:25 +0200
commit    c7dc17fc0f9341f61be1a1318468409249310316 (patch)
tree      96a4645c690bf7eae47045e9a7cca65c899a7ca7
parent    c40d23b89fbee79506e73d1e44ed4ba9ea60ecd9 (diff)
Fix possible int overflow when hashing an sds. (#9916)
This caused a crash when adding elements larger than 2GB to a set (the same goes for hash keys). See #8455.

Details:
* The fix makes the dict hash functions receive a `size_t` instead of an `int`. In practice the dict hash functions call siphash, which already receives a `size_t`, and the callers of the hash functions pass a `size_t` to them, so the fix is trivial.
* The issue was recreated by attempting to add a >2GB value to a set. Appropriate tests were added in which I create a set with large elements and check basic functionality on it (SADD, SCARD, SPOP, etc.).
* While adding the tests I also refactored all the test code that runs under the `--large-memory` flag. This removes code duplication for the test framework's `write_big_bulk` and `read_big_bulk` helpers and also avoids allocating the framework's huge helper string when not running under `--large-memory`.
* I also added the _violations.tcl_ unit tests to the full test suite and cleaned up the non-relevant list-related tests that were in there. This was done in this PR because most of the _violations_ tests are "large memory" tests.
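To see why an `int` length is a problem (a standalone sketch, not code from this patch): sds strings report their length as a `size_t`, so squeezing that through an `int` parameter truncates anything above `INT_MAX`. For lengths between 2GB and 4GB the truncated value is typically negative, and widening that negative `int` back to the `size_t` that siphash expects yields an enormous bogus length, which is what crashed the server.

```c
#include <stdio.h>
#include <stddef.h>

/* Stand-in for the old prototype: the length is narrowed to int. */
static void old_hash_len(const void *key, int len) {
    (void)key;
    size_t passed = (size_t)len;   /* siphash takes size_t, so the int is widened again */
    printf("old API sees len = %d, siphash would read %zu bytes\n", len, passed);
}

/* Stand-in for the fixed prototype: size_t end to end, no narrowing. */
static void new_hash_len(const void *key, size_t len) {
    (void)key;
    printf("new API sees len = %zu bytes\n", len);
}

int main(void) {
    size_t biglen = 4294967295u;       /* a ~4GB element length, well above INT_MAX */
    old_hash_len(NULL, (int)biglen);   /* implementation-defined narrowing, typically -1 */
    new_hash_len(NULL, biglen);        /* length preserved */
    return 0;
}
```

On a typical 64-bit build the first call reports a length of -1, which becomes SIZE_MAX once widened, so siphash would read far past the end of the buffer.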
-rw-r--r--  src/dict.c                  4
-rw-r--r--  src/dict.h                  4
-rw-r--r--  tests/support/util.tcl     71
-rw-r--r--  tests/test_helper.tcl       1
-rw-r--r--  tests/unit/type/list.tcl   35
-rw-r--r--  tests/unit/type/set.tcl    35
-rw-r--r--  tests/unit/violations.tcl  82
7 files changed, 125 insertions, 107 deletions
diff --git a/src/dict.c b/src/dict.c
index 4d064f548..1420055e7 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -83,11 +83,11 @@ uint8_t *dictGetHashFunctionSeed(void) {
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);
-uint64_t dictGenHashFunction(const void *key, int len) {
+uint64_t dictGenHashFunction(const void *key, size_t len) {
return siphash(key,len,dict_hash_function_seed);
}
-uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
return siphash_nocase(buf,len,dict_hash_function_seed);
}
diff --git a/src/dict.h b/src/dict.h
index e41d149ad..e65fbb583 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -192,8 +192,8 @@ dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
-uint64_t dictGenHashFunction(const void *key, int len);
-uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
+uint64_t dictGenHashFunction(const void *key, size_t len);
+uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len);
void dictEmpty(dict *d, void(callback)(dict*));
void dictEnableResize(void);
void dictDisableResize(void);
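For context on why this is a drop-in change (a sketch, not part of this commit): dict hash callbacks pass the key's length, already a `size_t` (e.g. from `sdslen()`), straight into `dictGenHashFunction`, so widening the prototype removes the narrowing without touching the callers. The snippet below is illustrative only; it uses `strlen()` as a stand-in for `sdslen()`, and `exampleStringHash` is a hypothetical name, not a Redis function.

```c
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Prototype as declared in dict.h after this patch. */
uint64_t dictGenHashFunction(const void *key, size_t len);

/* Hypothetical dictType hash callback for C-string keys: the size_t
 * length flows through unchanged, so keys longer than INT_MAX hash
 * correctly. */
uint64_t exampleStringHash(const void *key) {
    return dictGenHashFunction(key, strlen((const char *)key));
}
```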
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 3feb1e961..d97743665 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -907,3 +907,74 @@ proc delete_lines_with_pattern {filename tmpfilename pattern} {
close $fh_out
file rename -force $tmpfilename $filename
}
+
+# The following functions and variables are used only when running large-memory
+# tests. We avoid defining them when not running large-memory tests because the
+# global variables take up a lot of memory.
+proc init_large_mem_vars {} {
+ if {![info exists ::str500]} {
+ set ::str500 [string repeat x 500000000] ;# 500mb
+ set ::str500_len [string length $::str500]
+ }
+}
+
+# Utility function to write big argument into redis client connection
+proc write_big_bulk {size {prefix ""} {skip_read no}} {
+ init_large_mem_vars
+
+    assert {[string length $prefix] <= $size}
+ r write "\$$size\r\n"
+ r write $prefix
+ incr size -[string length $prefix]
+ while {$size >= 500000000} {
+ r write $::str500
+ incr size -500000000
+ }
+ if {$size > 0} {
+ r write [string repeat x $size]
+ }
+ r write "\r\n"
+ if {!$skip_read} {
+ r flush
+ r read
+ }
+}
+
+# Utility to read big bulk response (work around Tcl limitations)
+proc read_big_bulk {code {compare no} {prefix ""}} {
+ init_large_mem_vars
+
+ r readraw 1
+ set resp_len [uplevel 1 $code] ;# get the first line of the RESP response
+ assert_equal [string range $resp_len 0 0] "$"
+ set resp_len [string range $resp_len 1 end]
+ set prefix_len [string length $prefix]
+ if {$compare} {
+ assert {$prefix_len <= $resp_len}
+ assert {$prefix_len <= $::str500_len}
+ }
+
+ set remaining $resp_len
+ while {$remaining > 0} {
+ set l $remaining
+        if {$l > $::str500_len} {set l $::str500_len} ;# Tcl can't read more than 2GB at a time, so read in 500MB chunks, which also keeps the data easy to verify
+ set read_data [r rawread $l]
+ set nbytes [string length $read_data]
+ if {$compare} {
+ set comp_len $nbytes
+ # Compare prefix part
+ if {$remaining == $resp_len} {
+ assert_equal $prefix [string range $read_data 0 [expr $prefix_len - 1]]
+ set read_data [string range $read_data $prefix_len $nbytes]
+ incr comp_len -$prefix_len
+ }
+ # Compare rest of data, evaluate and then assert to avoid huge print in case of failure
+ set data_equal [expr {$read_data == [string range $::str500 0 [expr $comp_len - 1]]}]
+ assert $data_equal
+ }
+ incr remaining -$nbytes
+ }
+ assert_equal [r rawread 2] "\r\n"
+ r readraw 0
+ return $resp_len
+}
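As an aside (illustrative, not part of the patch): what these helpers put on, and take off, the wire is a plain RESP bulk string, i.e. a `$<length>\r\n` header, the payload, and a trailing `\r\n`; the payload is streamed in chunks so neither Tcl nor the test has to hold a multi-gigabyte buffer at once. A rough C sketch of the same chunked-write idea, with illustrative names (`write_all`, `send_bulk_chunked`) and the prefix handling omitted:

```c
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Write exactly len bytes to fd, retrying on short writes. */
static int write_all(int fd, const void *buf, size_t len) {
    const char *p = buf;
    while (len > 0) {
        ssize_t n = write(fd, p, len);
        if (n <= 0) return -1;
        p += n;
        len -= (size_t)n;
    }
    return 0;
}

/* Send a RESP bulk string of `total` bytes by repeating a filler chunk,
 * mirroring how write_big_bulk repeats its 500MB helper string. */
int send_bulk_chunked(int fd, size_t total, const char *chunk, size_t chunklen) {
    char hdr[32];
    int hl = snprintf(hdr, sizeof(hdr), "$%zu\r\n", total);
    if (hl < 0 || write_all(fd, hdr, (size_t)hl) < 0) return -1;
    while (total > 0) {
        size_t n = total < chunklen ? total : chunklen;
        if (write_all(fd, chunk, n) < 0) return -1;
        total -= n;
    }
    return write_all(fd, "\r\n", 2);
}
```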
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index d0612ca7b..d1405e3e3 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -85,6 +85,7 @@ set ::all_tests {
unit/networking
unit/cluster
unit/client-eviction
+ unit/violations
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index b08f75c51..c185951c9 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1,38 +1,3 @@
-set ::str500 [string repeat x 500000000] ;# 500mb
-
-# Utility function to write big argument into redis client connection
-proc write_big_bulk {size} {
- r write "\$$size\r\n"
- while {$size >= 500000000} {
- r write $::str500
- incr size -500000000
- }
- if {$size > 0} {
- r write [string repeat x $size]
- }
- r write "\r\n"
- r flush
- r read
-}
-
-# Utility to read big bulk response (work around Tcl limitations)
-proc read_big_bulk {code} {
- r readraw 1
- set resp_len [uplevel 1 $code] ;# get the first line of the RESP response
- assert_equal [string range $resp_len 0 0] "$"
- set resp_len [string range $resp_len 1 end]
- set remaining $resp_len
- while {$remaining > 0} {
- set l $remaining
- if {$l > 2147483647} {set l 2147483647}
- set nbytes [string length [r rawread $l]]
- incr remaining [expr {- $nbytes}]
- }
- assert_equal [r rawread 2] "\r\n"
- r readraw 0
- return $resp_len
-}
-
# check compression functionality of plain and zipped nodes
start_server [list overrides [list save ""] ] {
r config set list-compress-depth 2
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 7e30fd3be..587bd58f8 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -934,3 +934,38 @@ start_server {
}
}
}
+
+start_server [list overrides [list save ""] ] {
+
+# Check whether the server supports such large configs (skipped on 32-bit builds)
+catch {
+ r config set proto-max-bulk-len 10000000000 ;#10gb
+ r config set client-query-buffer-limit 10000000000 ;#10gb
+}
+if {[lindex [r config get proto-max-bulk-len] 1] == 10000000000} {
+
+ set str_length 4400000000 ;#~4.4GB
+
+ test {SADD, SCARD, SISMEMBER - large data} {
+ r flushdb
+ r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
+ assert_equal 1 [write_big_bulk $str_length "aaa"]
+ r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
+ assert_equal 1 [write_big_bulk $str_length "bbb"]
+ r write "*3\r\n\$4\r\nSADD\r\n\$5\r\nmyset\r\n"
+ assert_equal 0 [write_big_bulk $str_length "aaa"]
+ assert_encoding hashtable myset
+ set s0 [s used_memory]
+ assert {$s0 > [expr $str_length * 2]}
+ assert_equal 2 [r scard myset]
+
+ r write "*3\r\n\$9\r\nSISMEMBER\r\n\$5\r\nmyset\r\n"
+ assert_equal 1 [write_big_bulk $str_length "aaa"]
+ r write "*3\r\n\$9\r\nSISMEMBER\r\n\$5\r\nmyset\r\n"
+ assert_equal 0 [write_big_bulk $str_length "ccc"]
+ r write "*3\r\n\$4\r\nSREM\r\n\$5\r\nmyset\r\n"
+ assert_equal 1 [write_big_bulk $str_length "bbb"]
+ assert_equal [read_big_bulk {r spop myset} yes "aaa"] $str_length
+ } {} {large-memory}
+} ;# skip 32bit builds
+}
diff --git a/tests/unit/violations.tcl b/tests/unit/violations.tcl
index 1d3140c52..716edf8ac 100644
--- a/tests/unit/violations.tcl
+++ b/tests/unit/violations.tcl
@@ -1,20 +1,3 @@
-# These tests consume massive amounts of memory, and are not
-# suitable to be executed as part of the normal test suite
-set ::str500 [string repeat x 500000000] ;# 500mb
-
-# Utility function to write big argument into redis client connection
-proc write_big_bulk {size} {
- r write "\$$size\r\n"
- while {$size >= 500000000} {
- r write $::str500
- incr size -500000000
- }
- if {$size > 0} {
- r write [string repeat x $size]
- }
- r write "\r\n"
-}
-
# One XADD with one huge 5GB field
# Expected to fail resulting in an empty stream
start_server [list overrides [list save ""] ] {
@@ -23,12 +6,12 @@ start_server [list overrides [list save ""] ] {
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*5\r\n\$4\r\nXADD\r\n\$2\r\nS1\r\n\$1\r\n*\r\n"
r write "\$1\r\nA\r\n"
- write_big_bulk 5000000000 ;#5gb
- r flush
- catch {r read} err
+ catch {
+ write_big_bulk 5000000000 ;#5gb
+ } err
assert_match {*too large*} $err
r xlen S1
- } {0}
+ } {0} {large-memory}
}
# One XADD with one huge field of almost exactly 4GB (4294967295 bytes)
@@ -40,12 +23,12 @@ start_server [list overrides [list save ""] ] {
r config set client-query-buffer-limit 10000000000 ;#10gb
r write "*5\r\n\$4\r\nXADD\r\n\$2\r\nS1\r\n\$1\r\n*\r\n"
r write "\$1\r\nA\r\n"
- write_big_bulk 4294967295 ;#4gb-1
- r flush
- catch {r read} err
+ catch {
+ write_big_bulk 4294967295 ;#4gb-1
+ } err
assert_match {*too large*} $err
r xlen S1
- } {0}
+ } {0} {large-memory}
}
# Gradually add big stream fields using repeated XADD calls
@@ -57,7 +40,7 @@ start_server [list overrides [list save ""] ] {
}
r ping
r xlen stream
- } {10}
+ } {10} {large-memory}
}
# Add over 4GB to a single stream listpack (one XADD command)
@@ -67,13 +50,13 @@ start_server [list overrides [list save ""] ] {
r write "*23\r\n\$4\r\nXADD\r\n\$1\r\nS\r\n\$1\r\n*\r\n"
for {set j 0} {$j<10} {incr j} {
r write "\$1\r\n$j\r\n"
- write_big_bulk 500000000 ;#500mb
+ write_big_bulk 500000000 "" yes ;#500mb
}
r flush
catch {r read} err
assert_match {*too large*} $err
r xlen S
- } {0}
+ } {0} {large-memory}
}
# Gradually add big hash fields using repeated HSET calls
@@ -86,7 +69,7 @@ start_server [list overrides [list save ""] ] {
r hset h $j $::str500
}
r object encoding h
- } {hashtable}
+ } {hashtable} {large-memory}
}
# Add over 4GB to a single hash field (one HSET command)
@@ -99,47 +82,10 @@ start_server [list overrides [list save ""] ] {
r write "*4\r\n\$4\r\nHSET\r\n\$2\r\nH1\r\n"
r write "\$1\r\nA\r\n"
write_big_bulk 5000000000 ;#5gb
- r flush
- r read
r object encoding H1
- } {hashtable}
-}
-
-# Add over 4GB to a single list member (one LPUSH command)
-# Currently unsupported, and expected to fail rather than being truncated
-# Expected to fail resulting in a non-existing list
-start_server [list overrides [list save ""] ] {
- test {list with one huge field} {
- r config set proto-max-bulk-len 10000000000 ;#10gb
- r config set client-query-buffer-limit 10000000000 ;#10gb
- r write "*3\r\n\$5\r\nLPUSH\r\n\$2\r\nL1\r\n"
- write_big_bulk 5000000000 ;#5gb
- r flush
- catch {r read} err
- assert_match {*too large*} $err
- r exists L1
- } {0}
+ } {hashtable} {large-memory}
}
-# SORT which attempts to store an element larger than 4GB into a list.
-# Currently unsupported and results in an assertion instead of truncation
-start_server [list overrides [list save ""] ] {
- test {SORT adds huge field to list} {
- r config set proto-max-bulk-len 10000000000 ;#10gb
- r config set client-query-buffer-limit 10000000000 ;#10gb
- r write "*3\r\n\$3\r\nSET\r\n\$2\r\nS1\r\n"
- write_big_bulk 5000000000 ;#5gb
- r flush
- r read
- assert_equal [r strlen S1] 5000000000
- r set S2 asdf
- r sadd myset 1 2
- r mset D1 1 D2 2
- catch {r sort myset by D* get S* store mylist}
- assert_equal [count_log_message 0 "crashed by signal"] 0
- assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
- }
-}
# SORT which stores an integer encoded element into a list.
# Just for coverage, no news here.
@@ -152,5 +98,5 @@ start_server [list overrides [list save ""] ] {
r mset D1 1 D2 2
r sort myset by D* get S* store mylist
r llen mylist
- } {2}
+ } {2} {cluster:skip}
}