diff options
author | Aaron Crane <arc@cpan.org> | 2017-06-01 14:42:22 +0200 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2017-06-01 17:17:34 +0200 |
commit | b02f36453d1392e2b0bd62fdde2b286fb60bd5bc (patch) | |
tree | 511763b834e942d731ea762309284dccbd5450d0 | |
parent | 1cf41740f284a4e05bbefc5b15c5ffd9c254aa78 (diff) | |
download | perl-b02f36453d1392e2b0bd62fdde2b286fb60bd5bc.tar.gz |
RT #127742: Hash keys are limited to 2 GB - throw an exception if hash keys are too long
We currently require hash keys to be less than 2**31 bytes long. But (a)
nothing actually tries to enforce that, and (b) if a Perl program tries to
create a hash with such a key (using a 64-bit system), we miscalculate the
size of a memory block, yielding a panic:
$ ./perl -e '+{ "x" x 2**31, undef }'
panic: malloc, size=18446744071562068026 at -e line 1.
Instead, check for this situation, and croak with an appropriate (new)
diagnostic in the unlikely event that it occurs.
This also involves changing the type of an argument to a public API function:
Perl_share_hek() previously took the key's length as an I32, but that makes
it impossible to detect over-long keys, so it must be SSize_t instead.
From Yves:
We also inject the length test into the PERL_HASH() macro, so that where
the macro is used *before* calling into any of the hv functions we can
avoid hashing a very long string only to throw an exception that it is
too long. Might as well fail fast.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | hv.c | 10 | ||||
-rw-r--r-- | hv_func.h | 10 | ||||
-rw-r--r-- | pod/perldiag.pod | 7 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | t/bigmem/hash.t | 33 |
7 files changed, 58 insertions, 11 deletions
@@ -5267,6 +5267,7 @@ t/base/term.t See if various terms work t/base/translate.t See if character set translation works t/base/while.t See if while work t/benchmark/rt26188-speed-up-keys-on-empty-hash.t Benchmark if keys on empty hashes is fast enough +t/bigmem/hash.t Check hashing too large strings throws an exception t/bigmem/index.t Check that index() handles large offsets t/bigmem/pos.t Check that pos() handles large offsets t/bigmem/read.t Check read() handles large offsets @@ -1444,7 +1444,7 @@ AMpd |OP* |op_scope |NULLOK OP* o : Only used by perl.c/miniperl.c, but defined in caretx.c px |void |set_caret_X Apd |void |setdefout |NN GV* gv -Ap |HEK* |share_hek |NN const char* str|I32 len|U32 hash +Ap |HEK* |share_hek |NN const char* str|SSize_t len|U32 hash #if defined(HAS_SIGACTION) && defined(SA_SIGINFO) : Used in perl.c np |Signal_t |sighandler |int sig|NULLOK siginfo_t *info|NULLOK void *uap @@ -2072,7 +2072,7 @@ sR |HE* |new_he sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash -sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags +sR |HEK* |share_hek_flags|NN const char *str|STRLEN len|U32 hash|int flags rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg in |U32|ptr_hash|PTRV u s |struct xpvhv_aux*|hv_auxinit|NN HV *hv @@ -2966,7 +2966,7 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) * len and hash must both be valid for str. */ HEK * -Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash) +Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash) { bool is_utf8 = FALSE; int flags = 0; @@ -2998,7 +2998,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash) } STATIC HEK * -S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) +S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags) { HE *entry; const int flags_masked = flags & HVhek_MASK; @@ -3007,6 +3007,10 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) PERL_ARGS_ASSERT_SHARE_HEK_FLAGS; + if (UNLIKELY(len > (STRLEN) I32_MAX)) { + Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes"); + } + /* what follows is the moral equivalent of: if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE))) @@ -3021,7 +3025,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) for (;entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; - if (HeKLEN(entry) != len) + if (HeKLEN(entry) != (SSize_t) len) continue; if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */ continue; @@ -93,10 +93,12 @@ sbox32_seed_state96(seed + __PERL_HASH_SEED_BYTES , state + __PERL_HASH_STATE_BYTES); \ } STMT_END -#define _PERL_HASH_WITH_STATE(state,str,len) \ - ((len <= SBOX32_MAX_LEN) \ - ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \ - : __PERL_HASH_WITH_STATE((state),(str),(len))) +#define _PERL_HASH_WITH_STATE(state,str,len) \ + (LIKELY(len <= SBOX32_MAX_LEN) \ + ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \ + : UNLIKELY(len > (STRLEN) I32_MAX) \ + ? Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes") \ + : __PERL_HASH_WITH_STATE((state),(str),(len))) #endif diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 876833338f..8f24318f6f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -5645,6 +5645,13 @@ overhauled. (F) An ancient error message that almost nobody ever runs into anymore. But before sort was a keyword, people sometimes used it as a filehandle. +=item Sorry, hash keys must be smaller than 2**31 bytes + +(F) You tried to create a hash containing a very large key, where "very +large" means that it needs at least 2 gigabytes to store. Unfortunately, +Perl doesn't yet handle such large hash keys. You should +reconsider your design to avoid hashing such a long string directly. + =item Source filters apply only to byte streams (F) You tried to activate a source filter (usually by loading a @@ -2914,7 +2914,7 @@ PERL_CALLCONV void Perl_set_numeric_standard(pTHX); PERL_CALLCONV void Perl_setdefout(pTHX_ GV* gv); #define PERL_ARGS_ASSERT_SETDEFOUT \ assert(gv) -PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, I32 len, U32 hash); +PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, SSize_t len, U32 hash); #define PERL_ARGS_ASSERT_SHARE_HEK \ assert(str) PERL_CALLCONV void Perl_sortsv(pTHX_ SV** array, size_t num_elts, SVCOMPARE_t cmp); @@ -4570,7 +4570,7 @@ STATIC HEK* S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) #define PERL_ARGS_ASSERT_SAVE_HEK_FLAGS \ assert(str) -STATIC HEK* S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) +STATIC HEK* S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_SHARE_HEK_FLAGS \ assert(str) diff --git a/t/bigmem/hash.t b/t/bigmem/hash.t new file mode 100644 index 0000000000..e3d2980e02 --- /dev/null +++ b/t/bigmem/hash.t @@ -0,0 +1,33 @@ +#!perl +BEGIN { + chdir 't' if -d 't'; + @INC = "../lib"; + require './test.pl'; +} + +use Config qw(%Config); + +$ENV{PERL_TEST_MEMORY} >= 4 + or skip_all("Need ~4Gb for this test"); +$Config{ptrsize} >= 8 + or skip_all("Need 64-bit pointers for this test"); + +plan(2); + +sub exn { + my ($code_string) = @_; + local $@; + return undef if eval "do { $code_string }; 1"; + return $@; +} + +like(exn('my $h = { "x" x 2**31, undef }'), + qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/, + "hash constructed with huge key"); + +TODO: { + local $TODO = "Doesn't yet work with OP_MULTIDEREF"; + like(exn('my %h; %h{ "x" x 2**31 } = undef'), + qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/, + "assign to huge hash key"); +} |