diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | hv.c | 10 | ||||
-rw-r--r-- | hv_func.h | 10 | ||||
-rw-r--r-- | pod/perldiag.pod | 7 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | t/bigmem/hash.t | 33 |
7 files changed, 58 insertions, 11 deletions
@@ -5267,6 +5267,7 @@ t/base/term.t See if various terms work t/base/translate.t See if character set translation works t/base/while.t See if while work t/benchmark/rt26188-speed-up-keys-on-empty-hash.t Benchmark if keys on empty hashes is fast enough +t/bigmem/hash.t Check hashing too large strings throws an exception t/bigmem/index.t Check that index() handles large offsets t/bigmem/pos.t Check that pos() handles large offsets t/bigmem/read.t Check read() handles large offsets @@ -1444,7 +1444,7 @@ AMpd |OP* |op_scope |NULLOK OP* o : Only used by perl.c/miniperl.c, but defined in caretx.c px |void |set_caret_X Apd |void |setdefout |NN GV* gv -Ap |HEK* |share_hek |NN const char* str|I32 len|U32 hash +Ap |HEK* |share_hek |NN const char* str|SSize_t len|U32 hash #if defined(HAS_SIGACTION) && defined(SA_SIGINFO) : Used in perl.c np |Signal_t |sighandler |int sig|NULLOK siginfo_t *info|NULLOK void *uap @@ -2072,7 +2072,7 @@ sR |HE* |new_he sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash -sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags +sR |HEK* |share_hek_flags|NN const char *str|STRLEN len|U32 hash|int flags rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg in |U32|ptr_hash|PTRV u s |struct xpvhv_aux*|hv_auxinit|NN HV *hv @@ -2966,7 +2966,7 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) * len and hash must both be valid for str. */ HEK * -Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash) +Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash) { bool is_utf8 = FALSE; int flags = 0; @@ -2998,7 +2998,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash) } STATIC HEK * -S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) +S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags) { HE *entry; const int flags_masked = flags & HVhek_MASK; @@ -3007,6 +3007,10 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) PERL_ARGS_ASSERT_SHARE_HEK_FLAGS; + if (UNLIKELY(len > (STRLEN) I32_MAX)) { + Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes"); + } + /* what follows is the moral equivalent of: if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE))) @@ -3021,7 +3025,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) for (;entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; - if (HeKLEN(entry) != len) + if (HeKLEN(entry) != (SSize_t) len) continue; if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */ continue; @@ -93,10 +93,12 @@ sbox32_seed_state96(seed + __PERL_HASH_SEED_BYTES , state + __PERL_HASH_STATE_BYTES); \ } STMT_END -#define _PERL_HASH_WITH_STATE(state,str,len) \ - ((len <= SBOX32_MAX_LEN) \ - ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \ - : __PERL_HASH_WITH_STATE((state),(str),(len))) +#define _PERL_HASH_WITH_STATE(state,str,len) \ + (LIKELY(len <= SBOX32_MAX_LEN) \ + ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \ + : UNLIKELY(len > (STRLEN) I32_MAX) \ + ? Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes") \ + : __PERL_HASH_WITH_STATE((state),(str),(len))) #endif diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 876833338f..8f24318f6f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -5645,6 +5645,13 @@ overhauled. (F) An ancient error message that almost nobody ever runs into anymore. But before sort was a keyword, people sometimes used it as a filehandle. +=item Sorry, hash keys must be smaller than 2**31 bytes + +(F) You tried to create a hash containing a very large key, where "very +large" means that it needs at least 2 gigabytes to store. Unfortunately, +Perl doesn't yet handle such large hash keys. You should +reconsider your design to avoid hashing such a long string directly. + =item Source filters apply only to byte streams (F) You tried to activate a source filter (usually by loading a @@ -2914,7 +2914,7 @@ PERL_CALLCONV void Perl_set_numeric_standard(pTHX); PERL_CALLCONV void Perl_setdefout(pTHX_ GV* gv); #define PERL_ARGS_ASSERT_SETDEFOUT \ assert(gv) -PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, I32 len, U32 hash); +PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, SSize_t len, U32 hash); #define PERL_ARGS_ASSERT_SHARE_HEK \ assert(str) PERL_CALLCONV void Perl_sortsv(pTHX_ SV** array, size_t num_elts, SVCOMPARE_t cmp); @@ -4570,7 +4570,7 @@ STATIC HEK* S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) #define PERL_ARGS_ASSERT_SAVE_HEK_FLAGS \ assert(str) -STATIC HEK* S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) +STATIC HEK* S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_SHARE_HEK_FLAGS \ assert(str) diff --git a/t/bigmem/hash.t b/t/bigmem/hash.t new file mode 100644 index 0000000000..e3d2980e02 --- /dev/null +++ b/t/bigmem/hash.t @@ -0,0 +1,33 @@ +#!perl +BEGIN { + chdir 't' if -d 't'; + @INC = "../lib"; + require './test.pl'; +} + +use Config qw(%Config); + +$ENV{PERL_TEST_MEMORY} >= 4 + or skip_all("Need ~4Gb for this test"); +$Config{ptrsize} >= 8 + or skip_all("Need 64-bit pointers for this test"); + +plan(2); + +sub exn { + my ($code_string) = @_; + local $@; + return undef if eval "do { $code_string }; 1"; + return $@; +} + +like(exn('my $h = { "x" x 2**31, undef }'), + qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/, + "hash constructed with huge key"); + +TODO: { + local $TODO = "Doesn't yet work with OP_MULTIDEREF"; + like(exn('my %h; %h{ "x" x 2**31 } = undef'), + qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/, + "assign to huge hash key"); +} |