summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--embed.fnc4
-rw-r--r--hv.c10
-rw-r--r--hv_func.h10
-rw-r--r--pod/perldiag.pod7
-rw-r--r--proto.h4
-rw-r--r--t/bigmem/hash.t33
7 files changed, 58 insertions, 11 deletions
diff --git a/MANIFEST b/MANIFEST
index 0d61af0062..a54b1d88d8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5267,6 +5267,7 @@ t/base/term.t See if various terms work
t/base/translate.t See if character set translation works
t/base/while.t See if while work
t/benchmark/rt26188-speed-up-keys-on-empty-hash.t Benchmark if keys on empty hashes is fast enough
+t/bigmem/hash.t Check hashing too large strings throws an exception
t/bigmem/index.t Check that index() handles large offsets
t/bigmem/pos.t Check that pos() handles large offsets
t/bigmem/read.t Check read() handles large offsets
diff --git a/embed.fnc b/embed.fnc
index 69e505107f..d3aec2666a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1444,7 +1444,7 @@ AMpd |OP* |op_scope |NULLOK OP* o
: Only used by perl.c/miniperl.c, but defined in caretx.c
px |void |set_caret_X
Apd |void |setdefout |NN GV* gv
-Ap |HEK* |share_hek |NN const char* str|I32 len|U32 hash
+Ap |HEK* |share_hek |NN const char* str|SSize_t len|U32 hash
#if defined(HAS_SIGACTION) && defined(SA_SIGINFO)
: Used in perl.c
np |Signal_t |sighandler |int sig|NULLOK siginfo_t *info|NULLOK void *uap
@@ -2072,7 +2072,7 @@ sR |HE* |new_he
sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags
sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store
s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
-sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
+sR |HEK* |share_hek_flags|NN const char *str|STRLEN len|U32 hash|int flags
rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg
in |U32|ptr_hash|PTRV u
s |struct xpvhv_aux*|hv_auxinit|NN HV *hv
diff --git a/hv.c b/hv.c
index 3bd62c6f9d..8acf33a4b2 100644
--- a/hv.c
+++ b/hv.c
@@ -2966,7 +2966,7 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
* len and hash must both be valid for str.
*/
HEK *
-Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash)
+Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash)
{
bool is_utf8 = FALSE;
int flags = 0;
@@ -2998,7 +2998,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash)
}
STATIC HEK *
-S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
+S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
{
HE *entry;
const int flags_masked = flags & HVhek_MASK;
@@ -3007,6 +3007,10 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
PERL_ARGS_ASSERT_SHARE_HEK_FLAGS;
+ if (UNLIKELY(len > (STRLEN) I32_MAX)) {
+ Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes");
+ }
+
/* what follows is the moral equivalent of:
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
@@ -3021,7 +3025,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
for (;entry; entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
- if (HeKLEN(entry) != len)
+ if (HeKLEN(entry) != (SSize_t) len)
continue;
if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
continue;
diff --git a/hv_func.h b/hv_func.h
index e091c86b0f..8a92c362e9 100644
--- a/hv_func.h
+++ b/hv_func.h
@@ -93,10 +93,12 @@
sbox32_seed_state96(seed + __PERL_HASH_SEED_BYTES , state + __PERL_HASH_STATE_BYTES); \
} STMT_END
-#define _PERL_HASH_WITH_STATE(state,str,len) \
- ((len <= SBOX32_MAX_LEN) \
- ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \
- : __PERL_HASH_WITH_STATE((state),(str),(len)))
+#define _PERL_HASH_WITH_STATE(state,str,len) \
+ (LIKELY(len <= SBOX32_MAX_LEN) \
+ ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \
+ : UNLIKELY(len > (STRLEN) I32_MAX) \
+ ? Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes") \
+ : __PERL_HASH_WITH_STATE((state),(str),(len)))
#endif
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 876833338f..8f24318f6f 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -5645,6 +5645,13 @@ overhauled.
(F) An ancient error message that almost nobody ever runs into anymore.
But before sort was a keyword, people sometimes used it as a filehandle.
+=item Sorry, hash keys must be smaller than 2**31 bytes
+
+(F) You tried to create a hash containing a very large key, where "very
+large" means that it needs at least 2 gigabytes to store. Unfortunately,
+Perl doesn't yet handle such large hash keys. You should
+reconsider your design to avoid hashing such a long string directly.
+
=item Source filters apply only to byte streams
(F) You tried to activate a source filter (usually by loading a
diff --git a/proto.h b/proto.h
index b581ffdbaf..e129449a4a 100644
--- a/proto.h
+++ b/proto.h
@@ -2914,7 +2914,7 @@ PERL_CALLCONV void Perl_set_numeric_standard(pTHX);
PERL_CALLCONV void Perl_setdefout(pTHX_ GV* gv);
#define PERL_ARGS_ASSERT_SETDEFOUT \
assert(gv)
-PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, I32 len, U32 hash);
+PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, SSize_t len, U32 hash);
#define PERL_ARGS_ASSERT_SHARE_HEK \
assert(str)
PERL_CALLCONV void Perl_sortsv(pTHX_ SV** array, size_t num_elts, SVCOMPARE_t cmp);
@@ -4570,7 +4570,7 @@ STATIC HEK* S_save_hek_flags(const char *str, I32 len, U32 hash, int flags)
#define PERL_ARGS_ASSERT_SAVE_HEK_FLAGS \
assert(str)
-STATIC HEK* S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
+STATIC HEK* S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_SHARE_HEK_FLAGS \
assert(str)
diff --git a/t/bigmem/hash.t b/t/bigmem/hash.t
new file mode 100644
index 0000000000..e3d2980e02
--- /dev/null
+++ b/t/bigmem/hash.t
@@ -0,0 +1,33 @@
+#!perl
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = "../lib";
+ require './test.pl';
+}
+
+use Config qw(%Config);
+
+$ENV{PERL_TEST_MEMORY} >= 4
+ or skip_all("Need ~4Gb for this test");
+$Config{ptrsize} >= 8
+ or skip_all("Need 64-bit pointers for this test");
+
+plan(2);
+
+sub exn {
+ my ($code_string) = @_;
+ local $@;
+ return undef if eval "do { $code_string }; 1";
+ return $@;
+}
+
+like(exn('my $h = { "x" x 2**31, undef }'),
+ qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/,
+ "hash constructed with huge key");
+
+TODO: {
+ local $TODO = "Doesn't yet work with OP_MULTIDEREF";
+ like(exn('my %h; %h{ "x" x 2**31 } = undef'),
+ qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/,
+ "assign to huge hash key");
+}