summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Crane <arc@cpan.org>2017-06-01 14:42:22 +0200
committerYves Orton <demerphq@gmail.com>2017-06-01 17:17:34 +0200
commitb02f36453d1392e2b0bd62fdde2b286fb60bd5bc (patch)
tree511763b834e942d731ea762309284dccbd5450d0
parent1cf41740f284a4e05bbefc5b15c5ffd9c254aa78 (diff)
downloadperl-b02f36453d1392e2b0bd62fdde2b286fb60bd5bc.tar.gz
RT #127742: Hash keys are limited to 2 GB - throw an exception if hash keys are too long
We currently require hash keys to be less than 2**31 bytes long. But (a) nothing actually tries to enforce that, and (b) if a Perl program tries to create a hash with such a key (using a 64-bit system), we miscalculate the size of a memory block, yielding a panic: $ ./perl -e '+{ "x" x 2**31, undef }' panic: malloc, size=18446744071562068026 at -e line 1. Instead, check for this situation, and croak with an appropriate (new) diagnostic in the unlikely event that it occurs. This also involves changing the type of an argument to a public API function: Perl_share_hek() previously took the key's length as an I32, but that makes it impossible to detect over-long keys, so it must be SSize_t instead. From Yves: We also inject the length test into the PERL_HASH() macro, so that where the macro is used *before* calling into any of the hv functions we can avoid hashing a very long string only to throw an exception that it is too long. Might as well fail fast.
-rw-r--r--MANIFEST1
-rw-r--r--embed.fnc4
-rw-r--r--hv.c10
-rw-r--r--hv_func.h10
-rw-r--r--pod/perldiag.pod7
-rw-r--r--proto.h4
-rw-r--r--t/bigmem/hash.t33
7 files changed, 58 insertions, 11 deletions
diff --git a/MANIFEST b/MANIFEST
index 0d61af0062..a54b1d88d8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5267,6 +5267,7 @@ t/base/term.t See if various terms work
t/base/translate.t See if character set translation works
t/base/while.t See if while work
t/benchmark/rt26188-speed-up-keys-on-empty-hash.t Benchmark if keys on empty hashes is fast enough
+t/bigmem/hash.t Check hashing too large strings throws an exception
t/bigmem/index.t Check that index() handles large offsets
t/bigmem/pos.t Check that pos() handles large offsets
t/bigmem/read.t Check read() handles large offsets
diff --git a/embed.fnc b/embed.fnc
index 69e505107f..d3aec2666a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1444,7 +1444,7 @@ AMpd |OP* |op_scope |NULLOK OP* o
: Only used by perl.c/miniperl.c, but defined in caretx.c
px |void |set_caret_X
Apd |void |setdefout |NN GV* gv
-Ap |HEK* |share_hek |NN const char* str|I32 len|U32 hash
+Ap |HEK* |share_hek |NN const char* str|SSize_t len|U32 hash
#if defined(HAS_SIGACTION) && defined(SA_SIGINFO)
: Used in perl.c
np |Signal_t |sighandler |int sig|NULLOK siginfo_t *info|NULLOK void *uap
@@ -2072,7 +2072,7 @@ sR |HE* |new_he
sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags
sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store
s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
-sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
+sR |HEK* |share_hek_flags|NN const char *str|STRLEN len|U32 hash|int flags
rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg
in |U32|ptr_hash|PTRV u
s |struct xpvhv_aux*|hv_auxinit|NN HV *hv
diff --git a/hv.c b/hv.c
index 3bd62c6f9d..8acf33a4b2 100644
--- a/hv.c
+++ b/hv.c
@@ -2966,7 +2966,7 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash)
* len and hash must both be valid for str.
*/
HEK *
-Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash)
+Perl_share_hek(pTHX_ const char *str, SSize_t len, U32 hash)
{
bool is_utf8 = FALSE;
int flags = 0;
@@ -2998,7 +2998,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, U32 hash)
}
STATIC HEK *
-S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
+S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
{
HE *entry;
const int flags_masked = flags & HVhek_MASK;
@@ -3007,6 +3007,10 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
PERL_ARGS_ASSERT_SHARE_HEK_FLAGS;
+ if (UNLIKELY(len > (STRLEN) I32_MAX)) {
+ Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes");
+ }
+
/* what follows is the moral equivalent of:
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
@@ -3021,7 +3025,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
for (;entry; entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
- if (HeKLEN(entry) != len)
+ if (HeKLEN(entry) != (SSize_t) len)
continue;
if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
continue;
diff --git a/hv_func.h b/hv_func.h
index e091c86b0f..8a92c362e9 100644
--- a/hv_func.h
+++ b/hv_func.h
@@ -93,10 +93,12 @@
sbox32_seed_state96(seed + __PERL_HASH_SEED_BYTES , state + __PERL_HASH_STATE_BYTES); \
} STMT_END
-#define _PERL_HASH_WITH_STATE(state,str,len) \
- ((len <= SBOX32_MAX_LEN) \
- ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \
- : __PERL_HASH_WITH_STATE((state),(str),(len)))
+#define _PERL_HASH_WITH_STATE(state,str,len) \
+ (LIKELY(len <= SBOX32_MAX_LEN) \
+ ? sbox32_hash_with_state((state + __PERL_HASH_STATE_BYTES),(U8*)(str),(len)) \
+ : UNLIKELY(len > (STRLEN) I32_MAX) \
+ ? Perl_croak_nocontext("Sorry, hash keys must be smaller than 2**31 bytes") \
+ : __PERL_HASH_WITH_STATE((state),(str),(len)))
#endif
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 876833338f..8f24318f6f 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -5645,6 +5645,13 @@ overhauled.
(F) An ancient error message that almost nobody ever runs into anymore.
But before sort was a keyword, people sometimes used it as a filehandle.
+=item Sorry, hash keys must be smaller than 2**31 bytes
+
+(F) You tried to create a hash containing a very large key, where "very
+large" means that it needs at least 2 gigabytes to store. Unfortunately,
+Perl doesn't yet handle such large hash keys. You should
+reconsider your design to avoid hashing such a long string directly.
+
=item Source filters apply only to byte streams
(F) You tried to activate a source filter (usually by loading a
diff --git a/proto.h b/proto.h
index b581ffdbaf..e129449a4a 100644
--- a/proto.h
+++ b/proto.h
@@ -2914,7 +2914,7 @@ PERL_CALLCONV void Perl_set_numeric_standard(pTHX);
PERL_CALLCONV void Perl_setdefout(pTHX_ GV* gv);
#define PERL_ARGS_ASSERT_SETDEFOUT \
assert(gv)
-PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, I32 len, U32 hash);
+PERL_CALLCONV HEK* Perl_share_hek(pTHX_ const char* str, SSize_t len, U32 hash);
#define PERL_ARGS_ASSERT_SHARE_HEK \
assert(str)
PERL_CALLCONV void Perl_sortsv(pTHX_ SV** array, size_t num_elts, SVCOMPARE_t cmp);
@@ -4570,7 +4570,7 @@ STATIC HEK* S_save_hek_flags(const char *str, I32 len, U32 hash, int flags)
#define PERL_ARGS_ASSERT_SAVE_HEK_FLAGS \
assert(str)
-STATIC HEK* S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags)
+STATIC HEK* S_share_hek_flags(pTHX_ const char *str, STRLEN len, U32 hash, int flags)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_SHARE_HEK_FLAGS \
assert(str)
diff --git a/t/bigmem/hash.t b/t/bigmem/hash.t
new file mode 100644
index 0000000000..e3d2980e02
--- /dev/null
+++ b/t/bigmem/hash.t
@@ -0,0 +1,33 @@
+#!perl
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = "../lib";
+ require './test.pl';
+}
+
+use Config qw(%Config);
+
+$ENV{PERL_TEST_MEMORY} >= 4
+ or skip_all("Need ~4Gb for this test");
+$Config{ptrsize} >= 8
+ or skip_all("Need 64-bit pointers for this test");
+
+plan(2);
+
+sub exn {
+ my ($code_string) = @_;
+ local $@;
+ return undef if eval "do { $code_string }; 1";
+ return $@;
+}
+
+like(exn('my $h = { "x" x 2**31, undef }'),
+ qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/,
+ "hash constructed with huge key");
+
+TODO: {
+ local $TODO = "Doesn't yet work with OP_MULTIDEREF";
+ like(exn('my %h; %h{ "x" x 2**31 } = undef'),
+ qr/^\QSorry, hash keys must be smaller than 2**31 bytes\E\b/,
+ "assign to huge hash key");
+}