misaligned buffer with heredoc and /(?{...})/

RT #129199 When an re_eval like /(?{...})/ is tokenised, as well as tokenising the individual elements of the code, the whole src string is returned as a constant too, to enable the stringification of the regex to be calculated. For example, /abc(?{$x})def/ is tokenised like MATCH '(' CONST('abc') DO '{' '$' CONST('x') '}' ',' CONST('(?{$x})') ',' CONST('def'), ')' If the code within the (?{...}) contains a heredoc (<<) and the PL_linestr buffer happens to get reallocated, the pointer which points to the start of the code string will get adjusted using the wrong buffer pointer. Later when the end of the code is reached and the whole code string '(?{$x})' is copied to a new SV, garbage may get copied (or it may panic with -ve length, out of memory etc). Note that this garbage will only used for the string representation of the regex, e.g. my $r = qr/abc(?{$x})def/; print "$r"; # garbage used here /xyz$r/; # garbage not used here
author: David Mitchell <davem@iabyn.com> 2016-12-10 15:06:30 +0000
committer: David Mitchell <davem@iabyn.com> 2016-12-10 15:50:12 +0000
commit: 98d5e3efa825adce1bfa065a5deed791c30162ac (patch)
tree: e591c8784b775e2f3a8a1cafb0af37074d3c8e2a
parent: 88e94c8da78227f5d839a2b1aca58b5e4fd24364 (diff)
download: perl-98d5e3efa825adce1bfa065a5deed791c30162ac.tar.gz
2 files changed, 16 insertions, 2 deletions
diff --git a/t/re/reg_eval.t b/t/re/reg_eval.t
index 09bc3d4c0a..b492178ec6 100644
--- a/t/re/reg_eval.t
+++ b/t/re/reg_eval.t
@@ -83,4 +83,10 @@ fresh_perl_is($preamble . <<'CODE', 'no match ::', {}, 'regex distillation 4');
 match("Jim Jones, 35 years old, secret wombat 007.");
 CODE
 
+# RT #129199: this is mainly for ASAN etc's benefit
+fresh_perl_is(<<'CODE', '', {}, "RT #129199:");
+/(?{<<""})/
+0
+CODE
+
 done_testing;
diff --git a/toke.c b/toke.c
index 841b5f90ee..fd819a9de1 100644
--- a/toke.c
+++ b/toke.c
@@ -923,10 +923,18 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     char *buf;
     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
+    bool current;
+
     linestr = PL_parser->linestr;
     buf = SvPVX(linestr);
     if (len <= SvLEN(linestr))
 	return buf;
+
+    /* Is the lex_shared linestr SV the same as the current linestr SV?
+     * Only in this case does re_eval_start need adjusting, since it
+     * points within lex_shared->ls_linestr's buffer */
+    current = (linestr == PL_parser->lex_shared->ls_linestr);
+
     bufend_pos = PL_parser->bufend - buf;
     bufptr_pos = PL_parser->bufptr - buf;
     oldbufptr_pos = PL_parser->oldbufptr - buf;
@@ -934,7 +942,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
     linestart_pos = PL_parser->linestart - buf;
     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
-    re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
+    re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
                             PL_parser->lex_shared->re_eval_start - buf : 0;
 
     buf = sv_grow(linestr, len);
@@ -948,7 +956,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len)
 	PL_parser->last_uni = buf + last_uni_pos;
     if (PL_parser->last_lop)
 	PL_parser->last_lop = buf + last_lop_pos;
-    if (PL_parser->lex_shared->re_eval_start)
+    if (current && PL_parser->lex_shared->re_eval_start)
         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
     return buf;
 }
author	David Mitchell <davem@iabyn.com>	2016-12-10 15:06:30 +0000
committer	David Mitchell <davem@iabyn.com>	2016-12-10 15:50:12 +0000
commit	98d5e3efa825adce1bfa065a5deed791c30162ac (patch)
tree	e591c8784b775e2f3a8a1cafb0af37074d3c8e2a
parent	88e94c8da78227f5d839a2b1aca58b5e4fd24364 (diff)
download	perl-98d5e3efa825adce1bfa065a5deed791c30162ac.tar.gz