summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Cook <tony@develop-help.com>2017-09-28 14:40:24 +1000
committerTony Cook <tony@develop-help.com>2019-08-19 15:30:46 +1000
commit105c827d9a0f19a772c7b179e2997d842a095460 (patch)
treed0ecf252e2adf5ebe358fd1bde81baa778ff7121
parent7654aaa1134e4908a6a6dc0b85d360816e79faa3 (diff)
downloadperl-105c827d9a0f19a772c7b179e2997d842a095460.tar.gz
(perl #124256) disallow \K in lookahead and lookbehind
\K can cause infinite loops in matching in these, and we're not sure how it really should behave, so forbid it.
-rw-r--r--pod/perldiag.pod5
-rw-r--r--regcomp.c42
-rw-r--r--t/lib/croak/regcomp9
3 files changed, 45 insertions, 11 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index f9c023d148..2fa4b62257 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -3277,6 +3277,11 @@ line. See L<perlrun> for more details.
(P) The regular expression parser is confused.
+=item \K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/%s/
+
+(F) Your regular expression used C<\K> in a lookhead or lookbehind
+assertion, which isn't permitted.
+
=item Label not found for "last %s"
(F) You named a loop to break out of, but you're not currently in a loop
diff --git a/regcomp.c b/regcomp.c
index cf9246473f..aba6648da5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -182,6 +182,7 @@ struct RExC_state_t {
through */
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
I32 in_lookbehind;
+ I32 in_lookahead;
I32 contains_locale;
I32 override_recoding;
#ifdef EBCDIC
@@ -273,6 +274,7 @@ struct RExC_state_t {
#define RExC_study_chunk_recursed_bytes \
(pRExC_state->study_chunk_recursed_bytes)
#define RExC_in_lookbehind (pRExC_state->in_lookbehind)
+#define RExC_in_lookahead (pRExC_state->in_lookahead)
#define RExC_contains_locale (pRExC_state->contains_locale)
#ifdef EBCDIC
# define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
@@ -7622,6 +7624,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_seen = 0;
RExC_maxlen = 0;
RExC_in_lookbehind = 0;
+ RExC_in_lookahead = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
#ifdef EBCDIC
RExC_recode_x_to_native = 0;
@@ -11078,6 +11081,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
*flagp = 0; /* Tentatively. */
+ if (RExC_in_lookbehind) {
+ RExC_in_lookbehind++;
+ }
+ if (RExC_in_lookahead) {
+ RExC_in_lookahead++;
+ }
+
/* Having this true makes it feasible to have a lot fewer tests for the
* parse pointer being in scope. For example, we can write
* while(isFOO(*RExC_parse)) RExC_parse++;
@@ -11542,10 +11552,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?... not terminated");
}
-
- /* FALLTHROUGH */
+ RExC_seen_zerolen++;
+ break;
case '=': /* (?=...) */
RExC_seen_zerolen++;
+ RExC_in_lookahead++;
break;
case '!': /* (?!...) */
RExC_seen_zerolen++;
@@ -12344,6 +12355,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (RExC_in_lookbehind) {
RExC_in_lookbehind--;
}
+ if (RExC_in_lookahead) {
+ RExC_in_lookahead--;
+ }
if (after_freeze > RExC_npar)
RExC_npar = after_freeze;
return(ret);
@@ -13429,15 +13443,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
*flagp |= SIMPLE;
goto finish_meta_pat;
case 'K':
- RExC_seen_zerolen++;
- ret = reg_node(pRExC_state, KEEPS);
- *flagp |= SIMPLE;
- /* XXX:dmq : disabling in-place substitution seems to
- * be necessary here to avoid cases of memory corruption, as
- * with: C<$_="x" x 80; s/x\K/y/> -- rgs
- */
- RExC_seen |= REG_LOOKBEHIND_SEEN;
- goto finish_meta_pat;
+ if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+ RExC_seen_zerolen++;
+ ret = reg_node(pRExC_state, KEEPS);
+ *flagp |= SIMPLE;
+ /* XXX:dmq : disabling in-place substitution seems to
+ * be necessary here to avoid cases of memory corruption, as
+ * with: C<$_="x" x 80; s/x\K/y/> -- rgs
+ */
+ RExC_seen |= REG_LOOKBEHIND_SEEN;
+ goto finish_meta_pat;
+ }
+ else {
+ ++RExC_parse; /* advance past the 'K' */
+ vFAIL("\\K not permitted in lookahead/lookbehind");
+ }
case 'Z':
ret = reg_node(pRExC_state, SEOL);
*flagp |= SIMPLE;
diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp
index 0ba705e915..fc410829b6 100644
--- a/t/lib/croak/regcomp
+++ b/t/lib/croak/regcomp
@@ -70,3 +70,12 @@ qr/((a))/;
EXPECT
Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3.
########
+# NAME \K not permitted in lookahead
+$x =~ /(?=a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?=a\K <-- HERE a)a/ at - line 1.
+########
+# NAME \K not permitted in lookbehind
+$x =~ /(?<=a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?<=a\K <-- HERE a)a/ at - line 1.