diff options
author | Tony Cook <tony@develop-help.com> | 2017-09-28 14:40:24 +1000 |
---|---|---|
committer | Tony Cook <tony@develop-help.com> | 2019-08-19 15:30:46 +1000 |
commit | 105c827d9a0f19a772c7b179e2997d842a095460 (patch) | |
tree | d0ecf252e2adf5ebe358fd1bde81baa778ff7121 | |
parent | 7654aaa1134e4908a6a6dc0b85d360816e79faa3 (diff) | |
download | perl-105c827d9a0f19a772c7b179e2997d842a095460.tar.gz |
(perl #124256) disallow \K in lookahead and lookbehind
\K can cause infinite loops in matching in these, and we're not sure
how it really should behave, so forbid it.
-rw-r--r-- | pod/perldiag.pod | 5 | ||||
-rw-r--r-- | regcomp.c | 42 | ||||
-rw-r--r-- | t/lib/croak/regcomp | 9 |
3 files changed, 45 insertions, 11 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod index f9c023d148..2fa4b62257 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3277,6 +3277,11 @@ line. See L<perlrun> for more details. (P) The regular expression parser is confused. +=item \K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/%s/ + +(F) Your regular expression used C<\K> in a lookhead or lookbehind +assertion, which isn't permitted. + =item Label not found for "last %s" (F) You named a loop to break out of, but you're not currently in a loop @@ -182,6 +182,7 @@ struct RExC_state_t { through */ U32 study_chunk_recursed_bytes; /* bytes in bitmap */ I32 in_lookbehind; + I32 in_lookahead; I32 contains_locale; I32 override_recoding; #ifdef EBCDIC @@ -273,6 +274,7 @@ struct RExC_state_t { #define RExC_study_chunk_recursed_bytes \ (pRExC_state->study_chunk_recursed_bytes) #define RExC_in_lookbehind (pRExC_state->in_lookbehind) +#define RExC_in_lookahead (pRExC_state->in_lookahead) #define RExC_contains_locale (pRExC_state->contains_locale) #ifdef EBCDIC # define RExC_recode_x_to_native (pRExC_state->recode_x_to_native) @@ -7622,6 +7624,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_seen = 0; RExC_maxlen = 0; RExC_in_lookbehind = 0; + RExC_in_lookahead = 0; RExC_seen_zerolen = *exp == '^' ? -1 : 0; #ifdef EBCDIC RExC_recode_x_to_native = 0; @@ -11078,6 +11081,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) *flagp = 0; /* Tentatively. */ + if (RExC_in_lookbehind) { + RExC_in_lookbehind++; + } + if (RExC_in_lookahead) { + RExC_in_lookahead++; + } + /* Having this true makes it feasible to have a lot fewer tests for the * parse pointer being in scope. For example, we can write * while(isFOO(*RExC_parse)) RExC_parse++; @@ -11542,10 +11552,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (RExC_parse >= RExC_end) { vFAIL("Sequence (?... not terminated"); } - - /* FALLTHROUGH */ + RExC_seen_zerolen++; + break; case '=': /* (?=...) */ RExC_seen_zerolen++; + RExC_in_lookahead++; break; case '!': /* (?!...) */ RExC_seen_zerolen++; @@ -12344,6 +12355,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (RExC_in_lookbehind) { RExC_in_lookbehind--; } + if (RExC_in_lookahead) { + RExC_in_lookahead--; + } if (after_freeze > RExC_npar) RExC_npar = after_freeze; return(ret); @@ -13429,15 +13443,21 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) *flagp |= SIMPLE; goto finish_meta_pat; case 'K': - RExC_seen_zerolen++; - ret = reg_node(pRExC_state, KEEPS); - *flagp |= SIMPLE; - /* XXX:dmq : disabling in-place substitution seems to - * be necessary here to avoid cases of memory corruption, as - * with: C<$_="x" x 80; s/x\K/y/> -- rgs - */ - RExC_seen |= REG_LOOKBEHIND_SEEN; - goto finish_meta_pat; + if (!RExC_in_lookbehind && !RExC_in_lookahead) { + RExC_seen_zerolen++; + ret = reg_node(pRExC_state, KEEPS); + *flagp |= SIMPLE; + /* XXX:dmq : disabling in-place substitution seems to + * be necessary here to avoid cases of memory corruption, as + * with: C<$_="x" x 80; s/x\K/y/> -- rgs + */ + RExC_seen |= REG_LOOKBEHIND_SEEN; + goto finish_meta_pat; + } + else { + ++RExC_parse; /* advance past the 'K' */ + vFAIL("\\K not permitted in lookahead/lookbehind"); + } case 'Z': ret = reg_node(pRExC_state, SEOL); *flagp |= SIMPLE; diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp index 0ba705e915..fc410829b6 100644 --- a/t/lib/croak/regcomp +++ b/t/lib/croak/regcomp @@ -70,3 +70,12 @@ qr/((a))/; EXPECT Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3. ######## +# NAME \K not permitted in lookahead +$x =~ /(?=a\Ka)a/; +EXPECT +\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?=a\K <-- HERE a)a/ at - line 1. +######## +# NAME \K not permitted in lookbehind +$x =~ /(?<=a\Ka)a/; +EXPECT +\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?<=a\K <-- HERE a)a/ at - line 1. |