summary refs log tree commit diff
diff options
context:
space:
mode:
author Tony Cook <tony@develop-help.com> 2020-09-14 16:00:28 +1000
committer Steve Hay <steve.m.hay@googlemail.com> 2021-01-06 17:40:16 +0000
commit 536520b4a24d012a8aa0690f044f651300258171 (patch)
tree cd3879de39c809692ce490c1c169a2722786f253
parent 7925f8dfc09ddebcbfa214fdcd854c52090586f9 (diff)
download perl-536520b4a24d012a8aa0690f044f651300258171.tar.gz
don't croak when the \K follows the lookaround assertion
This also simplifies the flagging for these assertions: since this error is now the only thing using in_lookahead and in_lookbehind, they can be combined into a single in_lookaround. Rather than conditionally incrementing/decrementing as we recurse into S_reg, I simply save the value of in_lookaround and restore it before returning. Some unsuccessful or restart paths don't do the restore, but they result in either a croak() or a restart, which reinitialises in_lookaround anyway. Also added tests to ensure that all the different zero-width assertions with content trigger the error. (cherry picked from commit 80f44cf4982e395989f886220e05dd2071bb205a)
-rw-r--r-- regcomp.c 35
-rw-r--r-- t/lib/croak/regcomp 43
2 files changed, 53 insertions, 25 deletions
diff --git a/regcomp.c b/regcomp.c
index 2109b6a403..0da659cf80 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -242,8 +242,7 @@ struct RExC_state_t {
U8 *study_chunk_recursed; /* bitmap of which subs we have moved
through */
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
- I32 in_lookbehind;
- I32 in_lookahead;
+ I32 in_lookaround;
I32 contains_locale;
I32 override_recoding;
I32 recode_x_to_native;
@@ -330,8 +329,7 @@ struct RExC_state_t {
#define RExC_study_chunk_recursed (pRExC_state->study_chunk_recursed)
#define RExC_study_chunk_recursed_bytes \
(pRExC_state->study_chunk_recursed_bytes)
-#define RExC_in_lookbehind (pRExC_state->in_lookbehind)
-#define RExC_in_lookahead (pRExC_state->in_lookahead)
+#define RExC_in_lookaround (pRExC_state->in_lookaround)
#define RExC_contains_locale (pRExC_state->contains_locale)
#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
@@ -7791,8 +7789,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_seen = 0;
RExC_maxlen = 0;
- RExC_in_lookbehind = 0;
- RExC_in_lookahead = 0;
+ RExC_in_lookaround = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
RExC_recode_x_to_native = 0;
RExC_in_multi_char_class = 0;
@@ -11180,6 +11177,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
I32 after_freeze = 0;
I32 num; /* numeric backreferences */
SV * max_open; /* Max number of unclosed parens */
+ I32 was_in_lookaround = RExC_in_lookaround;
char * parse_start = RExC_parse; /* MJD */
char * const oregcomp_parse = RExC_parse;
@@ -11201,13 +11199,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
*flagp = 0; /* Tentatively. */
- if (RExC_in_lookbehind) {
- RExC_in_lookbehind++;
- }
- if (RExC_in_lookahead) {
- RExC_in_lookahead++;
- }
-
/* Having this true makes it feasible to have a lot fewer tests for the
* parse pointer being in scope. For example, we can write
* while(isFOO(*RExC_parse)) RExC_parse++;
@@ -11461,11 +11452,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
lookbehind_alpha_assertions:
RExC_seen |= REG_LOOKBEHIND_SEEN;
- RExC_in_lookbehind++;
/*FALLTHROUGH*/
alpha_assertions:
+ RExC_in_lookaround++;
RExC_seen_zerolen++;
if (! start_arg) {
@@ -11668,7 +11659,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
RExC_seen |= REG_LOOKBEHIND_SEEN;
- RExC_in_lookbehind++;
+ RExC_in_lookaround++;
RExC_parse++;
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?... not terminated");
@@ -11677,7 +11668,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
break;
case '=': /* (?=...) */
RExC_seen_zerolen++;
- RExC_in_lookahead++;
+ RExC_in_lookaround++;
break;
case '!': /* (?!...) */
RExC_seen_zerolen++;
@@ -11689,6 +11680,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
nextchar(pRExC_state);
return ret;
}
+ RExC_in_lookaround++;
break;
case '|': /* (?|...) */
/* branch reset, behave like a (?:...) except that
@@ -12509,14 +12501,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
NOT_REACHED; /* NOTREACHED */
}
- if (RExC_in_lookbehind) {
- RExC_in_lookbehind--;
- }
- if (RExC_in_lookahead) {
- RExC_in_lookahead--;
- }
if (after_freeze > RExC_npar)
RExC_npar = after_freeze;
+
+ RExC_in_lookaround = was_in_lookaround;
+
return(ret);
}
@@ -13627,7 +13616,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
*flagp |= SIMPLE;
goto finish_meta_pat;
case 'K':
- if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+ if (!RExC_in_lookaround) {
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, KEEPS);
*flagp |= SIMPLE;
diff --git a/t/lib/croak/regcomp b/t/lib/croak/regcomp
index 476b239fa7..74e70ed363 100644
--- a/t/lib/croak/regcomp
+++ b/t/lib/croak/regcomp
@@ -77,15 +77,54 @@ EXPECT
Too many nested open parens in regex; marked by <-- HERE in m/(( <-- HERE a))/ at - line 3.
########
# NAME \K not permitted in lookahead
-$x =~ /(?=a\Ka)a/;
+qr/(?=a\Ka)a/;
EXPECT
\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?=a\K <-- HERE a)a/ at - line 1.
########
+# NAME \K not permitted in lookahead (alpha)
+no warnings 'experimental::alpha_assertions';
+qr/(*positive_lookahead:a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(*positive_lookahead:a\K <-- HERE a)a/ at - line 2.
+########
+# NAME \K not permitted in negative lookahead
+qr/(?!a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?!a\K <-- HERE a)a/ at - line 1.
+########
+# NAME \K not permitted in negative lookahead (alpha)
+no warnings 'experimental::alpha_assertions';
+qr/(*negative_lookahead:a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(*negative_lookahead:a\K <-- HERE a)a/ at - line 2.
+########
# NAME \K not permitted in lookbehind
-$x =~ /(?<=a\Ka)a/;
+qr/(?<=a\Ka)a/;
EXPECT
\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?<=a\K <-- HERE a)a/ at - line 1.
########
+# NAME \K not permitted in lookbehind (alpha)
+no warnings 'experimental::alpha_assertions';
+qr/(*positive_lookbehind:a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(*positive_lookbehind:a\K <-- HERE a)a/ at - line 2.
+########
+# NAME \K not permitted in negative lookbehind
+qr/(?<!a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(?<!a\K <-- HERE a)a/ at - line 1.
+########
+# NAME \K not permitted in negative lookbehind (alpha)
+no warnings 'experimental::alpha_assertions';
+qr/(*negative_lookbehind:a\Ka)a/;
+EXPECT
+\K not permitted in lookahead/lookbehind in regex; marked by <-- HERE in m/(*negative_lookbehind:a\K <-- HERE a)a/ at - line 2.
+########
+# NAME \K is permitted after the lookahead GH#18123
+qr/(?=(?=x)x)\K/;
+EXPECT
+OPTIONS nonfatal
+########
# NAME numeric parsing buffer overflow in numeric.c
0=~/\p{nV:-0}/
EXPECT