summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-01-08 15:49:04 +0100
committerYves Orton <demerphq@gmail.com>2023-01-19 18:44:49 +0800
commitc224bbd5d135fe48f49b4cc25f10a4977d695145 (patch)
tree5909b6fd666bb025496824a3f8c67715643164a8 /regcomp.c
parent09b3a407e87f128d7aecd14f9c8d75dcff9aaaf8 (diff)
downloadperl-c224bbd5d135fe48f49b4cc25f10a4977d695145.tar.gz
regcomp.c - add optimistic eval (*{ ... }) and (**{ ... })
This adds (*{ ... }) and (**{ ... }) as equivalents to (?{ ... }) and (??{ ... }). The only difference being that the star variants are "optimisitic" and are defined to never disable optimisations. This is especially relevant now that use of (?{ ... }) prevents important optimisations anywhere in the pattern, instead of the older and inconsistent rules where it only affected the parts that contained the EVAL. It is also very useful for injecting debugging style expressions to the pattern to understand what the regex engine is actually doing. The older style (?{ ... }) variants would change the regex engines behavior, meaning this was not as effective a tool as it could have been. Similarly it is now possible to test that a given regex optimisation works correctly using (*{ ... }), which was not possible with (?{ ... }).
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c52
1 files changed, 42 insertions, 10 deletions
diff --git a/regcomp.c b/regcomp.c
index 3a9f66cc97..bf83f8e2f0 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1902,7 +1902,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
!sawlookahead &&
(OP(first) == STAR &&
REGNODE_TYPE(OP(REGNODE_AFTER(first))) == REG_ANY) &&
- !(RExC_rx->intflags & PREGf_ANCH) && !pRExC_state->code_blocks)
+ !(RExC_rx->intflags & PREGf_ANCH) && !(RExC_seen & REG_PESSIMIZE_SEEN))
{
/* turn .* into ^.* with an implied $*=1 */
const int type =
@@ -1915,7 +1915,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
if (sawplus && !sawminmod && !sawlookahead
&& (!sawopen || !RExC_sawback)
- && !pRExC_state->code_blocks) /* May examine pos and $& */
+ && !(RExC_seen & REG_PESSIMIZE_SEEN)) /* May examine pos and $& */
/* x+ must match at the 1st pos of run of x's */
RExC_rx->intflags |= PREGf_SKIP;
@@ -2167,20 +2167,27 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
if (RExC_seen & REG_GPOS_SEEN)
RExC_rx->intflags |= PREGf_GPOS_SEEN;
+
+ if (RExC_seen & REG_PESSIMIZE_SEEN)
+ RExC_rx->intflags |= PREGf_PESSIMIZE_SEEN;
+
if (RExC_seen & REG_LOOKBEHIND_SEEN)
RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the
lookbehind */
if (pRExC_state->code_blocks)
RExC_rx->extflags |= RXf_EVAL_SEEN;
- if (RExC_seen & REG_VERBARG_SEEN)
- {
+
+ if (RExC_seen & REG_VERBARG_SEEN) {
RExC_rx->intflags |= PREGf_VERBARG_SEEN;
RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* don't understand this! Yves */
}
+
if (RExC_seen & REG_CUTGROUP_SEEN)
RExC_rx->intflags |= PREGf_CUTGROUP_SEEN;
+
if (pm_flags & PMf_USE_RE_EVAL)
RExC_rx->intflags |= PREGf_USE_RE_EVAL;
+
if (RExC_paren_names)
RXp_PAREN_NAMES(RExC_rx) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
else
@@ -2944,6 +2951,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
I32 num; /* numeric backreferences */
SV * max_open; /* Max number of unclosed parens */
I32 was_in_lookaround = RExC_in_lookaround;
+ I32 fake_eval = 0; /* matches paren */
/* The difference between the following variables can be seen with *
* the broken pattern /(?:foo/ where segment_parse_start will point *
@@ -3000,6 +3008,16 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
goto parse_rest;
}
else if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
+ if (RExC_parse[1] == '{') {
+ fake_eval = '{';
+ goto handle_qmark;
+ }
+ else
+ if ( RExC_parse[1] == '*' && RExC_parse[2] == '{' ) {
+ fake_eval = '?';
+ goto handle_qmark;
+ }
+
char *start_verb = RExC_parse + 1;
STRLEN verb_len;
char *start_arg = NULL;
@@ -3310,7 +3328,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
return ret;
}
else if (*RExC_parse == '?') { /* (?...) */
- bool is_logical = 0;
+ handle_qmark:
+ ; /* make sure the label has a statement associated with it*/
+ bool is_logical = 0, is_optimistic = 0;
const char * const seqstart = RExC_parse;
const char * endptr;
const char non_existent_group_msg[]
@@ -3323,8 +3343,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
RExC_parse_inc_by(1); /* past the '?' */
- paren = *RExC_parse; /* might be a trailing NUL, if not
- well-formed */
+ if (!fake_eval) {
+ paren = *RExC_parse; /* might be a trailing NUL, if not
+ well-formed */
+ is_optimistic = 0;
+ } else {
+ is_optimistic = 1;
+ paren = fake_eval;
+ }
RExC_parse_inc();
if (RExC_parse > RExC_end) {
paren = '\0';
@@ -3705,10 +3731,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
pRExC_state->code_index++;
nextchar(pRExC_state);
+ if (!is_optimistic)
+ RExC_seen |= REG_PESSIMIZE_SEEN;
if (is_logical) {
regnode_offset eval;
ret = reg_node(pRExC_state, LOGICAL);
+ FLAGS(REGNODE_p(ret)) = 2;
eval = reg2Lanode(pRExC_state, EVAL,
n,
@@ -3717,13 +3746,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
* return value */
RExC_flags & RXf_PMf_COMPILETIME
);
- FLAGS(REGNODE_p(ret)) = 2;
+ FLAGS(REGNODE_p(eval)) = is_optimistic * EVAL_OPTIMISTIC_FLAG;
if (! REGTAIL(pRExC_state, ret, eval)) {
REQUIRE_BRANCHJ(flagp, 0);
}
return ret;
}
ret = reg2Lanode(pRExC_state, EVAL, n, 0);
+ FLAGS(REGNODE_p(ret)) = is_optimistic * EVAL_OPTIMISTIC_FLAG;
+
return ret;
}
case '(': /* (?(?{...})...) and (?(?=...)...) */
@@ -3737,7 +3768,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
|| RExC_parse[1] == '<'
|| RExC_parse[1] == '{'))
|| ( RExC_parse[0] == '*' /* (?(*...)) */
- && ( memBEGINs(RExC_parse + 1,
+ && ( RExC_parse[1] == '{'
+ || ( memBEGINs(RExC_parse + 1,
(Size_t) (RExC_end - (RExC_parse + 1)),
"pla:")
|| memBEGINs(RExC_parse + 1,
@@ -3760,7 +3792,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
"negative_lookahead:")
|| memBEGINs(RExC_parse + 1,
(Size_t) (RExC_end - (RExC_parse + 1)),
- "negative_lookbehind:"))))
+ "negative_lookbehind:")))))
) { /* Lookahead or eval. */
I32 flag;
regnode_offset tail;