summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2023-01-09 21:59:24 +0100
committer: Yves Orton <demerphq@gmail.com> 2023-01-15 13:46:02 +0100
commit: 0b5fb5dd6851cc2ffc94d9d28add98cc3f441ead (patch)
tree: f65b8c995c9d561452d35637f7e91e13cbf7f253 /regexec.c
parent: c5b1c090dbd52c47488c0f80eecb9cb7fa6f93e3 (diff)
download: perl-0b5fb5dd6851cc2ffc94d9d28add98cc3f441ead.tar.gz
regexec.c - rework CLOSE_CAPTURE() to take rex as an arg to enable reuse.
This also splits CLOSE_CAPTURE() into two parts: CLOSE_ANY_CAPTURE(), which implements the important part (setting the start and end offsets of the capture buffer), and CLOSE_CAPTURE(), which wraps it and adds the lastparen/lastcloseparen bookkeeping and the debugging output. The split allows CLOSE_ANY_CAPTURE() to be used in contexts where the regexp structure isn't set up under the name 'rex', and where the debugging output that CLOSE_CAPTURE() includes might not be relevant or possible to produce. Together the two macros encapsulate all the places that "close" a capture buffer, and ensure that the buffers are closed properly. One important case in particular, in Perl_regexec_flags(), cannot use CLOSE_CAPTURE() directly, as it does not have a 'rex' variable in scope (the regexp structure is called 'prog' in that function), nor the debugging context (such as 'depth') used in normal execution of CLOSE_CAPTURE(). Using CLOSE_ANY_CAPTURE() there instead means all the main points that update capture buffer state use the same macro API.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 46
1 files changed, 25 insertions, 21 deletions
diff --git a/regexec.c b/regexec.c
index 6390ba7a17..3e5e5892b7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -313,21 +313,24 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
regcpblow(cp)
/* set the start and end positions of capture ix */
-#define CLOSE_CAPTURE(ix, s, e) \
- rex->offs[ix].start = s; \
- rex->offs[ix].end = e; \
- if (ix > rex->lastparen) \
- rex->lastparen = ix; \
- rex->lastcloseparen = ix; \
- DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_ \
+#define CLOSE_ANY_CAPTURE(rex, ix, s, e) \
+ (rex)->offs[(ix)].start = (s); \
+ (rex)->offs[(ix)].end = (e)
+
+#define CLOSE_CAPTURE(rex, ix, s, e) \
+ CLOSE_ANY_CAPTURE(rex, ix, s, e); \
+ if (ix > (rex)->lastparen) \
+ (rex)->lastparen = (ix); \
+ (rex)->lastcloseparen = (ix); \
+ DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_ \
"CLOSE: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf "..%" IVdf " max: %" UVuf "\n", \
- depth, \
- PTR2UV(rex), \
- PTR2UV(rex->offs), \
- (UV)ix, \
- (IV)rex->offs[ix].start, \
- (IV)rex->offs[ix].end, \
- (UV)rex->lastparen \
+ depth, \
+ PTR2UV(rex), \
+ PTR2UV(rex->offs), \
+ (UV)(ix), \
+ (IV)(rex)->offs[ix].start, \
+ (IV)(rex)->offs[ix].end, \
+ (UV)(rex)->lastparen \
))
#define UNWIND_PAREN(lp, lcp) \
@@ -3728,10 +3731,11 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
* Let @-, @+, $^N know */
prog->lastparen = prog->lastcloseparen = 0;
RXp_MATCH_UTF8_set(prog, utf8_target);
- prog->offs[0].start = s - strbeg;
- prog->offs[0].end = utf8_target
+ SSize_t match_start = s - strbeg;
+ SSize_t match_end = utf8_target
? (char*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
: s - strbeg + prog->minlenret;
+ CLOSE_ANY_CAPTURE(prog, 0, match_start, match_end);
if ( !(flags & REXEC_NOT_FIRST) )
S_reg_set_capture_string(aTHX_ rx,
strbeg, strend,
@@ -8492,7 +8496,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case CLOSE: /* ) */
n = PARNO(scan); /* which paren pair */
- CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+ CLOSE_CAPTURE(rex, n, rex->offs[n].start_tmp,
locinput - reginfo->strbeg);
if ( EVAL_CLOSE_PAREN_IS( cur_eval, n ) )
goto fake_end;
@@ -8532,7 +8536,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
if ( n > lastopen ) /* might be OPEN/CLOSE in the way */
continue; /* so skip this one */
- CLOSE_CAPTURE(n, rex->offs[n].start_tmp,
+ CLOSE_CAPTURE(rex, n, rex->offs[n].start_tmp,
locinput - reginfo->strbeg);
if ( n == utmp || EVAL_CLOSE_PAREN_IS(cur_eval, n) )
@@ -9066,7 +9070,7 @@ NULL
if (ST.me->flags) {
/* emulate CLOSE: mark current A as captured */
U32 paren = (U32)ST.me->flags;
- CLOSE_CAPTURE(paren,
+ CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
@@ -9140,7 +9144,7 @@ NULL
/* emulate CLOSE: mark current A as captured */
U32 paren = (U32)ST.me->flags;
if (ST.count || is_accepted) {
- CLOSE_CAPTURE(paren,
+ CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
@@ -9184,7 +9188,7 @@ NULL
#define CURLY_SETPAREN(paren, success) \
if (paren) { \
if (success) { \
- CLOSE_CAPTURE(paren, HOPc(locinput, -1) - reginfo->strbeg, \
+ CLOSE_CAPTURE(rex, paren, HOPc(locinput, -1) - reginfo->strbeg, \
locinput - reginfo->strbeg); \
} \
else { \