summaryrefslogtreecommitdiff
path: root/pp_hot.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-01-09 20:49:12 +0100
committerYves Orton <demerphq@gmail.com>2023-01-11 14:28:33 +0100
commit3f11a2855248134af98ca8d71cf71a3fe736dbae (patch)
tree7938935c782a5aabed1b392f3b841906d91e167c /pp_hot.c
parent67244d99e1ad05ddedabc8d640ceaf1d5b8e259d (diff)
downloadperl-3f11a2855248134af98ca8d71cf71a3fe736dbae.tar.gz
regexec engine - wrap and replace RX_OFFS() with better abstractions
RX_OFFS() exposes a bit too much about how capture buffers are represented. This adds RX_OFFS_START(), RX_OFFS_END() and RX_OFFS_VALID() to replace most of the uses of the RX_OFFS() macro or direct access to the rx->offs[] array. (We add RX_OFFSp() for those rare cases that should have direct access to the array.) This allows us to replace this logic with more complicated macros in the future. Pretty much anything using RX_OFFS() is going to be broken by future changes, so changing the define allows us to track such uses down easily. Not all uses of the rx->offs[] array are converted; some uses are required for the regex engine internals, but anything outside of the regex engine should be using the replacement macros, and most things in the regex internals should use them as well.
Diffstat (limited to 'pp_hot.c')
-rw-r--r--pp_hot.c36
1 files changed, 16 insertions, 20 deletions
diff --git a/pp_hot.c b/pp_hot.c
index 491cffc305..c56d7976e9 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -3189,7 +3189,7 @@ PP(pp_match)
if (global && (gimme != G_LIST || (dynpm->op_pmflags & PMf_CONTINUE))) {
if (!mg)
mg = sv_magicext_mglob(TARG);
- MgBYTEPOS_set(mg, TARG, truebase, RXp_OFFS(prog)[0].end);
+ MgBYTEPOS_set(mg, TARG, truebase, RXp_OFFS_END(prog,0));
if (RXp_ZERO_LEN(prog))
mg->mg_flags |= MGf_MINMATCH;
else
@@ -3211,20 +3211,16 @@ PP(pp_match)
EXTEND(SP, nparens + i);
EXTEND_MORTAL(nparens + i);
for (i = !i; i <= nparens; i++) {
- if (LIKELY((RXp_OFFS(prog)[i].start != -1)
- && RXp_OFFS(prog)[i].end != -1 ))
+ if (LIKELY(RXp_OFFS_VALID(prog,i)))
{
- const I32 len = RXp_OFFS(prog)[i].end - RXp_OFFS(prog)[i].start;
- const char * const s = RXp_OFFS(prog)[i].start + truebase;
- if (UNLIKELY( RXp_OFFS(prog)[i].end < 0
- || RXp_OFFS(prog)[i].start < 0
- || len < 0
- || len > strend - s)
+ const I32 len = RXp_OFFS_END(prog,i) - RXp_OFFS_START(prog,i);
+ const char * const s = RXp_OFFS_START(prog,i) + truebase;
+ if ( UNLIKELY( len < 0 || len > strend - s)
)
DIE(aTHX_ "panic: pp_match start/end pointers, i=%ld, "
"start=%ld, end=%ld, s=%p, strend=%p, len=%" UVuf,
- (long) i, (long) RXp_OFFS(prog)[i].start,
- (long)RXp_OFFS(prog)[i].end, s, strend, (UV) len);
+ (long) i, (long) RXp_OFFS_START(prog,i),
+ (long)RXp_OFFS_END(prog,i), s, strend, (IV) len);
PUSHs(newSVpvn_flags(s, len,
(DO_UTF8(TARG))
? SVf_UTF8|SVs_TEMP
@@ -3235,7 +3231,7 @@ PP(pp_match)
}
}
if (global) {
- curpos = (UV)RXp_OFFS(prog)[0].end;
+ curpos = (UV)RXp_OFFS_END(prog,0);
had_zerolen = RXp_ZERO_LEN(prog);
PUTBACK; /* EVAL blocks may use stack */
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
@@ -4519,8 +4515,8 @@ PP(pp_subst)
char *d, *m;
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
- m = orig + RXp_OFFS(prog)[0].start;
- d = orig + RXp_OFFS(prog)[0].end;
+ m = orig + RXp_OFFS_START(prog,0);
+ d = orig + RXp_OFFS_END(prog,0);
s = orig;
if (m - s > strend - d) { /* faster to shorten from end */
I32 i;
@@ -4550,7 +4546,7 @@ PP(pp_subst)
}
else {
char *d, *m;
- d = s = RXp_OFFS(prog)[0].start + orig;
+ d = s = RXp_OFFS_START(prog,0) + orig;
do {
I32 i;
if (UNLIKELY(iters++ > maxiters))
@@ -4558,7 +4554,7 @@ PP(pp_subst)
/* run time pattern taint, eg locale */
if (UNLIKELY(RXp_MATCH_TAINTED(prog)))
rxtainted |= SUBST_TAINT_PAT;
- m = RXp_OFFS(prog)[0].start + orig;
+ m = RXp_OFFS_START(prog,0) + orig;
if ((i = m - s)) {
if (s != d)
Move(s, d, i, char);
@@ -4568,7 +4564,7 @@ PP(pp_subst)
Copy(c, d, clen, char);
d += clen;
}
- s = RXp_OFFS(prog)[0].end + orig;
+ s = RXp_OFFS_END(prog,0) + orig;
} while (CALLREGEXEC(rx, s, strend, orig,
s == m, /* don't match same null twice */
TARG, NULL,
@@ -4611,7 +4607,7 @@ PP(pp_subst)
if (RXp_MATCH_TAINTED(prog)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
repl = dstr;
- s = RXp_OFFS(prog)[0].start + orig;
+ s = RXp_OFFS_START(prog,0) + orig;
dstr = newSVpvn_flags(orig, s-orig,
SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0));
if (!c) {
@@ -4641,9 +4637,9 @@ PP(pp_subst)
s = orig + (old_s - old_orig);
strend = s + (strend - old_s);
}
- m = RXp_OFFS(prog)[0].start + orig;
+ m = RXp_OFFS_START(prog,0) + orig;
sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG));
- s = RXp_OFFS(prog)[0].end + orig;
+ s = RXp_OFFS_END(prog,0) + orig;
if (first) {
/* replacement already stringified */
if (clen)