summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-12-09 11:00:17 +0100
committerYves Orton <demerphq@gmail.com>2022-12-09 16:19:29 +0100
commit85900e28cc250e1c4603f11073b77d0c6b5cff46 (patch)
treeacc41c05f436dd1063459753dda9b557f6261e6c /regexec.c
parent6a6e5d037dad0702bc219f8265505037e1772552 (diff)
downloadperl-85900e28cc250e1c4603f11073b77d0c6b5cff46.tar.gz
regcomp.c - decompose into smaller files
This splits a bunch of the subcomponents of the regex engine into smaller files. regcomp_debug.c regcomp_internal.h regcomp_invlist.c regcomp_study.c regcomp_trie.c The only real change besides to the build machine to achieve the split is to also adds some new defines which can be used in embed.fnc to control exports without having to enumerate /every/ regex engine file. For instance all of regcomp*.c defines PERL_IN_REGCOMP_ANY, and this is used in embed.fnc to manage exports.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c570
1 files changed, 496 insertions, 74 deletions
diff --git a/regexec.c b/regexec.c
index 4e396d20c3..f7d66e38c6 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,7 +2,7 @@
*/
/*
- * One Ring to rule them all, One Ring to find them
+ * One Ring to rule them all, One Ring to find them
*
* [p.v of _The Lord of the Rings_, opening poem]
* [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
@@ -40,22 +40,22 @@
/*
* pregcomp and pregexec -- regsub and regerror are not used in perl
*
- * Copyright (c) 1986 by University of Toronto.
- * Written by Henry Spencer. Not derived from licensed software.
+ * Copyright (c) 1986 by University of Toronto.
+ * Written by Henry Spencer. Not derived from licensed software.
*
- * Permission is granted to anyone to use this software for any
- * purpose on any computer system, and to redistribute it freely,
- * subject to the following restrictions:
+ * Permission is granted to anyone to use this software for any
+ * purpose on any computer system, and to redistribute it freely,
+ * subject to the following restrictions:
*
- * 1. The author is not responsible for the consequences of use of
- * this software, no matter how awful, even if they arise
- * from defects in it.
+ * 1. The author is not responsible for the consequences of use of
+ * this software, no matter how awful, even if they arise
+ * from defects in it.
*
- * 2. The origin of this software must not be misrepresented, either
- * by explicit claim or by omission.
+ * 2. The origin of this software must not be misrepresented, either
+ * by explicit claim or by omission.
*
- * 3. Altered versions must be plainly marked as such, and must not
- * be misrepresented as being the original software.
+ * 3. Altered versions must be plainly marked as such, and must not
+ * be misrepresented as being the original software.
*
**** Alterations to Henry's code are...
****
@@ -71,6 +71,7 @@
* regular-expression syntax might require a total rethink.
*/
#include "EXTERN.h"
+#define PERL_IN_REGEX_ENGINE
#define PERL_IN_REGEXEC_C
#include "perl.h"
@@ -119,7 +120,7 @@ static const char non_utf8_target_but_utf8_required[]
} STMT_END
#ifndef STATIC
-#define STATIC static
+#define STATIC static
#endif
/*
@@ -138,8 +139,8 @@ static const char non_utf8_target_but_utf8_required[]
#define HOPBACK3(pos, off, lim) \
(reginfo->is_utf8_target \
? reghopmaybe3((U8*)pos, (SSize_t)0-off, (U8*)(lim)) \
- : (pos - off >= lim) \
- ? (U8*)pos - off \
+ : (pos - off >= lim) \
+ ? (U8*)pos - off \
: NULL)
#define HOPBACKc(pos, off) ((char*)HOPBACK3(pos, off, reginfo->strbeg))
@@ -167,7 +168,7 @@ static const char non_utf8_target_but_utf8_required[]
: (U8*)(pos + off))
#define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
-#define PLACEHOLDER /* Something for the preprocessor to grab onto */
+#define PLACEHOLDER /* Something for the preprocessor to grab onto */
/* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
/* for use after a quantifier and before an EXACT-like node -- japhy */
@@ -268,7 +269,7 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
PL_savestack_ix += paren_elems_to_push;
DEBUG_BUFFERS_r({
- I32 p;
+ I32 p;
for (p = parenfloor + 1; p <= (I32)maxopenparen; p++) {
Perl_re_exec_indentf(aTHX_
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
@@ -441,7 +442,7 @@ S_regcp_restore(pTHX_ regexp *rex, I32 ix, U32 *maxopenparen_p _pDEPTH)
PL_savestack_ix = tmpix;
}
-#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
+#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
STATIC bool
S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
@@ -889,7 +890,7 @@ Perl_re_intuit_start(pTHX_
U8 other_ix = 1 - prog->substrs->check_ix;
bool ml_anch = 0;
char *other_last = strpos;/* latest pos 'other' substr already checked to */
- char *check_at = NULL; /* check substr found at this pos */
+ char *check_at = NULL; /* check substr found at this pos */
const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo_buf; /* create some info to pass to find_byclass */
@@ -1067,7 +1068,7 @@ Perl_re_intuit_start(pTHX_
end_shift = prog->check_end_shift;
-#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
+#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
if (end_shift < 0)
Perl_croak(aTHX_ "panic: end_shift: %" IVdf " pattern:\n%s\n ",
(IV)end_shift, RX_PRECOMP(rx));
@@ -1306,7 +1307,7 @@ Perl_re_intuit_start(pTHX_
}
s = HOP3c(rx_origin, other->min_offset, strend);
- if (s < other_last) /* These positions already checked */
+ if (s < other_last) /* These positions already checked */
s = other_last;
must = utf8_target ? other->utf8_substr : other->substr;
@@ -1665,7 +1666,7 @@ Perl_re_intuit_start(pTHX_
cannot start at strpos. */
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ " try at offset...\n"));
- ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
+ ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
}
else {
/* The found rx_origin position does not prohibit matching at
@@ -1674,11 +1675,11 @@ Perl_re_intuit_start(pTHX_
* zero, free it. */
if (!(prog->intflags & PREGf_NAUGHTY)
&& (utf8_target ? (
- prog->check_utf8 /* Could be deleted already */
+ prog->check_utf8 /* Could be deleted already */
&& --BmUSEFUL(prog->check_utf8) < 0
&& (prog->check_utf8 == prog->float_utf8)
) : (
- prog->check_substr /* Could be deleted already */
+ prog->check_substr /* Could be deleted already */
&& --BmUSEFUL(prog->check_substr) < 0
&& (prog->check_substr == prog->float_substr)
)))
@@ -1688,9 +1689,9 @@ Perl_re_intuit_start(pTHX_
/* XXX Does the destruction order has to change with utf8_target? */
SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
- prog->check_substr = prog->check_utf8 = NULL; /* disable */
- prog->float_substr = prog->float_utf8 = NULL; /* clear */
- check = NULL; /* abort */
+ prog->check_substr = prog->check_utf8 = NULL; /* disable */
+ prog->float_substr = prog->float_utf8 = NULL; /* clear */
+ check = NULL; /* abort */
/* XXXX This is a remnant of the old implementation. It
looks wasteful, since now INTUIT can use many
other heuristics. */
@@ -1704,8 +1705,8 @@ Perl_re_intuit_start(pTHX_
return rx_origin;
- fail_finish: /* Substring not found */
- if (prog->check_substr || prog->check_utf8) /* could be removed already */
+ fail_finish: /* Substring not found */
+ if (prog->check_substr || prog->check_utf8) /* could be removed already */
BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
fail:
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch rejected by optimizer%s\n",
@@ -2202,8 +2203,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
char *pat_string; /* The pattern's exactish string */
- char *pat_end; /* ptr to end char of pat_string */
- re_fold_t folder; /* Function for computing non-utf8 folds */
+ char *pat_end; /* ptr to end char of pat_string */
+ re_fold_t folder; /* Function for computing non-utf8 folds */
const U8 *fold_array; /* array for folding ords < 256 */
STRLEN ln;
STRLEN lnc;
@@ -2473,7 +2474,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
* Unicode semantics and the german sharp ss, which hence should not be
* compiled into a node that gets here. */
pat_string = STRINGs(c);
- ln = STR_LENs(c); /* length to match in octets/bytes */
+ ln = STR_LENs(c); /* length to match in octets/bytes */
/* We know that we have to match at least 'ln' bytes (which is the same
* as characters, since not utf8). If we have to match 3 characters,
@@ -2598,7 +2599,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
* can have the same fold, or portion of a fold, or different-
* length fold */
pat_string = STRINGs(c);
- ln = STR_LENs(c); /* length to match in octets/bytes */
+ ln = STR_LENs(c); /* length to match in octets/bytes */
pat_end = pat_string + ln;
lnc = is_utf8_pat /* length to match in characters */
? utf8_length((U8 *) pat_string, (U8 *) pat_end)
@@ -3600,8 +3601,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
char *s;
regnode *c;
char *startpos;
- SSize_t minlen; /* must match at least this many chars */
- SSize_t dontbother = 0; /* how many characters not to try at end */
+ SSize_t minlen; /* must match at least this many chars */
+ SSize_t dontbother = 0; /* how many characters not to try at end */
const bool utf8_target = cBOOL(DO_UTF8(sv));
I32 multiline;
RXi_GET_DECL(prog,progi);
@@ -3755,7 +3756,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
RXp_MATCH_TAINTED_off(prog);
RXp_MATCH_UTF8_set(prog, utf8_target);
- reginfo->prog = rx; /* Yes, sorry that this is confusing. */
+ reginfo->prog = rx; /* Yes, sorry that this is confusing. */
reginfo->intuit = 0;
reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
reginfo->warned = FALSE;
@@ -3964,7 +3965,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
SSize_t back_max;
SSize_t back_min;
char *last;
- char *last1; /* Last position checked before */
+ char *last1; /* Last position checked before */
#ifdef DEBUGGING
int did_match = 0;
#endif
@@ -4006,14 +4007,14 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
if (back_min<0) {
last = strend;
} else {
- last = HOP3c(strend, /* Cannot start after this */
+ last = HOP3c(strend, /* Cannot start after this */
-(SSize_t)(CHR_SVLEN(must)
- (SvTAIL(must) != 0) + back_min), strbeg);
}
if (s > reginfo->strbeg)
last1 = HOPc(s, -1);
else
- last1 = s - 1; /* bogus */
+ last1 = s - 1; /* bogus */
/* XXXX check_substr already used to find "s", can optimize if
check_substr==must. */
@@ -4169,7 +4170,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
if (len)
last = rninstr(s, strend, little, little + len);
else
- last = strend; /* matching "$" */
+ last = strend; /* matching "$" */
}
if (!last) {
/* at one point this block contained a comment which was
@@ -4188,7 +4189,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
}
if (minlen && (dontbother < minlen))
dontbother = minlen - 1;
- strend -= dontbother; /* this one's always in bytes! */
+ strend -= dontbother; /* this one's always in bytes! */
/* We don't know much -- general case. */
if (utf8_target) {
for (;;) {
@@ -4275,16 +4276,16 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
* Do inc before dec, in case old and new rex are the same */
#define SET_reg_curpm(Re2) \
if (reginfo->info_aux_eval) { \
- (void)ReREFCNT_inc(Re2); \
- ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
- PM_SETRE((PL_reg_curpm), (Re2)); \
+ (void)ReREFCNT_inc(Re2); \
+ ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
+ PM_SETRE((PL_reg_curpm), (Re2)); \
}
/*
- regtry - try match at specific point
*/
-STATIC bool /* 0 failure, 1 success */
+STATIC bool /* 0 failure, 1 success */
S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
{
CHECKPOINT lastcp;
@@ -6366,7 +6367,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* cache heavy used fields of st in registers */
regnode *scan;
regnode *next;
- U32 n = 0; /* general value; init to avoid compiler warning */
+ U32 n = 0; /* general value; init to avoid compiler warning */
U32 utmp = 0; /* tmp variable - valid for at most one opcode */
SSize_t ln = 0; /* len or last; init to avoid compiler warning */
SSize_t endref = 0; /* offset of end of backref when ln is start */
@@ -6378,7 +6379,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
PERL_INT_FAST16_T nextbyte; /* is always set to UCHARAT(locinput), or -1
at EOS */
- bool result = 0; /* return value of S_regmatch */
+ bool result = 0; /* return value of S_regmatch */
U32 depth = 0; /* depth of backtrack stack */
U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
const U32 max_nochange_depth =
@@ -6406,9 +6407,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
* the very next op. They have a useful lifetime of exactly one loop
* iteration, and are not preserved or restored by state pushes/pops
*/
- bool sw = 0; /* the condition value in (?(cond)a|b) */
- bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
- int logical = 0; /* the following EVAL is:
+ bool sw = 0; /* the condition value in (?(cond)a|b) */
+ bool minmod = 0; /* the next "{n,m}" is a "{n,m}?" */
+ int logical = 0; /* the following EVAL is:
0: (?{...})
1: (?(?{...})X|Y)
2: (??{...})
@@ -6419,8 +6420,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
PAD* last_pad = NULL;
dMULTICALL;
U8 gimme = G_SCALAR;
- CV *caller_cv = NULL; /* who called us */
- CV *last_pushed_cv = NULL; /* most recently called (?{}) CV */
+ CV *caller_cv = NULL; /* who called us */
+ CV *last_pushed_cv = NULL; /* most recently called (?{}) CV */
U32 maxopenparen = 0; /* max '(' index seen so far */
int to_complement; /* Invert the result? */
char_class_number_ classnum;
@@ -7790,7 +7791,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* Match either CR LF or '.', as all the other possibilities
* require utf8 */
- locinput++; /* Match the . or CR */
+ locinput++; /* Match the . or CR */
if (nextbyte == '\r' /* And if it was CR, and the next is LF,
match the LF */
&& locinput < loceol
@@ -7924,12 +7925,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
endref = rex->offs[n].end;
reginfo->poscache_iter = reginfo->poscache_maxiter; /* Void cache */
if (rex->lastparen < n || ln == -1 || endref == -1)
- sayNO; /* Do not match unless seen CLOSEn. */
+ sayNO; /* Do not match unless seen CLOSEn. */
if (ln == endref)
break;
s = reginfo->strbeg + ln;
- if (type != REF /* REF can do byte comparison */
+ if (type != REF /* REF can do byte comparison */
&& (utf8_target || type == REFFU || type == REFFL))
{
char * limit = loceol;
@@ -8202,7 +8203,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
* the savestack frame */
before = (IV)(SP-PL_stack_base);
PL_op = nop;
- CALLRUNOPS(aTHX); /* Scalar context. */
+ CALLRUNOPS(aTHX); /* Scalar context. */
SPAGAIN;
if ((IV)(SP-PL_stack_base) == before)
ret = &PL_sv_undef; /* protect against empty (?{}) blocks. */
@@ -8674,7 +8675,7 @@ NULL
ST.B = next;
ST.minmod = minmod;
minmod = 0;
- ST.count = -1; /* this will be updated by WHILEM */
+ ST.count = -1; /* this will be updated by WHILEM */
ST.lastloc = NULL; /* this will be updated by WHILEM */
PUSH_YES_STATE_GOTO(CURLYX_end, REGNODE_BEFORE(next), locinput, loceol,
@@ -8916,13 +8917,13 @@ NULL
#undef ST
#define ST st->u.branch
- case BRANCHJ: /* /(...|A|...)/ with long next pointer */
+ case BRANCHJ: /* /(...|A|...)/ with long next pointer */
next = scan + ARG(scan);
if (next == scan)
next = NULL;
/* FALLTHROUGH */
- case BRANCH: /* /(...|A|...)/ */
+ case BRANCH: /* /(...|A|...)/ */
scan = REGNODE_AFTER_opcode(scan,state_num); /* scan now points to inner node */
assert(scan);
ST.lastparen = rex->lastparen;
@@ -8988,7 +8989,7 @@ NULL
#undef ST
#define ST st->u.curlym
- case CURLYM: /* /A{m,n}B/ where A is fixed-length */
+ case CURLYM: /* /A{m,n}B/ where A is fixed-length */
/* This is an optimisation of CURLYX that enables us to push
* only a single backtracking state, no matter how many matches
@@ -9172,22 +9173,22 @@ NULL
} \
}
- case STAR: /* /A*B/ where A is width 1 char */
+ case STAR: /* /A*B/ where A is width 1 char */
ST.paren = 0;
ST.min = 0;
ST.max = REG_INFTY;
scan = REGNODE_AFTER_type(scan,tregnode_STAR);
goto repeat;
- case PLUS: /* /A+B/ where A is width 1 char */
+ case PLUS: /* /A+B/ where A is width 1 char */
ST.paren = 0;
ST.min = 1;
ST.max = REG_INFTY;
scan = REGNODE_AFTER_type(scan,tregnode_PLUS);
goto repeat;
- case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
- ST.paren = scan->flags; /* Which paren to set */
+ case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
+ ST.paren = scan->flags; /* Which paren to set */
ST.lastparen = rex->lastparen;
ST.lastcloseparen = rex->lastcloseparen;
if (ST.paren > maxopenparen)
@@ -9208,7 +9209,7 @@ NULL
goto repeat;
- case CURLY: /* /A{m,n}B/ where A is width 1 char */
+ case CURLY: /* /A{m,n}B/ where A is width 1 char */
ST.paren = 0;
ST.min = ARG1(scan); /* min to match */
ST.max = ARG2(scan); /* max to match */
@@ -9471,7 +9472,7 @@ NULL
/* we've just finished A in /(??{A})B/; now continue with B */
is_accepted= false;
SET_RECURSE_LOCINPUT("FAKE-END[before]", CUR_EVAL.prev_recurse_locinput);
- st->u.eval.prev_rex = rex_sv; /* inner */
+ st->u.eval.prev_rex = rex_sv; /* inner */
/* Save *all* the positions. */
st->u.eval.cp = regcppush(rex, 0, maxopenparen);
@@ -9513,9 +9514,9 @@ NULL
(long)(reginfo->till - startpos),
PL_colors[5]));
- sayNO_SILENT; /* Cannot match: too short. */
+ sayNO_SILENT; /* Cannot match: too short. */
}
- sayYES; /* Success! */
+ sayYES; /* Success! */
case LOOKBEHIND_END: /* validate that *lookbehind* UNLESSM/IFMATCH
matches end at the right spot, required for
@@ -9536,23 +9537,23 @@ NULL
Perl_re_exec_indentf( aTHX_
"%sSUCCEED: subpattern success...%s\n",
depth, PL_colors[4], PL_colors[5]));
- sayYES; /* Success! */
+ sayYES; /* Success! */
#undef ST
#define ST st->u.ifmatch
- case SUSPEND: /* (?>A) */
+ case SUSPEND: /* (?>A) */
ST.wanted = 1;
ST.start = locinput;
ST.end = loceol;
ST.count = 1;
goto do_ifmatch;
- case UNLESSM: /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
+ case UNLESSM: /* -ve lookaround: (?!A), or with 'flags', (?<!A) */
ST.wanted = 0;
goto ifmatch_trivial_fail_test;
- case IFMATCH: /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
+ case IFMATCH: /* +ve lookaround: (?=A), or with 'flags', (?<=A) */
ST.wanted = 1;
ifmatch_trivial_fail_test:
ST.prev_match_end= match_end;
@@ -11739,9 +11740,430 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
return retval;
}
-
#endif /* ifndef PERL_IN_XSUB_RE */
+/* Buffer logic. */
+SV*
+Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
+ const U32 flags)
+{
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF;
+
+ PERL_UNUSED_ARG(value);
+
+ if (flags & RXapif_FETCH) {
+ return reg_named_buff_fetch(rx, key, flags);
+ } else if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
+ Perl_croak_no_modify();
+ return NULL;
+ } else if (flags & RXapif_EXISTS) {
+ return reg_named_buff_exists(rx, key, flags)
+ ? &PL_sv_yes
+ : &PL_sv_no;
+ } else if (flags & RXapif_REGNAMES) {
+ return reg_named_buff_all(rx, flags);
+ } else if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT)) {
+ return reg_named_buff_scalar(rx, flags);
+ } else {
+ Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
+ return NULL;
+ }
+}
+
+SV*
+Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
+ const U32 flags)
+{
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
+ PERL_UNUSED_ARG(lastkey);
+
+ if (flags & RXapif_FIRSTKEY)
+ return reg_named_buff_firstkey(rx, flags);
+ else if (flags & RXapif_NEXTKEY)
+ return reg_named_buff_nextkey(rx, flags);
+ else {
+ Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter",
+ (int)flags);
+ return NULL;
+ }
+}
+
+SV*
+Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
+ const U32 flags)
+{
+ SV *ret;
+ struct regexp *const rx = ReANY(r);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HE *he_str = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
+ if (he_str) {
+ IV i;
+ SV* sv_dat=HeVAL(he_str);
+ I32 *nums=(I32*)SvPVX(sv_dat);
+ AV * const retarray = (flags & RXapif_ALL) ? newAV_alloc_x(SvIVX(sv_dat)) : NULL;
+ for ( i=0; i<SvIVX(sv_dat); i++ ) {
+ if ((I32)(rx->nparens) >= nums[i]
+ && rx->offs[nums[i]].start != -1
+ && rx->offs[nums[i]].end != -1)
+ {
+ ret = newSVpvs("");
+ CALLREG_NUMBUF_FETCH(r, nums[i], ret);
+ if (!retarray)
+ return ret;
+ } else {
+ if (retarray)
+ ret = newSV_type(SVt_NULL);
+ }
+ if (retarray)
+ av_push_simple(retarray, ret);
+ }
+ if (retarray)
+ return newRV_noinc(MUTABLE_SV(retarray));
+ }
+ }
+ return NULL;
+}
+
+bool
+Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
+ const U32 flags)
+{
+ struct regexp *const rx = ReANY(r);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ if (flags & RXapif_ALL) {
+ return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
+ } else {
+ SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
+ if (sv) {
+ SvREFCNT_dec_NN(sv);
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+ }
+ } else {
+ return FALSE;
+ }
+}
+
+SV*
+Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+ struct regexp *const rx = ReANY(r);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
+
+ if ( rx && RXp_PAREN_NAMES(rx) ) {
+ (void)hv_iterinit(RXp_PAREN_NAMES(rx));
+
+ return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
+ } else {
+ return FALSE;
+ }
+}
+
+SV*
+Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
+{
+ struct regexp *const rx = ReANY(r);
+ DECLARE_AND_GET_RE_DEBUG_FLAGS;
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HV *hv = RXp_PAREN_NAMES(rx);
+ HE *temphe;
+ while ( (temphe = hv_iternext_flags(hv, 0)) ) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(rx->lastparen) >= nums[i] &&
+ rx->offs[nums[i]].start != -1 &&
+ rx->offs[nums[i]].end != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || flags & RXapif_ALL) {
+ return newSVhek(HeKEY_hek(temphe));
+ }
+ }
+ }
+ return NULL;
+}
+
+SV*
+Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
+{
+ SV *ret;
+ AV *av;
+ SSize_t length;
+ struct regexp *const rx = ReANY(r);
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT)) {
+ return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));
+ } else if (flags & RXapif_ONE) {
+ ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
+ av = MUTABLE_AV(SvRV(ret));
+ length = av_count(av);
+ SvREFCNT_dec_NN(ret);
+ return newSViv(length);
+ } else {
+ Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
+ (int)flags);
+ return NULL;
+ }
+ }
+ return &PL_sv_undef;
+}
+
+SV*
+Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
+{
+ struct regexp *const rx = ReANY(r);
+ AV *av = newAV();
+
+ PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;
+
+ if (rx && RXp_PAREN_NAMES(rx)) {
+ HV *hv= RXp_PAREN_NAMES(rx);
+ HE *temphe;
+ (void)hv_iterinit(hv);
+ while ( (temphe = hv_iternext_flags(hv, 0)) ) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(rx->lastparen) >= nums[i] &&
+ rx->offs[nums[i]].start != -1 &&
+ rx->offs[nums[i]].end != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || flags & RXapif_ALL) {
+ av_push_simple(av, newSVhek(HeKEY_hek(temphe)));
+ }
+ }
+ }
+
+ return newRV_noinc(MUTABLE_SV(av));
+}
+
+void
+Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
+ SV * const sv)
+{
+ struct regexp *const rx = ReANY(r);
+ char *s = NULL;
+ SSize_t i = 0;
+ SSize_t s1, t1;
+ I32 n = paren;
+
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
+
+ if ( n == RX_BUFF_IDX_CARET_PREMATCH
+ || n == RX_BUFF_IDX_CARET_FULLMATCH
+ || n == RX_BUFF_IDX_CARET_POSTMATCH
+ )
+ {
+ bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+ if (!keepcopy) {
+ /* on something like
+ * $r = qr/.../;
+ * /$qr/p;
+ * the KEEPCOPY is set on the PMOP rather than the regex */
+ if (PL_curpm && r == PM_GETRE(PL_curpm))
+ keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+ }
+ if (!keepcopy)
+ goto ret_undef;
+ }
+
+ if (!rx->subbeg)
+ goto ret_undef;
+
+ if (n == RX_BUFF_IDX_CARET_FULLMATCH)
+ /* no need to distinguish between them any more */
+ n = RX_BUFF_IDX_FULLMATCH;
+
+ if ((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
+ && rx->offs[0].start != -1)
+ {
+ /* $`, ${^PREMATCH} */
+ i = rx->offs[0].start;
+ s = rx->subbeg;
+ }
+ else
+ if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
+ && rx->offs[0].end != -1)
+ {
+ /* $', ${^POSTMATCH} */
+ s = rx->subbeg - rx->suboffset + rx->offs[0].end;
+ i = rx->sublen + rx->suboffset - rx->offs[0].end;
+ }
+ else
+ if (inRANGE(n, 0, (I32)rx->nparens) &&
+ (s1 = rx->offs[n].start) != -1 &&
+ (t1 = rx->offs[n].end) != -1)
+ {
+ /* $&, ${^MATCH}, $1 ... */
+ i = t1 - s1;
+ s = rx->subbeg + s1 - rx->suboffset;
+ } else {
+ goto ret_undef;
+ }
+
+ assert(s >= rx->subbeg);
+ assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
+ if (i >= 0) {
+#ifdef NO_TAINT_SUPPORT
+ sv_setpvn(sv, s, i);
+#else
+ const int oldtainted = TAINT_get;
+ TAINT_NOT;
+ sv_setpvn(sv, s, i);
+ TAINT_set(oldtainted);
+#endif
+ if (RXp_MATCH_UTF8(rx))
+ SvUTF8_on(sv);
+ else
+ SvUTF8_off(sv);
+ if (TAINTING_get) {
+ if (RXp_MATCH_TAINTED(rx)) {
+ if (SvTYPE(sv) >= SVt_PVMG) {
+ MAGIC* const mg = SvMAGIC(sv);
+ MAGIC* mgt;
+ TAINT;
+ SvMAGIC_set(sv, mg->mg_moremagic);
+ SvTAINT(sv);
+ if ((mgt = SvMAGIC(sv))) {
+ mg->mg_moremagic = mgt;
+ SvMAGIC_set(sv, mg);
+ }
+ } else {
+ TAINT;
+ SvTAINT(sv);
+ }
+ } else
+ SvTAINTED_off(sv);
+ }
+ } else {
+ ret_undef:
+ sv_set_undef(sv);
+ return;
+ }
+}
+
+void
+Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
+ SV const * const value)
+{
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
+
+ PERL_UNUSED_ARG(rx);
+ PERL_UNUSED_ARG(paren);
+ PERL_UNUSED_ARG(value);
+
+ if (!PL_localizing)
+ Perl_croak_no_modify();
+}
+
+I32
+Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
+ const I32 paren)
+{
+ struct regexp *const rx = ReANY(r);
+ I32 i;
+ I32 s1, t1;
+
+ PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
+
+ if ( paren == RX_BUFF_IDX_CARET_PREMATCH
+ || paren == RX_BUFF_IDX_CARET_FULLMATCH
+ || paren == RX_BUFF_IDX_CARET_POSTMATCH
+ )
+ {
+ bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+ if (!keepcopy) {
+ /* on something like
+ * $r = qr/.../;
+ * /$qr/p;
+ * the KEEPCOPY is set on the PMOP rather than the regex */
+ if (PL_curpm && r == PM_GETRE(PL_curpm))
+ keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+ }
+ if (!keepcopy)
+ goto warn_undef;
+ }
+
+ /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
+ switch (paren) {
+ case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
+ case RX_BUFF_IDX_PREMATCH: /* $` */
+ if (rx->offs[0].start != -1) {
+ i = rx->offs[0].start;
+ if (i > 0) {
+ s1 = 0;
+ t1 = i;
+ goto getlen;
+ }
+ }
+ return 0;
+
+ case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
+ case RX_BUFF_IDX_POSTMATCH: /* $' */
+ if (rx->offs[0].end != -1) {
+ i = rx->sublen - rx->offs[0].end;
+ if (i > 0) {
+ s1 = rx->offs[0].end;
+ t1 = rx->sublen;
+ goto getlen;
+ }
+ }
+ return 0;
+
+ default: /* $& / ${^MATCH}, $1, $2, ... */
+ if (paren <= (I32)rx->nparens &&
+ (s1 = rx->offs[paren].start) != -1 &&
+ (t1 = rx->offs[paren].end) != -1)
+ {
+ i = t1 - s1;
+ goto getlen;
+ } else {
+ warn_undef:
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit((const SV *)sv);
+ return 0;
+ }
+ }
+ getlen:
+ if (i > 0 && RXp_MATCH_UTF8(rx)) {
+ const char * const s = rx->subbeg - rx->suboffset + s1;
+ const U8 *ep;
+ STRLEN el;
+
+ i = t1 - s1;
+ if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
+ i = el;
+ }
+ return i;
+}
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/