summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2023-01-15 20:31:14 +0100
committer: Yves Orton <demerphq@gmail.com> 2023-03-13 21:26:08 +0800
commit: d78630f1ec4da716988da18d97cb6425388e696c (patch)
tree: 6ef6a501ea3219d2f493179de78e930b09ce2763
parent: 17e3e02ad120eabda2bdb6c297a70d53294437ef (diff)
download: perl-d78630f1ec4da716988da18d97cb6425388e696c.tar.gz
regcomp.c - extend REF to hold the paren it needs to regcppush
This way we can avoid pushing every buffer; we only need to push the nestroot of the ref.
-rw-r--r-- pod/perldebguts.pod 20
-rw-r--r-- regcomp.c 10
-rw-r--r-- regcomp.sym 20
-rw-r--r-- regcomp_debug.c 9
-rw-r--r-- regexec.c 6
-rw-r--r-- regnodes.h 20
-rw-r--r-- t/re/pat_advanced.t 86
7 files changed, 88 insertions, 83 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 90f5fb66ae..52aef07396 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -773,25 +773,25 @@ will be lost.
SROPEN none Same as OPEN, but for script run
SRCLOSE none Close preceding SROPEN
- REF num 1 Match some already matched string
- REFF num 1 Match already matched string, using /di
+ REF num 2 Match some already matched string
+ REFF num 2 Match already matched string, using /di
rules.
- REFFL num 1 Match already matched string, using /li
+ REFFL num 2 Match already matched string, using /li
rules.
- REFFU num 1 Match already matched string, usng /ui.
- REFFA num 1 Match already matched string, using /aai
+ REFFU num 2 Match already matched string, usng /ui.
+ REFFA num 2 Match already matched string, using /aai
rules.
# Named references. Code in regcomp.c assumes that these all are after
# the numbered references
- REFN no-sv 1 Match some already matched string
- REFFN no-sv 1 Match already matched string, using /di
+ REFN no-sv 2 Match some already matched string
+ REFFN no-sv 2 Match already matched string, using /di
rules.
- REFFLN no-sv 1 Match already matched string, using /li
+ REFFLN no-sv 2 Match already matched string, using /li
rules.
- REFFUN num 1 Match already matched string, using /ui
+ REFFUN num 2 Match already matched string, using /ui
rules.
- REFFAN num 1 Match already matched string, using /aai
+ REFFAN num 2 Match already matched string, using /aai
rules.
# Support for long RE
diff --git a/regcomp.c b/regcomp.c
index da8b3e32f1..94428fad9c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2754,7 +2754,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
SvREFCNT_inc_simple_void_NN(sv_dat);
}
RExC_sawback = 1;
- ret = reg1node(pRExC_state,
+ ret = reg2node(pRExC_state,
((! FOLD)
? REFN
: (ASCII_FOLD_RESTRICTED)
@@ -2764,7 +2764,9 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
: (LOC)
? REFFLN
: REFFN),
- num);
+ num, RExC_nestroot);
+ if (RExC_nestroot && num >= (U32)RExC_nestroot)
+ REGNODE_p(ret)->flags = VOLATILE_REF;
*flagp |= HASWIDTH;
nextchar(pRExC_state);
@@ -6028,7 +6030,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
}
RExC_sawback = 1;
- ret = reg1node(pRExC_state,
+ ret = reg2node(pRExC_state,
((! FOLD)
? REF
: (ASCII_FOLD_RESTRICTED)
@@ -6038,7 +6040,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
: (LOC)
? REFFL
: REFF),
- num);
+ num, RExC_nestroot);
if (RExC_nestroot && num >= RExC_nestroot)
REGNODE_p(ret)->flags = VOLATILE_REF;
if (OP(REGNODE_p(ret)) == REFF) {
diff --git a/regcomp.sym b/regcomp.sym
index 1c0af0cd53..2c0f4a0501 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -233,22 +233,22 @@ CLOSE CLOSE, num 1 ; Close corresponding OPEN of #n.
SROPEN SROPEN, none ; Same as OPEN, but for script run
SRCLOSE SRCLOSE, none ; Close preceding SROPEN
-REF REF, num 1 V ; Match some already matched string
-REFF REF, num 1 V ; Match already matched string, using /di rules.
-REFFL REF, num 1 V ; Match already matched string, using /li rules.
+REF REF, num 2 V ; Match some already matched string
+REFF REF, num 2 V ; Match already matched string, using /di rules.
+REFFL REF, num 2 V ; Match already matched string, using /li rules.
# N?REFF[AU] could have been implemented using the FLAGS field of the
# regnode, but by having a separate node type, we can use the existing switch
# statement to avoid some tests
-REFFU REF, num 1 V ; Match already matched string, usng /ui.
-REFFA REF, num 1 V ; Match already matched string, using /aai rules.
+REFFU REF, num 2 V ; Match already matched string, usng /ui.
+REFFA REF, num 2 V ; Match already matched string, using /aai rules.
#*Named references. Code in regcomp.c assumes that these all are after
#*the numbered references
-REFN REF, no-sv 1 V ; Match some already matched string
-REFFN REF, no-sv 1 V ; Match already matched string, using /di rules.
-REFFLN REF, no-sv 1 V ; Match already matched string, using /li rules.
-REFFUN REF, num 1 V ; Match already matched string, using /ui rules.
-REFFAN REF, num 1 V ; Match already matched string, using /aai rules.
+REFN REF, no-sv 2 V ; Match some already matched string
+REFFN REF, no-sv 2 V ; Match already matched string, using /di rules.
+REFFLN REF, no-sv 2 V ; Match already matched string, using /li rules.
+REFFUN REF, num 2 V ; Match already matched string, using /ui rules.
+REFFAN REF, num 2 V ; Match already matched string, using /aai rules.
#*Support for long RE
LONGJMP LONGJMP, off 1 . 1 ; Jump far away.
diff --git a/regcomp_debug.c b/regcomp_debug.c
index d02dbfd4cb..8c5065bacd 100644
--- a/regcomp_debug.c
+++ b/regcomp_debug.c
@@ -489,9 +489,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
|| k == GROUPP || op == ACCEPT)
{
AV *name_list= NULL;
- U32 parno= (op == ACCEPT) ? (U32)ARG2i(o) :
- (op == OPEN || op == CLOSE) ? (U32)PARNO(o) :
- (U32)ARG1u(o);
+ U32 parno= (op == ACCEPT) ? ARG2u(o) :
+ (op == OPEN || op == CLOSE) ? PARNO(o) :
+ ARG1u(o);
if ( RXp_PAREN_NAMES(prog) ) {
name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
} else if ( pRExC_state ) {
@@ -542,6 +542,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
Perl_sv_catpvf(aTHX_ sv, "/%" UVuf, (UV)parno); /* Parenth number */
}
+ if ( k == REF ) {
+ Perl_sv_catpvf(aTHX_ sv, " <%" IVdf ">", (IV)ARG2i(o));
+ }
if ( k == REF && reginfo) {
U32 n = ARG1u(o); /* which paren pair */
I32 ln = RXp_OFFS_START(prog,n);
diff --git a/regexec.c b/regexec.c
index 3a8e160478..eb1fe4e06a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -8068,7 +8068,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
}
ref_yes:
if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */
- ST.cp = regcppush(rex, 0, maxopenparen);
+ ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
script_run_begin);
@@ -8105,7 +8105,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
U32 arg;
case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
- arg= (U32)ARG1u(scan);
+ arg = ARG1u(scan);
if (cur_eval && cur_eval->locinput == locinput) {
if ( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_
@@ -8620,7 +8620,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
is_accepted = true;
if (scan->flags)
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
- utmp = (U32)ARG2i(scan);
+ utmp = ARG2u(scan);
if ( utmp ) {
regnode *cursor;
diff --git a/regnodes.h b/regnodes.h
index a675874378..c4c7ba6a45 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -94,16 +94,16 @@ typedef struct regnode tregnode_POSIXL;
typedef struct regnode tregnode_POSIXU;
typedef struct regnode_1 tregnode_PRUNE;
typedef struct regnode tregnode_PSEUDO;
-typedef struct regnode_1 tregnode_REF;
-typedef struct regnode_1 tregnode_REFF;
-typedef struct regnode_1 tregnode_REFFA;
-typedef struct regnode_1 tregnode_REFFAN;
-typedef struct regnode_1 tregnode_REFFL;
-typedef struct regnode_1 tregnode_REFFLN;
-typedef struct regnode_1 tregnode_REFFN;
-typedef struct regnode_1 tregnode_REFFU;
-typedef struct regnode_1 tregnode_REFFUN;
-typedef struct regnode_1 tregnode_REFN;
+typedef struct regnode_2 tregnode_REF;
+typedef struct regnode_2 tregnode_REFF;
+typedef struct regnode_2 tregnode_REFFA;
+typedef struct regnode_2 tregnode_REFFAN;
+typedef struct regnode_2 tregnode_REFFL;
+typedef struct regnode_2 tregnode_REFFLN;
+typedef struct regnode_2 tregnode_REFFN;
+typedef struct regnode_2 tregnode_REFFU;
+typedef struct regnode_2 tregnode_REFFUN;
+typedef struct regnode_2 tregnode_REFN;
typedef struct regnode_p tregnode_REGEX_SET;
typedef struct regnode tregnode_REG_ANY;
typedef struct regnode_1 tregnode_RENUM;
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index d836525c15..d64bd1b94b 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2590,10 +2590,10 @@ Starting parse and generation
<\g{c}> | 6| brnc
| | piec
| | atom
-<> | 8| tail~ OPEN1 'b' (4) -> REFN
- | | Setting close paren #1 to 8
- | 10| lsbr~ tying lastbr REFN (6) to ender CLOSE1 'b' (8) offset 2
- | | tail~ REFN (6) -> CLOSE
+<> | 9| tail~ OPEN1 'b' (4) -> REFN
+ | | Setting close paren #1 to 9
+ | 11| lsbr~ tying lastbr REFN <1> (6) to ender CLOSE1 'b' (9) offset 3
+ | | tail~ REFN <1> (6) -> CLOSE
Unmatched ( in regex; marked by <-- HERE in m/(?{a})( <-- HERE ?<b>\g{c}/ at - line 1.
Freeing REx: "(?{a})(?<b>\g{c}"
EOF_DEBUG_OUT
@@ -2618,35 +2618,35 @@ Starting parse and generation
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
-<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN
- | 7| lsbr~ tying lastbr REFN (3) to ender CLOSE1 'b' (5) offset 2
- | | tail~ REFN (3) -> CLOSE
+<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
+ | 8| lsbr~ tying lastbr REFN <1> (3) to ender CLOSE1 'b' (6) offset 3
+ | | tail~ REFN <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
-<x)(?&b)> | 9| brnc
+<x)(?&b)> | 10| brnc
| | piec
| | atom
-<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT
- | 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2
- | | tail~ EXACT <x> (9) -> CLOSE
+<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
+ | 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
+ | | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
- | | ~ REFN (3)
- | | ~ CLOSE1 'b' (5) -> OPEN
+ | | ~ REFN <1> (3)
+ | | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
-<> | 16| tail~ OPEN2 'c' (7)
- | | ~ EXACT <x> (9)
- | | ~ CLOSE2 'c' (11) -> GOSUB
- | 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15
+<> | 17| tail~ OPEN2 'c' (8)
+ | | ~ EXACT <x> (10)
+ | | ~ CLOSE2 'c' (12) -> GOSUB
+ | 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
- | | ~ REFN (3)
- | | ~ CLOSE1 'b' (5)
- | | ~ OPEN2 'c' (7)
- | | ~ EXACT <x> (9)
- | | ~ CLOSE2 'c' (11)
- | | ~ GOSUB1[+0:13] 'b' (13) -> END
+ | | ~ REFN <1> (3)
+ | | ~ CLOSE1 'b' (6)
+ | | ~ OPEN2 'c' (8)
+ | | ~ EXACT <x> (10)
+ | | ~ CLOSE2 'c' (12)
+ | | ~ GOSUB1[+0:14] 'b' (14) -> END
Need to redo parse
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
Starting parse and generation
@@ -2658,36 +2658,36 @@ Starting parse and generation
<\g{c})(?<c>>...| 3| brnc
| | piec
| | atom
-<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN
- | 7| lsbr~ tying lastbr REFN2 'c' (3) to ender CLOSE1 'b' (5) offset 2
- | | tail~ REFN2 'c' (3) -> CLOSE
+<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN
+ | 8| lsbr~ tying lastbr REFN2 'c' <1> (3) to ender CLOSE1 'b' (6) offset 3
+ | | tail~ REFN2 'c' <1> (3) -> CLOSE
<(?<c>x)(?&b)> | | piec
| | atom
<?<c>x)(?&b)> | | reg
-<x)(?&b)> | 9| brnc
+<x)(?&b)> | 10| brnc
| | piec
| | atom
-<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT
- | 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2
- | | tail~ EXACT <x> (9) -> CLOSE
+<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT
+ | 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2
+ | | tail~ EXACT <x> (10) -> CLOSE
<(?&b)> | | tail~ OPEN1 'b' (1)
- | | ~ REFN2 'c' (3)
- | | ~ CLOSE1 'b' (5) -> OPEN
+ | | ~ REFN2 'c' <1> (3)
+ | | ~ CLOSE1 'b' (6) -> OPEN
| | piec
| | atom
<?&b)> | | reg
-<> | 16| tail~ OPEN2 'c' (7)
- | | ~ EXACT <x> (9)
- | | ~ CLOSE2 'c' (11) -> GOSUB
- | 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15
+<> | 17| tail~ OPEN2 'c' (8)
+ | | ~ EXACT <x> (10)
+ | | ~ CLOSE2 'c' (12) -> GOSUB
+ | 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16
| | tail~ OPEN1 'b' (1)
- | | ~ REFN2 'c' (3)
- | | ~ CLOSE1 'b' (5)
- | | ~ OPEN2 'c' (7)
- | | ~ EXACT <x> (9)
- | | ~ CLOSE2 'c' (11)
- | | ~ GOSUB1[+0:13] 'b' (13) -> END
-Required size 16 nodes
+ | | ~ REFN2 'c' <1> (3)
+ | | ~ CLOSE1 'b' (6)
+ | | ~ OPEN2 'c' (8)
+ | | ~ EXACT <x> (10)
+ | | ~ CLOSE2 'c' (12)
+ | | ~ GOSUB1[+0:14] 'b' (14) -> END
+Required size 17 nodes
first at 3
Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)"
EOF_DEBUG_OUT