diff options
author | Yves Orton <demerphq@gmail.com> | 2023-01-15 20:31:14 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2023-03-13 21:26:08 +0800 |
commit | d78630f1ec4da716988da18d97cb6425388e696c (patch) | |
tree | 6ef6a501ea3219d2f493179de78e930b09ce2763 | |
parent | 17e3e02ad120eabda2bdb6c297a70d53294437ef (diff) | |
download | perl-d78630f1ec4da716988da18d97cb6425388e696c.tar.gz |
regcomp.c - extend REF to hold the paren it needs to regcppush
This way we can avoid pushing every buffer; we only need to push
the nestroot of the ref.
-rw-r--r-- | pod/perldebguts.pod | 20 | ||||
-rw-r--r-- | regcomp.c | 10 | ||||
-rw-r--r-- | regcomp.sym | 20 | ||||
-rw-r--r-- | regcomp_debug.c | 9 | ||||
-rw-r--r-- | regexec.c | 6 | ||||
-rw-r--r-- | regnodes.h | 20 | ||||
-rw-r--r-- | t/re/pat_advanced.t | 86 |
7 files changed, 88 insertions, 83 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 90f5fb66ae..52aef07396 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -773,25 +773,25 @@ will be lost. SROPEN none Same as OPEN, but for script run SRCLOSE none Close preceding SROPEN - REF num 1 Match some already matched string - REFF num 1 Match already matched string, using /di + REF num 2 Match some already matched string + REFF num 2 Match already matched string, using /di rules. - REFFL num 1 Match already matched string, using /li + REFFL num 2 Match already matched string, using /li rules. - REFFU num 1 Match already matched string, usng /ui. - REFFA num 1 Match already matched string, using /aai + REFFU num 2 Match already matched string, usng /ui. + REFFA num 2 Match already matched string, using /aai rules. # Named references. Code in regcomp.c assumes that these all are after # the numbered references - REFN no-sv 1 Match some already matched string - REFFN no-sv 1 Match already matched string, using /di + REFN no-sv 2 Match some already matched string + REFFN no-sv 2 Match already matched string, using /di rules. - REFFLN no-sv 1 Match already matched string, using /li + REFFLN no-sv 2 Match already matched string, using /li rules. - REFFUN num 1 Match already matched string, using /ui + REFFUN num 2 Match already matched string, using /ui rules. - REFFAN num 1 Match already matched string, using /aai + REFFAN num 2 Match already matched string, using /aai rules. # Support for long RE @@ -2754,7 +2754,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, SvREFCNT_inc_simple_void_NN(sv_dat); } RExC_sawback = 1; - ret = reg1node(pRExC_state, + ret = reg2node(pRExC_state, ((! FOLD) ? REFN : (ASCII_FOLD_RESTRICTED) @@ -2764,7 +2764,9 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, : (LOC) ? 
REFFLN : REFFN), - num); + num, RExC_nestroot); + if (RExC_nestroot && num >= (U32)RExC_nestroot) + REGNODE_p(ret)->flags = VOLATILE_REF; *flagp |= HASWIDTH; nextchar(pRExC_state); @@ -6028,7 +6030,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } } RExC_sawback = 1; - ret = reg1node(pRExC_state, + ret = reg2node(pRExC_state, ((! FOLD) ? REF : (ASCII_FOLD_RESTRICTED) @@ -6038,7 +6040,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) : (LOC) ? REFFL : REFF), - num); + num, RExC_nestroot); if (RExC_nestroot && num >= RExC_nestroot) REGNODE_p(ret)->flags = VOLATILE_REF; if (OP(REGNODE_p(ret)) == REFF) { diff --git a/regcomp.sym b/regcomp.sym index 1c0af0cd53..2c0f4a0501 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -233,22 +233,22 @@ CLOSE CLOSE, num 1 ; Close corresponding OPEN of #n. SROPEN SROPEN, none ; Same as OPEN, but for script run SRCLOSE SRCLOSE, none ; Close preceding SROPEN -REF REF, num 1 V ; Match some already matched string -REFF REF, num 1 V ; Match already matched string, using /di rules. -REFFL REF, num 1 V ; Match already matched string, using /li rules. +REF REF, num 2 V ; Match some already matched string +REFF REF, num 2 V ; Match already matched string, using /di rules. +REFFL REF, num 2 V ; Match already matched string, using /li rules. # N?REFF[AU] could have been implemented using the FLAGS field of the # regnode, but by having a separate node type, we can use the existing switch # statement to avoid some tests -REFFU REF, num 1 V ; Match already matched string, usng /ui. -REFFA REF, num 1 V ; Match already matched string, using /aai rules. +REFFU REF, num 2 V ; Match already matched string, usng /ui. +REFFA REF, num 2 V ; Match already matched string, using /aai rules. #*Named references. Code in regcomp.c assumes that these all are after #*the numbered references -REFN REF, no-sv 1 V ; Match some already matched string -REFFN REF, no-sv 1 V ; Match already matched string, using /di rules. 
-REFFLN REF, no-sv 1 V ; Match already matched string, using /li rules. -REFFUN REF, num 1 V ; Match already matched string, using /ui rules. -REFFAN REF, num 1 V ; Match already matched string, using /aai rules. +REFN REF, no-sv 2 V ; Match some already matched string +REFFN REF, no-sv 2 V ; Match already matched string, using /di rules. +REFFLN REF, no-sv 2 V ; Match already matched string, using /li rules. +REFFUN REF, num 2 V ; Match already matched string, using /ui rules. +REFFAN REF, num 2 V ; Match already matched string, using /aai rules. #*Support for long RE LONGJMP LONGJMP, off 1 . 1 ; Jump far away. diff --git a/regcomp_debug.c b/regcomp_debug.c index d02dbfd4cb..8c5065bacd 100644 --- a/regcomp_debug.c +++ b/regcomp_debug.c @@ -489,9 +489,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ || k == GROUPP || op == ACCEPT) { AV *name_list= NULL; - U32 parno= (op == ACCEPT) ? (U32)ARG2i(o) : - (op == OPEN || op == CLOSE) ? (U32)PARNO(o) : - (U32)ARG1u(o); + U32 parno= (op == ACCEPT) ? ARG2u(o) : + (op == OPEN || op == CLOSE) ? 
PARNO(o) : + ARG1u(o); if ( RXp_PAREN_NAMES(prog) ) { name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]); } else if ( pRExC_state ) { @@ -542,6 +542,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, "/%" UVuf, (UV)parno); /* Parenth number */ } + if ( k == REF ) { + Perl_sv_catpvf(aTHX_ sv, " <%" IVdf ">", (IV)ARG2i(o)); + } if ( k == REF && reginfo) { U32 n = ARG1u(o); /* which paren pair */ I32 ln = RXp_OFFS_START(prog,n); @@ -8068,7 +8068,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) } ref_yes: if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */ - ST.cp = regcppush(rex, 0, maxopenparen); + ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen); REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(REF_next, next, locinput, loceol, script_run_begin); @@ -8105,7 +8105,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) U32 arg; case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */ - arg= (U32)ARG1u(scan); + arg = ARG1u(scan); if (cur_eval && cur_eval->locinput == locinput) { if ( ++nochange_depth > max_nochange_depth ) Perl_croak(aTHX_ @@ -8620,7 +8620,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) is_accepted = true; if (scan->flags) sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); - utmp = (U32)ARG2i(scan); + utmp = ARG2u(scan); if ( utmp ) { regnode *cursor; diff --git a/regnodes.h b/regnodes.h index a675874378..c4c7ba6a45 100644 --- a/regnodes.h +++ b/regnodes.h @@ -94,16 +94,16 @@ typedef struct regnode tregnode_POSIXL; typedef struct regnode tregnode_POSIXU; typedef struct regnode_1 tregnode_PRUNE; typedef struct regnode tregnode_PSEUDO; -typedef struct regnode_1 tregnode_REF; -typedef struct regnode_1 tregnode_REFF; -typedef struct regnode_1 tregnode_REFFA; -typedef struct regnode_1 tregnode_REFFAN; -typedef struct regnode_1 tregnode_REFFL; -typedef struct regnode_1 tregnode_REFFLN; -typedef struct 
regnode_1 tregnode_REFFN; -typedef struct regnode_1 tregnode_REFFU; -typedef struct regnode_1 tregnode_REFFUN; -typedef struct regnode_1 tregnode_REFN; +typedef struct regnode_2 tregnode_REF; +typedef struct regnode_2 tregnode_REFF; +typedef struct regnode_2 tregnode_REFFA; +typedef struct regnode_2 tregnode_REFFAN; +typedef struct regnode_2 tregnode_REFFL; +typedef struct regnode_2 tregnode_REFFLN; +typedef struct regnode_2 tregnode_REFFN; +typedef struct regnode_2 tregnode_REFFU; +typedef struct regnode_2 tregnode_REFFUN; +typedef struct regnode_2 tregnode_REFN; typedef struct regnode_p tregnode_REGEX_SET; typedef struct regnode tregnode_REG_ANY; typedef struct regnode_1 tregnode_RENUM; diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index d836525c15..d64bd1b94b 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -2590,10 +2590,10 @@ Starting parse and generation <\g{c}> | 6| brnc | | piec | | atom -<> | 8| tail~ OPEN1 'b' (4) -> REFN - | | Setting close paren #1 to 8 - | 10| lsbr~ tying lastbr REFN (6) to ender CLOSE1 'b' (8) offset 2 - | | tail~ REFN (6) -> CLOSE +<> | 9| tail~ OPEN1 'b' (4) -> REFN + | | Setting close paren #1 to 9 + | 11| lsbr~ tying lastbr REFN <1> (6) to ender CLOSE1 'b' (9) offset 3 + | | tail~ REFN <1> (6) -> CLOSE Unmatched ( in regex; marked by <-- HERE in m/(?{a})( <-- HERE ?<b>\g{c}/ at - line 1. 
Freeing REx: "(?{a})(?<b>\g{c}" EOF_DEBUG_OUT @@ -2618,35 +2618,35 @@ Starting parse and generation <\g{c})(?<c>>...| 3| brnc | | piec | | atom -<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN - | 7| lsbr~ tying lastbr REFN (3) to ender CLOSE1 'b' (5) offset 2 - | | tail~ REFN (3) -> CLOSE +<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN + | 8| lsbr~ tying lastbr REFN <1> (3) to ender CLOSE1 'b' (6) offset 3 + | | tail~ REFN <1> (3) -> CLOSE <(?<c>x)(?&b)> | | piec | | atom <?<c>x)(?&b)> | | reg -<x)(?&b)> | 9| brnc +<x)(?&b)> | 10| brnc | | piec | | atom -<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT - | 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2 - | | tail~ EXACT <x> (9) -> CLOSE +<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT + | 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2 + | | tail~ EXACT <x> (10) -> CLOSE <(?&b)> | | tail~ OPEN1 'b' (1) - | | ~ REFN (3) - | | ~ CLOSE1 'b' (5) -> OPEN + | | ~ REFN <1> (3) + | | ~ CLOSE1 'b' (6) -> OPEN | | piec | | atom <?&b)> | | reg -<> | 16| tail~ OPEN2 'c' (7) - | | ~ EXACT <x> (9) - | | ~ CLOSE2 'c' (11) -> GOSUB - | 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15 +<> | 17| tail~ OPEN2 'c' (8) + | | ~ EXACT <x> (10) + | | ~ CLOSE2 'c' (12) -> GOSUB + | 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16 | | tail~ OPEN1 'b' (1) - | | ~ REFN (3) - | | ~ CLOSE1 'b' (5) - | | ~ OPEN2 'c' (7) - | | ~ EXACT <x> (9) - | | ~ CLOSE2 'c' (11) - | | ~ GOSUB1[+0:13] 'b' (13) -> END + | | ~ REFN <1> (3) + | | ~ CLOSE1 'b' (6) + | | ~ OPEN2 'c' (8) + | | ~ EXACT <x> (10) + | | ~ CLOSE2 'c' (12) + | | ~ GOSUB1[+0:14] 'b' (14) -> END Need to redo parse Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)" Starting parse and generation @@ -2658,36 +2658,36 @@ Starting parse and generation <\g{c})(?<c>>...| 3| brnc | | piec | | atom -<)(?<c>x)(?&b)> | 5| tail~ OPEN1 'b' (1) -> REFN - | 7| lsbr~ tying lastbr REFN2 'c' (3) to ender CLOSE1 'b' (5) offset 2 - | | tail~ REFN2 
'c' (3) -> CLOSE +<)(?<c>x)(?&b)> | 6| tail~ OPEN1 'b' (1) -> REFN + | 8| lsbr~ tying lastbr REFN2 'c' <1> (3) to ender CLOSE1 'b' (6) offset 3 + | | tail~ REFN2 'c' <1> (3) -> CLOSE <(?<c>x)(?&b)> | | piec | | atom <?<c>x)(?&b)> | | reg -<x)(?&b)> | 9| brnc +<x)(?&b)> | 10| brnc | | piec | | atom -<)(?&b)> | 11| tail~ OPEN2 'c' (7) -> EXACT - | 13| lsbr~ tying lastbr EXACT <x> (9) to ender CLOSE2 'c' (11) offset 2 - | | tail~ EXACT <x> (9) -> CLOSE +<)(?&b)> | 12| tail~ OPEN2 'c' (8) -> EXACT + | 14| lsbr~ tying lastbr EXACT <x> (10) to ender CLOSE2 'c' (12) offset 2 + | | tail~ EXACT <x> (10) -> CLOSE <(?&b)> | | tail~ OPEN1 'b' (1) - | | ~ REFN2 'c' (3) - | | ~ CLOSE1 'b' (5) -> OPEN + | | ~ REFN2 'c' <1> (3) + | | ~ CLOSE1 'b' (6) -> OPEN | | piec | | atom <?&b)> | | reg -<> | 16| tail~ OPEN2 'c' (7) - | | ~ EXACT <x> (9) - | | ~ CLOSE2 'c' (11) -> GOSUB - | 17| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (16) offset 15 +<> | 17| tail~ OPEN2 'c' (8) + | | ~ EXACT <x> (10) + | | ~ CLOSE2 'c' (12) -> GOSUB + | 18| lsbr~ tying lastbr OPEN1 'b' (1) to ender END (17) offset 16 | | tail~ OPEN1 'b' (1) - | | ~ REFN2 'c' (3) - | | ~ CLOSE1 'b' (5) - | | ~ OPEN2 'c' (7) - | | ~ EXACT <x> (9) - | | ~ CLOSE2 'c' (11) - | | ~ GOSUB1[+0:13] 'b' (13) -> END -Required size 16 nodes + | | ~ REFN2 'c' <1> (3) + | | ~ CLOSE1 'b' (6) + | | ~ OPEN2 'c' (8) + | | ~ EXACT <x> (10) + | | ~ CLOSE2 'c' (12) + | | ~ GOSUB1[+0:14] 'b' (14) -> END +Required size 17 nodes first at 3 Freeing REx: "(?<b>\g{c})(?<c>x)(?&b)" EOF_DEBUG_OUT |