diff options
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | regcomp.h | 8 | ||||
-rw-r--r-- | regcomp.sym | 1 | ||||
-rw-r--r-- | regcomp_internal.h | 2 | ||||
-rw-r--r-- | regexec.c | 78 | ||||
-rw-r--r-- | regexp.h | 6 | ||||
-rw-r--r-- | regnodes.h | 34 |
7 files changed, 108 insertions, 23 deletions
@@ -6039,6 +6039,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ? REFFL : REFF), num); + if (RExC_nestroot && num >= RExC_nestroot) + REGNODE_p(ret)->flags = VOLATILE_REF; if (OP(REGNODE_p(ret)) == REFF) { RExC_seen_d_op = TRUE; } @@ -13,6 +13,12 @@ #define PERL_REGCOMP_H_ +/* define this to 1 if you want to enable a really aggressive and inefficient + * paren cleanup during backtracking. We should pass test with this as 0. */ +#ifndef RE_PESSIMISTIC_PARENS +#define RE_PESSIMISTIC_PARENS 0 +#endif + #include "regcharclass.h" /* Convert branch sequences to more efficient trie ops? */ @@ -1483,6 +1489,8 @@ typedef enum { #define EVAL_OPTIMISTIC_FLAG 128 #define EVAL_FLAGS_MASK (EVAL_OPTIMISTIC_FLAG-1) + + #endif /* PERL_REGCOMP_H_ */ /* diff --git a/regcomp.sym b/regcomp.sym index e01844f9b0..d58f1cb54f 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -344,3 +344,4 @@ MARKPOINT next:FAIL SKIP next:FAIL CUTGROUP next:FAIL KEEPS next:FAIL +REF next:FAIL diff --git a/regcomp_internal.h b/regcomp_internal.h index c273d2f70f..ff428bce33 100644 --- a/regcomp_internal.h +++ b/regcomp_internal.h @@ -1258,4 +1258,6 @@ static const scan_data_t zero_scan_data = { #define REGNODE_STEP_OVER(ret,t1,t2) \ NEXT_OFF(REGNODE_p(ret)) = ((sizeof(t1)+sizeof(t2))/sizeof(regnode)) +#define VOLATILE_REF 1 + #endif /* REGCOMP_INTERNAL_H */ @@ -6841,8 +6841,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) case TRIE_next_fail: /* we failed - try next alternative */ { U8 *uc; - REGCP_UNWIND(ST.lastcp); - regcppop(rex,&maxopenparen); + if (RE_PESSIMISTIC_PARENS) { + REGCP_UNWIND(ST.lastcp); + regcppop(rex,&maxopenparen); + } if ( ST.jump ) { /* undo any captures done in the tail part of a branch, * e.g. @@ -6965,8 +6967,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) }); if ( ST.accepted > 1 || has_cutgroup || ST.jump ) { - (void)regcppush(rex, 0, maxopenparen); - REGCP_SET(ST.lastcp); + if (RE_PESSIMISTIC_PARENS) { + (void)regcppush(rex, 0, maxopenparen); + REGCP_SET(ST.lastcp); + } PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol, script_run_begin); NOT_REACHED; /* NOTREACHED */ @@ -7983,6 +7987,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) utf8_fold_flags = 0; goto do_ref; +#undef ST +#define ST st->u.backref case REF: /* /\1/ */ folder = NULL; fold_array = NULL; @@ -8018,8 +8024,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) endref = rex->offs[n].end; if (ln == -1 || endref == -1) sayNO; /* Do not match unless seen CLOSEn. */ + if (ln == endref) - break; + goto ref_yes; s = reginfo->strbeg + ln; if (type != REF /* REF can do byte comparison */ @@ -8038,7 +8045,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) sayNO; } locinput = limit; - break; + goto ref_yes; } /* Not utf8: Inline the first character, for speed. */ @@ -8058,8 +8065,26 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) : ! folder(aTHX_ locinput, s, ln))) sayNO; locinput += ln; - break; } + ref_yes: + if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */ + ST.cp = regcppush(rex, 0, maxopenparen); + REGCP_SET(ST.lastcp); + PUSH_STATE_GOTO(REF_next, next, locinput, loceol, + script_run_begin); + } + break; + NOT_REACHED; /* NOTREACHED */ + + case REF_next: + sayYES; + break; + + case REF_next_fail: + REGCP_UNWIND(ST.lastcp); + regcppop(rex, &maxopenparen); + sayNO; + break; case NOTHING: /* null op; e.g. the 'nothing' following * the '*' in m{(a+|b)*}' */ @@ -9005,8 +9030,7 @@ NULL ); /* Try grabbing another A and see if it helps. */ cur_curlyx->u.curlyx.lastloc = locinput; - ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, - maxopenparen); + ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen); REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(WHILEM_A_min, /*A*/ REGNODE_AFTER(ST.save_curlyx->u.curlyx.me), @@ -9035,8 +9059,10 @@ NULL ST.lastcloseparen = rex->lastcloseparen; ST.next_branch = next; REGCP_SET(ST.cp); - regcppush(rex, 0, maxopenparen); - REGCP_SET(ST.lastcp); + if (RE_PESSIMISTIC_PARENS) { + regcppush(rex, 0, maxopenparen); + REGCP_SET(ST.lastcp); + } /* Now go into the branch */ if (has_cutgroup) { @@ -9073,8 +9099,10 @@ NULL do_cutgroup = 0; no_final = 0; } - REGCP_UNWIND(ST.lastcp); - regcppop(rex,&maxopenparen); + if (RE_PESSIMISTIC_PARENS) { + REGCP_UNWIND(ST.lastcp); + regcppop(rex,&maxopenparen); + } REGCP_UNWIND(ST.cp); UNWIND_PAREN(ST.lastparen, ST.lastcloseparen); CAPTURE_CLEAR(ST.before_paren+1,ST.after_paren,"BRANCH_next_fail"); @@ -9439,8 +9467,10 @@ NULL case CURLY_B_min_fail: /* failed to find B in a non-greedy match. */ - REGCP_UNWIND(ST.lastcp); - regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */ + if (RE_PESSIMISTIC_PARENS) { + REGCP_UNWIND(ST.lastcp); + regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */ + } REGCP_UNWIND(ST.cp); if (ST.paren) { UNWIND_PAREN(ST.lastparen, ST.lastcloseparen); @@ -9553,8 +9583,10 @@ NULL } curly_try_B_min: - (void)regcppush(rex, 0, maxopenparen); - REGCP_SET(ST.lastcp); + if (RE_PESSIMISTIC_PARENS) { + (void)regcppush(rex, 0, maxopenparen); + REGCP_SET(ST.lastcp); + } CURLY_SETPAREN(ST.paren, ST.count); PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol, script_run_begin); @@ -9568,8 +9600,10 @@ NULL && locinput + ST.Binfo.min_length <= loceol && S_test_EXACTISH_ST(locinput, ST.Binfo))) { - (void)regcppush(rex, 0, maxopenparen); - REGCP_SET(ST.lastcp); + if (RE_PESSIMISTIC_PARENS) { + (void)regcppush(rex, 0, maxopenparen); + REGCP_SET(ST.lastcp); + } CURLY_SETPAREN(ST.paren, ST.count); PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol, script_run_begin); @@ -9581,8 +9615,10 @@ NULL case CURLY_B_max_fail: /* failed to find B in a greedy match */ - REGCP_UNWIND(ST.lastcp); - regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */ + if (RE_PESSIMISTIC_PARENS) { + REGCP_UNWIND(ST.lastcp); + regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */ + } CURLY_B_all_failed: REGCP_UNWIND(ST.cp); if (ST.paren) { @@ -958,7 +958,7 @@ typedef struct regmatch_state { struct { U32 paren; CHECKPOINT cp; - CHECKPOINT lastcp; /* remember current savestack index */ + CHECKPOINT lastcp; /* remember current savestack index */ U32 lastparen; U32 lastcloseparen; char *maxpos; /* highest possible point in string to match */ @@ -969,6 +969,10 @@ typedef struct regmatch_state { struct next_matchable_info Binfo; } curly; /* and CURLYN/PLUS/STAR */ + struct { + CHECKPOINT cp; + CHECKPOINT lastcp; + } backref; /* REF and friends */ } u; } regmatch_state; diff --git a/regnodes.h b/regnodes.h index 321a841989..77e8e66c0a 100644 --- a/regnodes.h +++ b/regnodes.h @@ -128,7 +128,7 @@ typedef struct regnode tregnode_WHILEM; /* Regops and State definitions */ #define REGNODE_MAX 111 -#define REGMATCH_STATE_MAX 151 +#define REGMATCH_STATE_MAX 153 /* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */ #define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8))) @@ -1573,6 +1573,22 @@ typedef struct regnode tregnode_WHILEM; #define KEEPS_next_fail_t8_pb 606 /* 0x25e */ #define KEEPS_next_fail_t8_p8 607 /* 0x25f */ +#define REF_next 152 /* 0x98 state for REF */ +#define REF_next_tb 304 /* 0x130 */ +#define REF_next_t8 305 /* 0x131 */ +#define REF_next_tb_pb 608 /* 0x260 */ +#define REF_next_tb_p8 609 /* 0x261 */ +#define REF_next_t8_pb 610 /* 0x262 */ +#define REF_next_t8_p8 611 /* 0x263 */ + +#define REF_next_fail 153 /* 0x99 state for REF */ +#define REF_next_fail_tb 306 /* 0x132 */ +#define REF_next_fail_t8 307 /* 0x133 */ +#define REF_next_fail_tb_pb 612 /* 0x264 */ +#define REF_next_fail_tb_p8 613 /* 0x265 */ +#define REF_next_fail_t8_pb 614 /* 0x266 */ +#define REF_next_fail_t8_p8 615 /* 0x267 */ + /* PL_regnode_name[] - Opcode/state names in string form, for debugging */ @@ -1733,6 +1749,8 @@ EXTCONST char * const PL_regnode_name[] = { "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */ "KEEPS_next", /* REGNODE_MAX +0x27 */ "KEEPS_next_fail", /* REGNODE_MAX +0x28 */ + "REF_next", /* REGNODE_MAX +0x29 */ + "REF_next_fail", /* REGNODE_MAX +0x2a */ }; #endif /* DOINIT */ @@ -2806,6 +2824,20 @@ EXTCONST struct regnode_meta PL_regnode_info[] = { .arg_len = 0, .arg_len_varies = 0, .off_by_arg = 0 + }, + { + /* #152 state REF_next */ + .type = REF, + .arg_len = 0, + .arg_len_varies = 0, + .off_by_arg = 0 + }, + { + /* #153 state REF_next_fail */ + .type = REF, + .arg_len = 0, + .arg_len_varies = 0, + .off_by_arg = 0 } }; #endif /* DOINIT */ |