summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 2
-rw-r--r-- regcomp.h 8
-rw-r--r-- regcomp.sym 1
-rw-r--r-- regcomp_internal.h 2
-rw-r--r-- regexec.c 78
-rw-r--r-- regexp.h 6
-rw-r--r-- regnodes.h 34
7 files changed, 108 insertions, 23 deletions
diff --git a/regcomp.c b/regcomp.c
index 61ec8c211f..bf9825941f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6039,6 +6039,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
? REFFL
: REFF),
num);
+ if (RExC_nestroot && num >= RExC_nestroot)
+ REGNODE_p(ret)->flags = VOLATILE_REF;
if (OP(REGNODE_p(ret)) == REFF) {
RExC_seen_d_op = TRUE;
}
diff --git a/regcomp.h b/regcomp.h
index 4a4ed52219..515ad7d7bd 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -13,6 +13,12 @@
#define PERL_REGCOMP_H_
+/* Define this to 1 if you want to enable a really aggressive and inefficient
+ * paren cleanup during backtracking. We should pass the tests with this set to 0. */
+#ifndef RE_PESSIMISTIC_PARENS
+#define RE_PESSIMISTIC_PARENS 0
+#endif
+
#include "regcharclass.h"
/* Convert branch sequences to more efficient trie ops? */
@@ -1483,6 +1489,8 @@ typedef enum {
#define EVAL_OPTIMISTIC_FLAG 128
#define EVAL_FLAGS_MASK (EVAL_OPTIMISTIC_FLAG-1)
+
+
#endif /* PERL_REGCOMP_H_ */
/*
diff --git a/regcomp.sym b/regcomp.sym
index e01844f9b0..d58f1cb54f 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -344,3 +344,4 @@ MARKPOINT next:FAIL
SKIP next:FAIL
CUTGROUP next:FAIL
KEEPS next:FAIL
+REF next:FAIL
diff --git a/regcomp_internal.h b/regcomp_internal.h
index c273d2f70f..ff428bce33 100644
--- a/regcomp_internal.h
+++ b/regcomp_internal.h
@@ -1258,4 +1258,6 @@ static const scan_data_t zero_scan_data = {
#define REGNODE_STEP_OVER(ret,t1,t2) \
NEXT_OFF(REGNODE_p(ret)) = ((sizeof(t1)+sizeof(t2))/sizeof(regnode))
+#define VOLATILE_REF 1 /* backref node flag: set by regcomp.c when num >= RExC_nestroot; regexec.c then checkpoints the parens after the ref matches */
+
#endif /* REGCOMP_INTERNAL_H */
diff --git a/regexec.c b/regexec.c
index 116d150865..dcb8875b46 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6841,8 +6841,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case TRIE_next_fail: /* we failed - try next alternative */
{
U8 *uc;
- REGCP_UNWIND(ST.lastcp);
- regcppop(rex,&maxopenparen);
+ if (RE_PESSIMISTIC_PARENS) {
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex,&maxopenparen);
+ }
if ( ST.jump ) {
/* undo any captures done in the tail part of a branch,
* e.g.
@@ -6965,8 +6967,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
});
if ( ST.accepted > 1 || has_cutgroup || ST.jump ) {
- (void)regcppush(rex, 0, maxopenparen);
- REGCP_SET(ST.lastcp);
+ if (RE_PESSIMISTIC_PARENS) {
+ (void)regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ }
PUSH_STATE_GOTO(TRIE_next, scan, (char*)uc, loceol,
script_run_begin);
NOT_REACHED; /* NOTREACHED */
@@ -7983,6 +7987,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
utf8_fold_flags = 0;
goto do_ref;
+#undef ST
+#define ST st->u.backref
case REF: /* /\1/ */
folder = NULL;
fold_array = NULL;
@@ -8018,8 +8024,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
endref = rex->offs[n].end;
if (ln == -1 || endref == -1)
sayNO; /* Do not match unless seen CLOSEn. */
+
if (ln == endref)
- break;
+ goto ref_yes;
s = reginfo->strbeg + ln;
if (type != REF /* REF can do byte comparison */
@@ -8038,7 +8045,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
sayNO;
}
locinput = limit;
- break;
+ goto ref_yes;
}
/* Not utf8: Inline the first character, for speed. */
@@ -8058,8 +8065,26 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
: ! folder(aTHX_ locinput, s, ln)))
sayNO;
locinput += ln;
- break;
}
+ ref_yes:
+ if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */
+ ST.cp = regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
+ script_run_begin);
+ }
+ break;
+ NOT_REACHED; /* NOTREACHED */
+
+ case REF_next:
+ sayYES;
+ break;
+
+ case REF_next_fail:
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex, &maxopenparen);
+ sayNO;
+ break;
case NOTHING: /* null op; e.g. the 'nothing' following
* the '*' in m{(a+|b)*}' */
@@ -9005,8 +9030,7 @@ NULL
);
/* Try grabbing another A and see if it helps. */
cur_curlyx->u.curlyx.lastloc = locinput;
- ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
- maxopenparen);
+ ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(WHILEM_A_min,
/*A*/ REGNODE_AFTER(ST.save_curlyx->u.curlyx.me),
@@ -9035,8 +9059,10 @@ NULL
ST.lastcloseparen = rex->lastcloseparen;
ST.next_branch = next;
REGCP_SET(ST.cp);
- regcppush(rex, 0, maxopenparen);
- REGCP_SET(ST.lastcp);
+ if (RE_PESSIMISTIC_PARENS) {
+ regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ }
/* Now go into the branch */
if (has_cutgroup) {
@@ -9073,8 +9099,10 @@ NULL
do_cutgroup = 0;
no_final = 0;
}
- REGCP_UNWIND(ST.lastcp);
- regcppop(rex,&maxopenparen);
+ if (RE_PESSIMISTIC_PARENS) {
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex,&maxopenparen);
+ }
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
CAPTURE_CLEAR(ST.before_paren+1,ST.after_paren,"BRANCH_next_fail");
@@ -9439,8 +9467,10 @@ NULL
case CURLY_B_min_fail:
/* failed to find B in a non-greedy match. */
- REGCP_UNWIND(ST.lastcp);
- regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
+ if (RE_PESSIMISTIC_PARENS) {
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
+ }
REGCP_UNWIND(ST.cp);
if (ST.paren) {
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
@@ -9553,8 +9583,10 @@ NULL
}
curly_try_B_min:
- (void)regcppush(rex, 0, maxopenparen);
- REGCP_SET(ST.lastcp);
+ if (RE_PESSIMISTIC_PARENS) {
+ (void)regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ }
CURLY_SETPAREN(ST.paren, ST.count);
PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
script_run_begin);
@@ -9568,8 +9600,10 @@ NULL
&& locinput + ST.Binfo.min_length <= loceol
&& S_test_EXACTISH_ST(locinput, ST.Binfo)))
{
- (void)regcppush(rex, 0, maxopenparen);
- REGCP_SET(ST.lastcp);
+ if (RE_PESSIMISTIC_PARENS) {
+ (void)regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ }
CURLY_SETPAREN(ST.paren, ST.count);
PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
script_run_begin);
@@ -9581,8 +9615,10 @@ NULL
case CURLY_B_max_fail:
/* failed to find B in a greedy match */
- REGCP_UNWIND(ST.lastcp);
- regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
+ if (RE_PESSIMISTIC_PARENS) {
+ REGCP_UNWIND(ST.lastcp);
+ regcppop(rex, &maxopenparen); /* Restore some previous $<digit>s? */
+ }
CURLY_B_all_failed:
REGCP_UNWIND(ST.cp);
if (ST.paren) {
diff --git a/regexp.h b/regexp.h
index 93459573e4..86b11e94f0 100644
--- a/regexp.h
+++ b/regexp.h
@@ -958,7 +958,7 @@ typedef struct regmatch_state {
struct {
U32 paren;
CHECKPOINT cp;
- CHECKPOINT lastcp; /* remember current savestack index */
+ CHECKPOINT lastcp; /* remember current savestack index */
U32 lastparen;
U32 lastcloseparen;
char *maxpos; /* highest possible point in string to match */
@@ -969,6 +969,10 @@ typedef struct regmatch_state {
struct next_matchable_info Binfo;
} curly; /* and CURLYN/PLUS/STAR */
+ struct {
+ CHECKPOINT cp;
+ CHECKPOINT lastcp;
+ } backref; /* REF and friends */
} u;
} regmatch_state;
diff --git a/regnodes.h b/regnodes.h
index 321a841989..77e8e66c0a 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -128,7 +128,7 @@ typedef struct regnode tregnode_WHILEM;
/* Regops and State definitions */
#define REGNODE_MAX 111
-#define REGMATCH_STATE_MAX 151
+#define REGMATCH_STATE_MAX 153
/* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */
#define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8)))
@@ -1573,6 +1573,22 @@ typedef struct regnode tregnode_WHILEM;
#define KEEPS_next_fail_t8_pb 606 /* 0x25e */
#define KEEPS_next_fail_t8_p8 607 /* 0x25f */
+#define REF_next 152 /* 0x98 state for REF */
+#define REF_next_tb 304 /* 0x130 */
+#define REF_next_t8 305 /* 0x131 */
+#define REF_next_tb_pb 608 /* 0x260 */
+#define REF_next_tb_p8 609 /* 0x261 */
+#define REF_next_t8_pb 610 /* 0x262 */
+#define REF_next_t8_p8 611 /* 0x263 */
+
+#define REF_next_fail 153 /* 0x99 state for REF */
+#define REF_next_fail_tb 306 /* 0x132 */
+#define REF_next_fail_t8 307 /* 0x133 */
+#define REF_next_fail_tb_pb 612 /* 0x264 */
+#define REF_next_fail_tb_p8 613 /* 0x265 */
+#define REF_next_fail_t8_pb 614 /* 0x266 */
+#define REF_next_fail_t8_p8 615 /* 0x267 */
+
/* PL_regnode_name[] - Opcode/state names in string form, for debugging */
@@ -1733,6 +1749,8 @@ EXTCONST char * const PL_regnode_name[] = {
"CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
"KEEPS_next", /* REGNODE_MAX +0x27 */
"KEEPS_next_fail", /* REGNODE_MAX +0x28 */
+ "REF_next", /* REGNODE_MAX +0x29 */
+ "REF_next_fail", /* REGNODE_MAX +0x2a */
};
#endif /* DOINIT */
@@ -2806,6 +2824,20 @@ EXTCONST struct regnode_meta PL_regnode_info[] = {
.arg_len = 0,
.arg_len_varies = 0,
.off_by_arg = 0
+ },
+ {
+ /* #152 state REF_next */
+ .type = REF,
+ .arg_len = 0,
+ .arg_len_varies = 0,
+ .off_by_arg = 0
+ },
+ {
+ /* #153 state REF_next_fail */
+ .type = REF,
+ .arg_len = 0,
+ .arg_len_varies = 0,
+ .off_by_arg = 0
}
};
#endif /* DOINIT */