diff options
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | pcre_exec.c | 24 | ||||
-rw-r--r-- | pcre_internal.h | 3 | ||||
-rw-r--r-- | testdata/testinput1 | 12 | ||||
-rw-r--r-- | testdata/testoutput1 | 18 |
5 files changed, 50 insertions, 10 deletions
@@ -101,6 +101,9 @@ Version 8.33 xx-xxxx-201x 26. Fix infinite loop when /(?<=(*SKIP)ac)a/ is matched against aa. +27. Fix the case where there are two or more SKIPs with arguments that may be + ignored. + Version 8.32 30-November-2012 ----------------------------- diff --git a/pcre_exec.c b/pcre_exec.c index b86ffdf..2118ced 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -821,11 +821,16 @@ for (;;) RRETURN(MATCH_SKIP); /* Note that, for Perl compatibility, SKIP with an argument does NOT set - nomatch_mark. There is a flag that disables this opcode when re-matching a - pattern that ended with a SKIP for which there was not a matching MARK. */ + nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was + not a matching mark, we have to re-run the match, ignoring the SKIP_ARG + that failed and any that precede it (either they also failed, or were not + triggered). To do this, we maintain a count of executed SKIP_ARGs. If a + SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg + set to the count of the one that failed. */ case OP_SKIP_ARG: - if (md->ignore_skip_arg) + md->skip_arg_count++; + if (md->skip_arg_count <= md->ignore_skip_arg) { ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; break; @@ -834,11 +839,11 @@ for (;;) eptrb, RM57); if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) RRETURN(rrc); - + /* Pass back the current skip name by overloading md->start_match_ptr and returning the special MATCH_SKIP_ARG return code. This will either be caught by a matching MARK, or get to the top, where it causes a rematch - with the md->ignore_skip_arg flag set. */ + with md->ignore_skip_arg set to the value of md->skip_arg_count. 
*/ md->start_match_ptr = ecode + 2; RRETURN(MATCH_SKIP_ARG); @@ -6516,7 +6521,7 @@ end_subject = md->end_subject; md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; md->use_ucp = (re->options & PCRE_UCP) != 0; md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -md->ignore_skip_arg = FALSE; +md->ignore_skip_arg = 0; /* Some options are unpacked into BOOL variables in the hope that testing them will be faster than individual option bits. */ @@ -6898,6 +6903,7 @@ for(;;) md->match_call_count = 0; md->match_function_type = 0; md->end_offset_top = 0; + md->skip_arg_count = 0; rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); if (md->hitend && start_partial == NULL) { @@ -6916,7 +6922,7 @@ for(;;) case MATCH_SKIP_ARG: new_start_match = start_match; - md->ignore_skip_arg = TRUE; + md->ignore_skip_arg = md->skip_arg_count; break; /* SKIP passes back the next starting point explicitly, but if it is no @@ -6931,12 +6937,12 @@ for(;;) /* Fall through */ /* NOMATCH and PRUNE advance by one character. THEN at this level acts - exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */ + exactly like PRUNE. Unset ignore SKIP-with-argument. 
*/ case MATCH_NOMATCH: case MATCH_PRUNE: case MATCH_THEN: - md->ignore_skip_arg = FALSE; + md->ignore_skip_arg = 0; new_start_match = start_match + 1; #ifdef SUPPORT_UTF if (utf) diff --git a/pcre_internal.h b/pcre_internal.h index 31ecbc4..d9e0c60 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -2469,6 +2469,8 @@ typedef struct match_data { int nllen; /* Newline string length */ int name_count; /* Number of names in name table */ int name_entry_size; /* Size of entry in names table */ + unsigned int skip_arg_count; /* For counting SKIP_ARGs */ + unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */ pcre_uchar *name_table; /* Table of names */ pcre_uchar nl[4]; /* Newline string when fixed */ const pcre_uint8 *lcc; /* Points to lower casing table */ @@ -2485,7 +2487,6 @@ typedef struct match_data { BOOL hitend; /* Hit the end of the subject at some point */ BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ BOOL hasthen; /* Pattern contains (*THEN) */ - BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ const pcre_uchar *start_code; /* For use when recursing */ PCRE_PUCHAR start_subject; /* Start of the subject string */ PCRE_PUCHAR end_subject; /* End of the subject string */ diff --git a/testdata/testinput1 b/testdata/testinput1 index 6e19ca6..dcecc6a 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5315,4 +5315,16 @@ name were given. 
---/ /(?<=(*SKIP)ac)a/ aa +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK + AAAC + +/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K + acacd + +/A(*SKIP:m)x|A(*SKIP:n)x|AB/K + AB + +/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K + acacd + /-- End of testinput1 --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 1cb6264..c2fab0d 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -8811,4 +8811,22 @@ No match aa No match +/A(*MARK:A)A+(*SKIP:B)(B|Z) | AC/xK + AAAC + 0: AC + +/a(*SKIP:m)x|ac(*:n)(*SKIP:n)d|ac/K + acacd + 0: acd +MK: n + +/A(*SKIP:m)x|A(*SKIP:n)x|AB/K + AB + 0: AB + +/((*SKIP:r)d){0}a(*SKIP:m)x|ac(*:n)|ac/K + acacd + 0: ac +MK: n + /-- End of testinput1 --/ |