diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-03-19 16:29:12 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-03-19 16:29:12 +0000 |
commit | 71b68580ed5c7d81eaa00be59d860272eabfe3d3 (patch) | |
tree | 6c12edddaa4f3d083ccf0ef7070b77a6f46cae93 /pcre_exec.c | |
parent | 536545ea7dd23c619f577c193a19108327f7dfb6 (diff) | |
download | pcre-71b68580ed5c7d81eaa00be59d860272eabfe3d3.tar.gz |
Code changes for simpler backtracking handling (docs to follow).
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1296 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_exec.c')
-rw-r--r-- | pcre_exec.c | 67 |
1 file changed, 46 insertions, 21 deletions
diff --git a/pcre_exec.c b/pcre_exec.c
index 877e3af..bcc2c63 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1603,6 +1603,8 @@ for (;;)
       }
     else condassert = FALSE;
 
+    /* Loop for each branch */
+
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
@@ -1613,18 +1615,28 @@ for (;;)
         }
       md->mark = save_mark;
 
-      /* A COMMIT failure must fail the entire assertion, without trying any
-      subsequent branches. */
-
-      if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
+      /* See comment in the code for capturing groups above about handling
+      THEN. */
 
-      /* PCRE does not allow THEN to escape beyond an assertion; it
-      is treated as NOMATCH. */
+      if (rrc == MATCH_THEN)
+        {
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
+        }
+
+      /* Anything other than NOMATCH causes the assertion to fail. This
+      includes COMMIT, SKIP, and PRUNE. However, this consistent approach does
+      not always have exactly the same effect as in Perl. */
 
-      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       ecode += GET(ecode, 1);
       }
     while (*ecode == OP_ALT);
+
+    /* If we have tried all the alternative branches, the assertion has
+    failed. */
 
     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 
@@ -1632,17 +1644,16 @@ for (;;)
 
     if (condassert) RRETURN(MATCH_MATCH);
 
-    /* Continue from after the assertion, updating the offsets high water
-    mark, since extracts may have been taken during the assertion. */
+    /* Continue from after a successful assertion, updating the offsets high
+    water mark, since extracts may have been taken during the assertion. */
 
     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
     ecode += 1 + LINK_SIZE;
     offset_top = md->end_offset_top;
     continue;
 
-    /* Negative assertion: all branches must fail to match. Encountering SKIP,
-    PRUNE, or COMMIT means we must assume failure without checking subsequent
-    branches. */
+    /* Negative assertion: all branches must fail to match for the assertion to
+    succeed. */
 
     case OP_ASSERT_NOT:
     case OP_ASSERTBACK_NOT:
@@ -1654,28 +1665,42 @@ for (;;)
       }
     else condassert = FALSE;
 
+    /* Loop for each alternative branch. */
+
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
       md->mark = save_mark;
+
+      /* A successful match means the assertion has failed. */
+
       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
-      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
+
+      /* See comment in the code for capturing groups above about handling
+      THEN. */
+
+      if (rrc == MATCH_THEN)
         {
-        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
-        break;
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
         }
+
+      /* No match on a branch means we must carry on and try the next branch.
+      Anything else, in particular, SKIP, PRUNE, etc. causes a failure in the
+      enclosing branch. This is a consistent approach, but does not always have
+      the same effect as in Perl. */
 
-      /* PCRE does not allow THEN to escape beyond an assertion; it is treated
-      as NOMATCH. */
-
-      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       ecode += GET(ecode,1);
       }
     while (*ecode == OP_ALT);
+
+    /* All branches in the assertion failed to match. */
 
     if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
-
-    ecode += 1 + LINK_SIZE;
+    ecode += 1 + LINK_SIZE; /* Continue with current branch */
     continue;
 
     /* Move the subject pointer back. This occurs only at the start of