diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-03-03 16:08:23 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-03-03 16:08:23 +0000 |
commit | bb83db6447434ffc92fdee4feb08d99b45e00f9d (patch) | |
tree | c287052564082140b69d9758f0729dc296423c68 | |
parent | ccb6ec2e2cbd01c49832a4ba80f0ec4425846138 (diff) | |
download | pcre-bb83db6447434ffc92fdee4feb08d99b45e00f9d.tar.gz |
Fix bug where the pattern (?(?=.*b)b|^) was incorrectly compiled as having to
match at the start of a line; also fix a bug causing a crash when auto-callout
is used with a conditional assertion.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@381 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | pcre_compile.c | 18 | ||||
-rw-r--r-- | pcre_exec.c | 30 | ||||
-rw-r--r-- | testdata/testinput1 | 6 | ||||
-rw-r--r-- | testdata/testinput2 | 8 | ||||
-rw-r--r-- | testdata/testoutput1 | 8 | ||||
-rw-r--r-- | testdata/testoutput2 | 52 |
7 files changed, 117 insertions, 11 deletions
@@ -39,7 +39,11 @@ Version 7.9 xx-xxx-09 8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at start or after a newline", because the conditional assertion was not being - skipped when checking for this condition. + correctly handled. The rule now is that both the assertion and what follows + in the first alternative must satisfy the test. + +9. If auto-callout was enabled in a pattern with a conditional group, PCRE + could crash during matching. Version 7.8 05-Sep-08 diff --git a/pcre_compile.c b/pcre_compile.c index 8c55e75..5f6d89f 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2009 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -5811,28 +5811,28 @@ do { NULL, 0, FALSE); register int op = *scode; - /* If we are at the start of a conditional group, skip over the condition. - before inspecting the first opcode after the condition. */ + /* If we are at the start of a conditional assertion group, *both* the + conditional assertion *and* what follows the condition must satisfy the test + for start of line. Other kinds of condition fail. Note that there may be an + auto-callout at the start of a condition. 
*/ if (op == OP_COND) { scode += 1 + LINK_SIZE; + if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT]; switch (*scode) { case OP_CREF: case OP_RREF: - scode += 3; - break; - case OP_DEF: - scode += 1; - break; + return FALSE; default: /* Assertion */ + if (!is_startline(scode, bracket_map, backref_map)) return FALSE; do scode += GET(scode, 1); while (*scode == OP_ALT); + scode += 1 + LINK_SIZE; break; } - scode = first_significant_code(scode, NULL, 0, FALSE); op = *scode; } diff --git a/pcre_exec.c b/pcre_exec.c index 8096f3a..43af1c8 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2009 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -787,6 +787,34 @@ for (;;) case OP_COND: case OP_SCOND: + /* Because of the way auto-callout works during compile, a callout item is + inserted between OP_COND and an assertion condition. 
*/ + + if (ecode[LINK_SIZE+1] == OP_CALLOUT) + { + if (pcre_callout != NULL) + { + pcre_callout_block cb; + cb.version = 1; /* Version 1 of the callout block */ + cb.callout_number = ecode[LINK_SIZE+2]; + cb.offset_vector = md->offset_vector; + cb.subject = (PCRE_SPTR)md->start_subject; + cb.subject_length = md->end_subject - md->start_subject; + cb.start_match = mstart - md->start_subject; + cb.current_position = eptr - md->start_subject; + cb.pattern_position = GET(ecode, LINK_SIZE + 3); + cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE); + cb.capture_top = offset_top/2; + cb.capture_last = md->capture_last; + cb.callout_data = md->callout_data; + if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + } + ecode += _pcre_OP_lengths[OP_CALLOUT]; + } + + /* Now see what the actual condition is */ + if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */ { offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ diff --git a/testdata/testinput1 b/testdata/testinput1 index dc01c0c..ecfd365 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -4055,4 +4055,10 @@ adc abc +/(?(?=b).*b|^d)/ + abc + +/(?(?=.*b).*b|^d)/ + abc + / End of testinput1 / diff --git a/testdata/testinput2 b/testdata/testinput2 index 2b64546..2f80926 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2726,4 +2726,12 @@ a random value. 
/Ix /(abc|pqr|123){0}[xyz]/SI +/(?(?=.*b)b|^)/CI + adc + abc + +/(?(?=b).*b|^d)/I + +/(?(?=.*b).*b|^d)/I + / End of testinput2 / diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 0e26007..83ef39c 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -6633,4 +6633,12 @@ No match abc 0: b +/(?(?=b).*b|^d)/ + abc + 0: b + +/(?(?=.*b).*b|^d)/ + abc + 0: ab + / End of testinput1 / diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 2ac018b..477c145 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -9638,4 +9638,56 @@ No first char No need char Starting byte set: x y z +/(?(?=.*b)b|^)/CI +Capturing subpattern count = 0 +Partial matching not supported +Options: +No first char +No need char + adc +--->adc + +0 ^ (?(?=.*b)b|^) + +2 ^ (?=.*b) + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +7 ^ b + 0: + abc +--->abc + +0 ^ (?(?=.*b)b|^) + +2 ^ (?=.*b) + +5 ^ .* + +7 ^ ^ b + +7 ^ ^ b + +7 ^^ b + +8 ^ ^ ) + +9 ^ b + +0 ^ (?(?=.*b)b|^) + +2 ^ (?=.*b) + +5 ^ .* + +7 ^ ^ b + +7 ^^ b + +7 ^ b + +8 ^^ ) + +9 ^ b ++10 ^^ | ++13 ^^ + 0: b + +/(?(?=b).*b|^d)/I +Capturing subpattern count = 0 +Partial matching not supported +No options +No first char +No need char + +/(?(?=.*b).*b|^d)/I +Capturing subpattern count = 0 +Partial matching not supported +No options +First char at start or follows newline +No need char + / End of testinput2 / |