summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-03 16:08:23 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-03-03 16:08:23 +0000
commit bb83db6447434ffc92fdee4feb08d99b45e00f9d (patch)
tree c287052564082140b69d9758f0729dc296423c68
parent ccb6ec2e2cbd01c49832a4ba80f0ec4425846138 (diff)
download pcre-bb83db6447434ffc92fdee4feb08d99b45e00f9d.tar.gz
Fix bug with (?(?=.*b)b|^) thinking it must match at start of line; also fix
bug causing a crash when auto-callout is used with a conditional assertion.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@381 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 6
-rw-r--r-- pcre_compile.c 18
-rw-r--r-- pcre_exec.c 30
-rw-r--r-- testdata/testinput1 6
-rw-r--r-- testdata/testinput2 8
-rw-r--r-- testdata/testoutput1 8
-rw-r--r-- testdata/testoutput2 52
7 files changed, 117 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 5f7e09c..91a3e0b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -39,7 +39,11 @@ Version 7.9 xx-xxx-09
8. The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
start or after a newline", because the conditional assertion was not being
- skipped when checking for this condition.
+ correctly handled. The rule now is that both the assertion and what follows
+ in the first alternative must satisfy the test.
+
+9. If auto-callout was enabled in a pattern with a conditional group, PCRE
+ could crash during matching.
Version 7.8 05-Sep-08
diff --git a/pcre_compile.c b/pcre_compile.c
index 8c55e75..5f6d89f 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -5811,28 +5811,28 @@ do {
NULL, 0, FALSE);
register int op = *scode;
- /* If we are at the start of a conditional group, skip over the condition.
- before inspecting the first opcode after the condition. */
+ /* If we are at the start of a conditional assertion group, *both* the
+ conditional assertion *and* what follows the condition must satisfy the test
+ for start of line. Other kinds of condition fail. Note that there may be an
+ auto-callout at the start of a condition. */
if (op == OP_COND)
{
scode += 1 + LINK_SIZE;
+ if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
switch (*scode)
{
case OP_CREF:
case OP_RREF:
- scode += 3;
- break;
-
case OP_DEF:
- scode += 1;
- break;
+ return FALSE;
default: /* Assertion */
+ if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
do scode += GET(scode, 1); while (*scode == OP_ALT);
+ scode += 1 + LINK_SIZE;
break;
}
-
scode = first_significant_code(scode, NULL, 0, FALSE);
op = *scode;
}
diff --git a/pcre_exec.c b/pcre_exec.c
index 8096f3a..43af1c8 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -787,6 +787,34 @@ for (;;)
case OP_COND:
case OP_SCOND:
+ /* Because of the way auto-callout works during compile, a callout item is
+ inserted between OP_COND and an assertion condition. */
+
+ if (ecode[LINK_SIZE+1] == OP_CALLOUT)
+ {
+ if (pcre_callout != NULL)
+ {
+ pcre_callout_block cb;
+ cb.version = 1; /* Version 1 of the callout block */
+ cb.callout_number = ecode[LINK_SIZE+2];
+ cb.offset_vector = md->offset_vector;
+ cb.subject = (PCRE_SPTR)md->start_subject;
+ cb.subject_length = md->end_subject - md->start_subject;
+ cb.start_match = mstart - md->start_subject;
+ cb.current_position = eptr - md->start_subject;
+ cb.pattern_position = GET(ecode, LINK_SIZE + 3);
+ cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+ cb.capture_top = offset_top/2;
+ cb.capture_last = md->capture_last;
+ cb.callout_data = md->callout_data;
+ if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+ if (rrc < 0) RRETURN(rrc);
+ }
+ ecode += _pcre_OP_lengths[OP_CALLOUT];
+ }
+
+ /* Now see what the actual condition is */
+
if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
{
offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
diff --git a/testdata/testinput1 b/testdata/testinput1
index dc01c0c..ecfd365 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -4055,4 +4055,10 @@
adc
abc
+/(?(?=b).*b|^d)/
+ abc
+
+/(?(?=.*b).*b|^d)/
+ abc
+
/ End of testinput1 /
diff --git a/testdata/testinput2 b/testdata/testinput2
index 2b64546..2f80926 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2726,4 +2726,12 @@ a random value. /Ix
/(abc|pqr|123){0}[xyz]/SI
+/(?(?=.*b)b|^)/CI
+ adc
+ abc
+
+/(?(?=b).*b|^d)/I
+
+/(?(?=.*b).*b|^d)/I
+
/ End of testinput2 /
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 0e26007..83ef39c 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -6633,4 +6633,12 @@ No match
abc
0: b
+/(?(?=b).*b|^d)/
+ abc
+ 0: b
+
+/(?(?=.*b).*b|^d)/
+ abc
+ 0: ab
+
/ End of testinput1 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2ac018b..477c145 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -9638,4 +9638,56 @@ No first char
No need char
Starting byte set: x y z
+/(?(?=.*b)b|^)/CI
+Capturing subpattern count = 0
+Partial matching not supported
+Options:
+No first char
+No need char
+ adc
+--->adc
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^ ^ b
+ +7 ^^ b
+ +7 ^ b
+ 0:
+ abc
+--->abc
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^ ^ b
+ +7 ^^ b
+ +8 ^ ^ )
+ +9 ^ b
+ +0 ^ (?(?=.*b)b|^)
+ +2 ^ (?=.*b)
+ +5 ^ .*
+ +7 ^ ^ b
+ +7 ^^ b
+ +7 ^ b
+ +8 ^^ )
+ +9 ^ b
++10 ^^ |
++13 ^^
+ 0: b
+
+/(?(?=b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+No first char
+No need char
+
+/(?(?=.*b).*b|^d)/I
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+First char at start or follows newline
+No need char
+
/ End of testinput2 /