summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-12-03 16:58:31 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-12-03 16:58:31 +0000
commit 146e061d0700d7e4f469d1628e978bf59ad356e6 (patch)
tree 46428e190aa8bc7d685a187c8c60f7849207246e
parent 461d23f4e56605c6db3e53720dcaf6fba26319c2 (diff)
download pcre2-146e061d0700d7e4f469d1628e978bf59ad356e6.tar.gz
Fix /x bug when pattern starts with whitespace followed by (?-x).
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@456 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 6
-rw-r--r-- src/pcre2_compile.c 40
-rw-r--r-- testdata/testinput2 11
-rw-r--r-- testdata/testoutput2 44
4 files changed, 42 insertions, 59 deletions
diff --git a/ChangeLog b/ChangeLog
index 3bc48ef..87979bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -365,6 +365,12 @@ displaying fields containing NULLS:
(a) Within /x extended #-comments
(b) Within the "name" part of (*MARK) and other *verbs
(c) Within the text argument of a callout
+
+108. If a pattern that was compiled with PCRE2_EXTENDED started with white
+space or a #-type comment that was followed by (?-x), which turns off
+PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
+pcre2_compile() assumed that (?-x) applied to the whole pattern and
+consequently mis-compiled it. This bug was found by the LLVM fuzzer.
Version 10.20 30-June-2015
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 373d3fd..b959c38 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -6862,44 +6862,16 @@ for (;; ptr++)
newoptions = (options | set) & (~unset);
/* If the options ended with ')' this is not the start of a nested
- group with option changes, so the options change at this level. If this
- item is right at the start of the pattern, the options can be
- abstracted and made external in the pre-compile phase, and ignored in
- the compile phase. This can be helpful when matching -- for instance in
- caseless checking of required bytes.
-
- If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are
- definitely *not* at the start of the pattern because something has been
- compiled. In the pre-compile phase, however, the code pointer can have
- that value after the start, because it gets reset as code is discarded
- during the pre-compile. However, this can happen only at top level - if
- we are within parentheses, the starting BRA will still be present. At
- any parenthesis level, the length value can be used to test if anything
- has been compiled at that level. Thus, a test for both these conditions
- is necessary to ensure we correctly detect the start of the pattern in
- both phases.
-
- If we are not at the pattern start, reset the greedy defaults and the
- case value for firstcu and reqcu. */
+ group with option changes, so the options change at this level. They
+ must also be passed back for use in subsequent branches. Reset the
+ greedy defaults and the case value for firstcu and reqcu. */
if (*ptr == CHAR_RIGHT_PARENTHESIS)
{
- if (code == cb->start_code + 1 + LINK_SIZE &&
- (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
- {
- cb->external_options = newoptions;
- }
- else
- {
- greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
- greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
- }
-
- /* Change options at this level, and pass them back for use
- in subsequent branches. */
-
*optionsptr = options = newoptions;
+ greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
+ greedy_non_default = greedy_default ^ 1;
+ req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
previous = NULL; /* This item can't be repeated */
continue; /* It is complete */
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index 8b85d53..42a859d 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4724,4 +4724,15 @@ a)"xI
# /A(?#X\x00Y)B/
/41 28 3f 23 7b 00 7d 29 42/B,hex
+# Tests for leading comment in extended patterns
+
+/ (?-x):?/extended
+
+/ (?-x):?/extended
+
+/0b 28 3f 2d 78 29 3a/hex,extended
+
+/#comment
+(?-x):?/extended
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 50993c8..55ebd5d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -431,8 +431,6 @@ Subject length lower bound = 2
/(?U)<.*>/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = '<'
Last code unit = '>'
Subject length lower bound = 2
@@ -459,8 +457,6 @@ Subject length lower bound = 3
/(?U)={3,}?/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = '='
Last code unit = '='
Subject length lower bound = 3
@@ -494,8 +490,6 @@ Failed: error 125 at offset 12: lookbehind assertion is not fixed length
/(?i)abc/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -508,7 +502,7 @@ Subject length lower bound = 1
/(?i)^1234/I
Capturing subpattern count = 0
Compile options: <none>
-Overall options: anchored caseless
+Overall options: anchored
Subject length lower bound = 4
/(^b|(?i)^d)/I
@@ -521,7 +515,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0
May match empty string
Compile options: <none>
-Overall options: anchored dotall
+Overall options: anchored
Subject length lower bound = 0
/[abcd]/I
@@ -531,15 +525,11 @@ Subject length lower bound = 1
/(?i)[abcd]/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
Starting code units: A B C D a b c d
Subject length lower bound = 1
/(?m)[xy]|(b|c)/I
Capturing subpattern count = 1
-Compile options: <none>
-Overall options: multiline
Starting code units: b c x y
Subject length lower bound = 1
@@ -551,8 +541,7 @@ Subject length lower bound = 1
/(?i)(^a|^b)/Im
Capturing subpattern count = 1
-Compile options: multiline
-Overall options: caseless multiline
+Options: multiline
First code unit at start or follows newline
Subject length lower bound = 1
@@ -1153,7 +1142,7 @@ Subject length lower bound = 1
------------------------------------------------------------------
Capturing subpattern count = 1
Compile options: <none>
-Overall options: anchored dotall
+Overall options: anchored
Subject length lower bound = 1
/(?s:.*X|^B)/IB
@@ -2682,8 +2671,7 @@ No match
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: extended
-Overall options: caseless extended
+Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -2697,8 +2685,7 @@ Subject length lower bound = 3
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: extended
-Overall options: caseless extended
+Options: extended
First code unit = 'a' (caseless)
Last code unit = 'c' (caseless)
Subject length lower bound = 3
@@ -3043,8 +3030,6 @@ Subject length lower bound = 3
End
------------------------------------------------------------------
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: ungreedy
First code unit = 'x'
Last code unit = 'b'
Subject length lower bound = 3
@@ -3427,8 +3412,6 @@ Subject length lower bound = 1
/(?i)[ab]/I
Capturing subpattern count = 0
-Compile options: <none>
-Overall options: caseless
Starting code units: A B a b
Subject length lower bound = 1
@@ -5841,7 +5824,7 @@ Named capturing subpatterns:
A 2
A 3
Compile options: <none>
-Overall options: anchored dupnames
+Overall options: anchored
Duplicate name status changes
Subject length lower bound = 2
a1b\=copy=A
@@ -13734,7 +13717,7 @@ Subject length lower bound = 1
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
Capturing subpattern count = 0
Compile options: <none>
-Overall options: dotall no_dotstar_anchor
+Overall options: no_dotstar_anchor
Subject length lower bound = 1
'^(?:(a)|b)(?(1)A|B)'
@@ -15060,4 +15043,15 @@ Subject length lower bound = 0
End
------------------------------------------------------------------
+# Tests for leading comment in extended patterns
+
+/ (?-x):?/extended
+
+/ (?-x):?/extended
+
+/0b 28 3f 2d 78 29 3a/hex,extended
+
+/#comment
+(?-x):?/extended
+
# End of testinput2