diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2015-12-03 16:58:31 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2015-12-03 16:58:31 +0000 |
commit | 146e061d0700d7e4f469d1628e978bf59ad356e6 (patch) | |
tree | 46428e190aa8bc7d685a187c8c60f7849207246e | |
parent | 461d23f4e56605c6db3e53720dcaf6fba26319c2 (diff) | |
download | pcre2-146e061d0700d7e4f469d1628e978bf59ad356e6.tar.gz |
Fix /x bug when pattern starts with whitespace followed by (?-x).
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@456 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 6 |
-rw-r--r-- | src/pcre2_compile.c | 40 |
-rw-r--r-- | testdata/testinput2 | 11 |
-rw-r--r-- | testdata/testoutput2 | 44 |
4 files changed, 42 insertions, 59 deletions
@@ -365,6 +365,12 @@ displaying fields containing NULLS: (a) Within /x extended #-comments (b) Within the "name" part of (*MARK) and other *verbs (c) Within the text argument of a callout + +108. If a pattern that was compiled with PCRE2_EXTENDED started with white +space or a #-type comment that was followed by (?-x), which turns off +PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again, +pcre2_compile() assumed that (?-x) applied to the whole pattern and +consequently mis-compiled it. This bug was found by the LLVM fuzzer. Version 10.20 30-June-2015 diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 373d3fd..b959c38 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -6862,44 +6862,16 @@ for (;; ptr++) newoptions = (options | set) & (~unset); /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. If this - item is right at the start of the pattern, the options can be - abstracted and made external in the pre-compile phase, and ignored in - the compile phase. This can be helpful when matching -- for instance in - caseless checking of required bytes. - - If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are - definitely *not* at the start of the pattern because something has been - compiled. In the pre-compile phase, however, the code pointer can have - that value after the start, because it gets reset as code is discarded - during the pre-compile. However, this can happen only at top level - if - we are within parentheses, the starting BRA will still be present. At - any parenthesis level, the length value can be used to test if anything - has been compiled at that level. Thus, a test for both these conditions - is necessary to ensure we correctly detect the start of the pattern in - both phases. - - If we are not at the pattern start, reset the greedy defaults and the - case value for firstcu and reqcu. 
*/ + group with option changes, so the options change at this level. They + must also be passed back for use in subsequent branches. Reset the + greedy defaults and the case value for firstcu and reqcu. */ if (*ptr == CHAR_RIGHT_PARENTHESIS) { - if (code == cb->start_code + 1 + LINK_SIZE && - (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) - { - cb->external_options = newoptions; - } - else - { - greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; - } - - /* Change options at this level, and pass them back for use - in subsequent branches. */ - *optionsptr = options = newoptions; + greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; previous = NULL; /* This item can't be repeated */ continue; /* It is complete */ } diff --git a/testdata/testinput2 b/testdata/testinput2 index 8b85d53..42a859d 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4724,4 +4724,15 @@ a)"xI # /A(?#X\x00Y)B/ /41 28 3f 23 7b 00 7d 29 42/B,hex +# Tests for leading comment in extended patterns + +/ (?-x):?/extended + +/(?-x):?/extended + +/0b 28 3f 2d 78 29 3a/hex,extended + +/#comment +(?-x):?/extended + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 50993c8..55ebd5d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -431,8 +431,6 @@ Subject length lower bound = 2 /(?U)<.*>/I Capturing subpattern count = 0 -Compile options: <none> -Overall options: ungreedy First code unit = '<' Last code unit = '>' Subject length lower bound = 2 @@ -459,8 +457,6 @@ Subject length lower bound = 3 /(?U)={3,}?/I Capturing subpattern count = 0 -Compile options: <none> -Overall options: ungreedy First code unit = '=' Last code unit = '=' Subject length lower bound = 3 @@ -494,8 +490,6 @@ Failed: error 125 at offset 12: 
lookbehind assertion is not fixed length /(?i)abc/I Capturing subpattern count = 0 -Compile options: <none> -Overall options: caseless First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -508,7 +502,7 @@ Subject length lower bound = 1 /(?i)^1234/I Capturing subpattern count = 0 Compile options: <none> -Overall options: anchored caseless +Overall options: anchored Subject length lower bound = 4 /(^b|(?i)^d)/I @@ -521,7 +515,7 @@ Subject length lower bound = 1 Capturing subpattern count = 0 May match empty string Compile options: <none> -Overall options: anchored dotall +Overall options: anchored Subject length lower bound = 0 /[abcd]/I @@ -531,15 +525,11 @@ Subject length lower bound = 1 /(?i)[abcd]/I Capturing subpattern count = 0 -Compile options: <none> -Overall options: caseless Starting code units: A B C D a b c d Subject length lower bound = 1 /(?m)[xy]|(b|c)/I Capturing subpattern count = 1 -Compile options: <none> -Overall options: multiline Starting code units: b c x y Subject length lower bound = 1 @@ -551,8 +541,7 @@ Subject length lower bound = 1 /(?i)(^a|^b)/Im Capturing subpattern count = 1 -Compile options: multiline -Overall options: caseless multiline +Options: multiline First code unit at start or follows newline Subject length lower bound = 1 @@ -1153,7 +1142,7 @@ Subject length lower bound = 1 ------------------------------------------------------------------ Capturing subpattern count = 1 Compile options: <none> -Overall options: anchored dotall +Overall options: anchored Subject length lower bound = 1 /(?s:.*X|^B)/IB @@ -2682,8 +2671,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: extended -Overall options: caseless extended +Options: extended First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -2697,8 +2685,7 @@ Subject length lower bound = 3 End 
------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: extended -Overall options: caseless extended +Options: extended First code unit = 'a' (caseless) Last code unit = 'c' (caseless) Subject length lower bound = 3 @@ -3043,8 +3030,6 @@ Subject length lower bound = 3 End ------------------------------------------------------------------ Capturing subpattern count = 0 -Compile options: <none> -Overall options: ungreedy First code unit = 'x' Last code unit = 'b' Subject length lower bound = 3 @@ -3427,8 +3412,6 @@ Subject length lower bound = 1 /(?i)[ab]/I Capturing subpattern count = 0 -Compile options: <none> -Overall options: caseless Starting code units: A B a b Subject length lower bound = 1 @@ -5841,7 +5824,7 @@ Named capturing subpatterns: A 2 A 3 Compile options: <none> -Overall options: anchored dupnames +Overall options: anchored Duplicate name status changes Subject length lower bound = 2 a1b\=copy=A @@ -13734,7 +13717,7 @@ Subject length lower bound = 1 /(*NO_DOTSTAR_ANCHOR)(?s).*\d/info Capturing subpattern count = 0 Compile options: <none> -Overall options: dotall no_dotstar_anchor +Overall options: no_dotstar_anchor Subject length lower bound = 1 '^(?:(a)|b)(?(1)A|B)' @@ -15060,4 +15043,15 @@ Subject length lower bound = 0 End ------------------------------------------------------------------ +# Tests for leading comment in extended patterns + +/ (?-x):?/extended + +/(?-x):?/extended + +/0b 28 3f 2d 78 29 3a/hex,extended + +/#comment +(?-x):?/extended + # End of testinput2 |