diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2016-12-11 16:42:29 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2016-12-11 16:42:29 +0000 |
commit | a8fa57206858a29c007a881ace566ce124250a8d (patch) | |
tree | 2c7abb9c8991d028bc39d3e9f172a93989390de0 | |
parent | ef9d9c3d18ecfbe18434bd86a229a05ac3dcad29 (diff) | |
download | pcre2-a8fa57206858a29c007a881ace566ce124250a8d.tar.gz |
Fix \Q\E quantification bug.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@618 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | src/pcre2_compile.c | 13 | ||||
-rw-r--r-- | testdata/testinput2 | 10 | ||||
-rw-r--r-- | testdata/testoutput2 | 27 |
4 files changed, 42 insertions, 12 deletions
```diff
@@ -218,6 +218,10 @@ followed by a caseful back reference, could lose the caselessness of the first
 repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX
 but didn't).
 
+35. If \Q was preceded by a quantified item, and the following \E was followed
+by '?' or '+', and there was at least one literal character between them, an
+internal error "unexpected repeat" occurred (example: /.+\QX\E+/).
+
 
 Version 10.22 29-July-2016
 --------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 15d6dd9..3e1cddf 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1352,7 +1352,7 @@ entry, ptr is pointing at the character after \. On exit, it points after the
 final code unit of the escape sequence.
 
 This function is also called from pcre2_substitute() to handle escape sequences
-in replacement strings. In this case, the cb argument is NULL, and in the case
+in replacement strings. In this case, the cb argument is NULL, and in the case
 of escapes that have further processing, only sequences that define a data
 character are recognised. The isclass argument is not relevant; the options
 argument is the final value of the compiled pattern's options.
@@ -2327,6 +2327,7 @@ while (ptr < ptrend)
         parsed_pattern = manage_callouts(thisptr, &previous_callout, options,
           parsed_pattern, cb);
       PARSED_LITERAL(c, parsed_pattern);
+      meta_quantifier = 0;
       }
     continue;  /* Next character */
     }
@@ -2362,7 +2363,7 @@ while (ptr < ptrend)
 
       case CHAR_RIGHT_PARENTHESIS:
       inverbname = FALSE;
-      okquantifier = FALSE; /* Was probably set by literals */
+      okquantifier = FALSE; /* Was probably set by literals */
       /* This is the length in characters */
       verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
       /* But the limit on the length is in code units */
@@ -2405,10 +2406,10 @@ while (ptr < ptrend)
     continue; /* Next character in pattern */
     }
 
-  /* At the point we must process everything that must not change the
-  qualification state. This is mainly comments, but we handle \Q and \E here as
-  well, so that an item such as A\Q\E+ is treated as A+, as in Perl. An
-  isolated \E is ignored. */
+  /* Not a verb name character. At this point we must process everything that
+  must not change the quantification state. This is mainly comments, but we
+  handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as
+  A+, as in Perl. An isolated \E is ignored. */
 
   if (c == CHAR_BACKSLASH && ptr < ptrend)
     {
diff --git a/testdata/testinput2 b/testdata/testinput2
index 998b2cf..aa1013c 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4923,12 +4923,16 @@ a)"xI
 %(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
 
 /./newline=crlf
-\=ph
+    \=ph
 
 /(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+    \x0e00\000000\xc
 
 //replace=0
-\=offset=7
+    \=offset=7
+
+".+\QX\E+"B,no_auto_possess
+
+".+\QX\E+"B,auto_callout,no_auto_possess
 
 # End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2c35960..7f177ce 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -15375,17 +15375,38 @@ Failed: error 109 at offset 6: quantifier does not follow a repeatable item
 %(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
 
 /./newline=crlf
-\=ph
+    \=ph
 No match
 
 /(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+    \x0e00\000000\xc
 Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string
 
 //replace=0
-\=offset=7
+    \=offset=7
 Failed: error -33: bad offset value
 
+".+\QX\E+"B,no_auto_possess
+------------------------------------------------------------------
+        Bra
+        Any+
+        X+
+        Ket
+        End
+------------------------------------------------------------------
+
+".+\QX\E+"B,auto_callout,no_auto_possess
+------------------------------------------------------------------
+        Bra
+        Callout 255 0 4
+        Any+
+        Callout 255 4 4
+        X+
+        Callout 255 8 0
+        Ket
+        End
+------------------------------------------------------------------
+
 # End of testinput2
 Error -63: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data
```