summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2016-12-11 16:42:29 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2016-12-11 16:42:29 +0000
commit a8fa57206858a29c007a881ace566ce124250a8d (patch)
tree 2c7abb9c8991d028bc39d3e9f172a93989390de0
parent ef9d9c3d18ecfbe18434bd86a229a05ac3dcad29 (diff)
download pcre2-a8fa57206858a29c007a881ace566ce124250a8d.tar.gz
Fix \Q\E quantification bug.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@618 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 4
-rw-r--r-- src/pcre2_compile.c 13
-rw-r--r-- testdata/testinput2 10
-rw-r--r-- testdata/testoutput2 27
4 files changed, 42 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index e39f544..14f510c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -218,6 +218,10 @@ followed by a caseful back reference, could lose the caselessness of the first
repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX
but didn't).
+35. If \Q was preceded by a quantified item, and the following \E was followed
+by '?' or '+', and there was at least one literal character between them, an
+internal error "unexpected repeat" occurred (example: /.+\QX\E+/).
+
Version 10.22 29-July-2016
--------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 15d6dd9..3e1cddf 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1352,7 +1352,7 @@ entry, ptr is pointing at the character after \. On exit, it points after the
final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
-in replacement strings. In this case, the cb argument is NULL, and in the case
+in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
character are recognised. The isclass argument is not relevant; the options
argument is the final value of the compiled pattern's options.
@@ -2327,6 +2327,7 @@ while (ptr < ptrend)
parsed_pattern = manage_callouts(thisptr, &previous_callout, options,
parsed_pattern, cb);
PARSED_LITERAL(c, parsed_pattern);
+ meta_quantifier = 0;
}
continue; /* Next character */
}
@@ -2362,7 +2363,7 @@ while (ptr < ptrend)
case CHAR_RIGHT_PARENTHESIS:
inverbname = FALSE;
- okquantifier = FALSE; /* Was probably set by literals */
+ okquantifier = FALSE; /* Was probably set by literals */
/* This is the length in characters */
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
/* But the limit on the length is in code units */
@@ -2405,10 +2406,10 @@ while (ptr < ptrend)
continue; /* Next character in pattern */
}
- /* At the point we must process everything that must not change the
- qualification state. This is mainly comments, but we handle \Q and \E here as
- well, so that an item such as A\Q\E+ is treated as A+, as in Perl. An
- isolated \E is ignored. */
+ /* Not a verb name character. At this point we must process everything that
+ must not change the quantification state. This is mainly comments, but we
+ handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as
+ A+, as in Perl. An isolated \E is ignored. */
if (c == CHAR_BACKSLASH && ptr < ptrend)
{
diff --git a/testdata/testinput2 b/testdata/testinput2
index 998b2cf..aa1013c 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4923,12 +4923,16 @@ a)"xI
%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
/./newline=crlf
-\=ph
+ \=ph
/(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+ \x0e00\000000\xc
//replace=0
-\=offset=7
+ \=offset=7
+
+".+\QX\E+"B,no_auto_possess
+
+".+\QX\E+"B,auto_callout,no_auto_possess
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2c35960..7f177ce 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -15375,17 +15375,38 @@ Failed: error 109 at offset 6: quantifier does not follow a repeatable item
%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
/./newline=crlf
-\=ph
+ \=ph
No match
/(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+ \x0e00\000000\xc
Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string
//replace=0
-\=offset=7
+ \=offset=7
Failed: error -33: bad offset value
+".+\QX\E+"B,no_auto_possess
+------------------------------------------------------------------
+ Bra
+ Any+
+ X+
+ Ket
+ End
+------------------------------------------------------------------
+
+".+\QX\E+"B,auto_callout,no_auto_possess
+------------------------------------------------------------------
+ Bra
+ Callout 255 0 4
+ Any+
+ Callout 255 4 4
+ X+
+ Callout 255 8 0
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data