summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2016-10-26 16:59:22 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2016-10-26 16:59:22 +0000
commit: 4f042fa520f12e0453677997f7242b7964bffb4b (patch)
tree: 07f77cc9d91bbe50c74e282df8551870f2149d20
parent: fc0a6f6701f3856d99e93a7c30a70ba731b02788 (diff)
download: pcre2-4f042fa520f12e0453677997f7242b7964bffb4b.tar.gz
Fix *MARK length check in UTF mode (it was checking characters, not code units).

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@578 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--  ChangeLog              12
-rw-r--r--  doc/pcre2limits.3       7
-rw-r--r--  src/pcre2_compile.c    10
-rw-r--r--  testdata/testinput10    2
-rw-r--r--  testdata/testinput9     2
-rw-r--r--  testdata/testoutput10   3
-rw-r--r--  testdata/testoutput9    3
7 files changed, 30 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index 422245c..7ca7f24 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,15 @@ parenthesis item, not the length of the whole group. A length of zero is now
given only for a callout at the end of the pattern. Automatic callouts are no
longer inserted before and after explicit callouts in the pattern.
+Some bugs in the refactored code were subsequently fixed before release:
+
+ (a) An overall recursion such as (?0) inside a lookbehind assertion was not
+ being diagnosed as an error.
+
+ (b) In utf mode, the length of a *MARK (or other verb) name was being checked
+ in characters instead of code units, which could lead to bad code being
+ compiled, leading to unpredictable behaviour.
+
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
reference is by name, there is only one group of that name. The referenced
@@ -96,9 +105,6 @@ only when PCRE2_NO_START_OPTIMIZE was *not* set:
16. The "offset" modifier in pcre2test was not being ignored (as documented)
when the POSIX API was in use.
-17. An overall recursion such as (?0) inside a lookbehind assertion was not
-being diagnosed as an error.
-
Version 10.22 29-July-2016
--------------------------
diff --git a/doc/pcre2limits.3 b/doc/pcre2limits.3
index fdb49fa..573c8d3 100644
--- a/doc/pcre2limits.3
+++ b/doc/pcre2limits.3
@@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "29 September 2016" "PCRE2 10.23"
+.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SIZE AND OTHER LIMITATIONS"
@@ -55,7 +55,8 @@ The maximum length of name for a named subpattern is 32 code units, and the
maximum number of named subpatterns is 10000.
.P
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
-is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
+is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
+32-bit libraries.
.P
The maximum length of a string argument to a callout is the largest number a
32-bit unsigned integer can hold.
@@ -75,6 +76,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 29 September 2016
+Last updated: 26 October 2016
Copyright (c) 1997-2016 University of Cambridge.
.fi
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 8bb4251..a76ca0f 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2161,6 +2161,7 @@ BOOL negate_class;
BOOL okquantifier = FALSE;
PCRE2_SPTR name;
PCRE2_SPTR ptrend = cb->end_pattern;
+PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
named_group *ng;
nest_save *top_nest = NULL;
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
@@ -2248,8 +2249,10 @@ while (ptr < ptrend)
case CHAR_RIGHT_PARENTHESIS:
inverbname = FALSE;
+ /* This is the length in characters */
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
- if (verbnamelength > MAX_MARK)
+ /* But the limit on the length is in code units */
+ if (ptr - verbnamestart - 1 > MAX_MARK)
{
ptr--;
errorcode = ERR76;
@@ -3149,6 +3152,7 @@ while (ptr < ptrend)
*parsed_pattern++ = verbs[i].meta +
((verbs[i].meta != META_MARK)? 0x00010000u:0);
verblengthptr = parsed_pattern++;
+ verbnamestart = ptr;
inverbname = TRUE;
}
else /* No verb "name" argument */
@@ -8503,7 +8507,7 @@ for (;; pptr++)
if (META_CODE(*gptr) == META_BIGVALUE) gptr++;
else if (*gptr == (META_CAPTURE | group)) break;
}
-
+
gptrend = parsed_skip(gptr, PSKIP_KET);
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
@@ -8862,7 +8866,7 @@ if (pattern == NULL)
*errorptr = ERR16;
return NULL;
}
-
+
/* Check that all undefined public option bits are zero. */
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
diff --git a/testdata/testinput10 b/testdata/testinput10
index 4b80778..a1806ae 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -454,4 +454,6 @@
\= Expect no match
123
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+
# End of testinput10
diff --git a/testdata/testinput9 b/testdata/testinput9
index 9a26f5f..7be4b15 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -258,4 +258,6 @@
/(*MARK:a\x{100}b)z/alt_verbnames
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+
# End of testinput9
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 0c1e9b2..3c35f0b 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1564,4 +1564,7 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
123
No match
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput10
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 99ee77a..6b014e5 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -364,4 +364,7 @@ Failed: error 177 at offset 7: character code point value in \u.... sequence is
/(*MARK:a\x{100}b)z/alt_verbnames
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
+/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
+Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+
# End of testinput9