summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-01-11 16:40:35 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2017-01-11 16:40:35 +0000
commit 0c89beea88e6d30f292a369e6e16f5e1f11446c8 (patch)
tree 6de48203f30893a344633444b489102cf57dccc4
parent 1417100e1030a31dd4a9488467d27b899ea6a03b (diff)
download pcre2-0c89beea88e6d30f292a369e6e16f5e1f11446c8.tar.gz
Fix hyphen after \E after POSIX class causing an error.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@649 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 3
-rw-r--r-- src/pcre2_compile.c 52
-rw-r--r-- testdata/testinput1 6
-rw-r--r-- testdata/testoutput1 8
4 files changed, 47 insertions, 22 deletions
diff --git a/ChangeLog b/ChangeLog
index 8f7f3e7..cd7a470 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -125,6 +125,9 @@ fully released code, but are noted here for the record.
(r) If a character whose code point was greater than 0xffff appeared within
a lookbehind that was within another lookbehind, the calculation of the
lookbehind length went wrong and could provoke an internal error.
+
+ (t) The sequence \E- or \Q\E- after a POSIX class in a character class caused
+ an internal error. Now the hyphen is treated as a literal.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index c2a8688..93326f8 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3010,6 +3010,14 @@ while (ptr < ptrend)
goto FAILED;
}
+ /* Set "a hyphen is not the start of a range" just in case the POSIX
+ class is followed by \E or \Q\E (possibly repeated - fuzzers do that
+ kind of thing) and *then* a hyphen. This causes that hyphen to be
+ treated as a literal. I don't think it's worth setting up special
+ apparatus to do otherwise. */
+
+ class_range_state = RANGE_NO;
+
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
use Unicode properties \p or \P or, in one case, \h or \H. The
substitutes table has two values per class, containing the type and
@@ -4224,10 +4232,10 @@ return 0;
/* This function packages up the logic of adding a character or range of
characters to a class. The character values in the arguments will be within the
-valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
-called only from within the "add to class" group of functions, some of which
-are recursive and mutually recursive. The external entry point is
-add_to_class().
+valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
+called only from within the "add to class" group of functions, some of which
+are recursive and mutually recursive. The external entry point is
+add_to_class().
Arguments:
classbits the bit map for characters < 256
@@ -4242,7 +4250,7 @@ Returns: the number of < 256 characters added
*/
static unsigned int
-add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
{
uint32_t c;
@@ -4307,7 +4315,7 @@ can be used in all cases. */
if ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
end = MAX_NON_UTF_CHAR;
-
+
if (start > cb->class_range_start && end < cb->class_range_end) return n8;
/* Use the bitmap for characters < 256. Otherwise use extra data.*/
@@ -4380,8 +4388,8 @@ return n8; /* Number of 8-bit characters */
/* This function is used for adding a list of case-equivalent characters to a
class, and also for adding a list of horizontal or vertical whitespace. If the
list is in order (which it should be), ranges of characters are detected and
-handled appropriately. This function is called (sometimes recursively) only
-from within the "add to class" set of functions. The external entry point is
+handled appropriately. This function is called (sometimes recursively) only
+from within the "add to class" set of functions. The external entry point is
add_list_to_class().
Arguments:
@@ -4399,7 +4407,7 @@ Returns: the number of < 256 characters added
*/
static unsigned int
-add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
+add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
{
unsigned int n8 = 0;
@@ -4422,7 +4430,7 @@ return n8;
* External entry point for add range to class *
*************************************************/
-/* This function sets the overall range so that the internal functions can try
+/* This function sets the overall range so that the internal functions can try
to avoid duplication when handling case-independence.
Arguments:
@@ -4451,7 +4459,7 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
* External entry point for add list to class *
*************************************************/
-/* This function sets the overall range so that the internal functions can try
+/* This function sets the overall range so that the internal functions can try
to avoid duplication when handling case-independence.
Arguments:
@@ -4480,7 +4488,7 @@ while (p[0] < NOTACHAR)
{
while(p[n+1] == p[0] + n + 1) n++;
cb->class_range_start = p[0];
- cb->class_range_end = p[n];
+ cb->class_range_end = p[n];
n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
}
p += n + 1;
@@ -4736,7 +4744,7 @@ for (;; pptr++)
meta = META_CODE(*pptr);
meta_arg = META_DATA(*pptr);
-
+
/* If we are in the pre-compile phase, accumulate the length used for the
previous cycle of this loop, unless the next item is a quantifier. */
@@ -5148,30 +5156,30 @@ for (;; pptr++)
should_flip_negation = TRUE;
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
break;
-
- /* When adding the horizontal or vertical space lists to a class, or
- their complements, disable PCRE2_CASELESS, because it justs wastes
- time, and in the "not-x" UTF cases can create unwanted duplicates in
- the XCLASS list (provoked by characters that have more than one other
+
+ /* When adding the horizontal or vertical space lists to a class, or
+ their complements, disable PCRE2_CASELESS, because it just wastes
+ time, and in the "not-x" UTF cases can create unwanted duplicates in
+ the XCLASS list (provoked by characters that have more than one other
case and by both cases being in the same "not-x" sublist). */
case ESC_h:
- (void)add_list_to_class(classbits, &class_uchardata,
+ (void)add_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
break;
case ESC_H:
- (void)add_not_list_to_class(classbits, &class_uchardata,
+ (void)add_not_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
break;
case ESC_v:
- (void)add_list_to_class(classbits, &class_uchardata,
+ (void)add_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
break;
case ESC_V:
- (void)add_not_list_to_class(classbits, &class_uchardata,
+ (void)add_not_list_to_class(classbits, &class_uchardata,
options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
break;
diff --git a/testdata/testinput1 b/testdata/testinput1
index 12a7e82..08a9bcc 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -5820,4 +5820,10 @@ ef) x/x,mark
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
+/[s[:digit:]\E-H]+/
+ s09-H
+
+/[s[:digit:]\Q\E-H]+/
+ s09-H
+
# End of testinput1
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index ec47dcd..d07b657 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9297,4 +9297,12 @@ No match
/(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
+/[s[:digit:]\E-H]+/
+ s09-H
+ 0: s09-H
+
+/[s[:digit:]\Q\E-H]+/
+ s09-H
+ 0: s09-H
+
# End of testinput1