summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-11-27 17:03:58 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-11-27 17:03:58 +0000
commit: 3fa0331bacd0e7e6f2b273c6e762002a4e1711e2 (patch)
tree: b208d60a5ebcc83c9fd1c9d16e8668668616c0ce
parent: 1ed357073a113d6f10c6fc38d2c098743afd64e0 (diff)
download: pcre2-3fa0331bacd0e7e6f2b273c6e762002a4e1711e2.tar.gz
Fix negated POSIX class bug.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@448 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 3
-rw-r--r-- src/pcre2_compile.c 48
2 files changed, 27 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index 1e60a78..58ac521 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -337,6 +337,9 @@ misbehaved. This bug was found by the LLVM fuzzer.
100. The error for an invalid UTF pattern string always gave the code unit
offset as zero instead of where the invalidity was found.
+101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not
+working correctly in UCP mode.
+
Version 10.20 30-June-2015
--------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 453e206..64fac22 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3857,7 +3857,7 @@ for (;; ptr++)
{
BOOL negate_class;
BOOL should_flip_negation;
- BOOL match_all_wide_chars;
+ BOOL match_all_or_no_wide_chars;
BOOL possessive_quantifier;
BOOL is_quantifier;
BOOL is_recurse;
@@ -4207,9 +4207,10 @@ for (;; ptr++)
/* If a non-extended class contains a negative special such as \S, we need
to flip the negation flag at the end, so that support for characters > 255
works correctly (they are all included in the class). An extended class may
- need to insert specific matching code for wide characters. */
+ need to insert specific matching or non-matching code for wide characters.
+ */
- should_flip_negation = match_all_wide_chars = FALSE;
+ should_flip_negation = match_all_or_no_wide_chars = FALSE;
/* Extended class (xclass) will be used when characters > 255
might match. */
@@ -4365,21 +4366,20 @@ for (;; ptr++)
/* For the other POSIX classes (ascii, xdigit) we are going to fall
through to the non-UCP case and build a bit map for characters with
- code points less than 256. If we are in a negated POSIX class
- within a non-negated overall class, characters with code points
- greater than 255 must all match. In the special case where we have
- not yet generated any xclass data, and this is the final item in
- the overall class, we need do nothing: later on, the opcode
- OP_NCLASS will be used to indicate that characters greater than 255
- are acceptable. If we have already seen an xclass item or one may
- follow (we have to assume that it might if this is not the end of
- the class), set a flag to cause the generation of an explicit range
- for all wide codepoints. */
+ code points less than 256. However, if we are in a negated POSIX
+ class, characters with code points greater than 255 must either all
+ match or all not match, depending on whether the whole class is not
+ or is negated. For example, for [[:^ascii:]... they must all match,
+ whereas for [^[:^xdigit:]... they must not.
+
+ In the special case where there are no xclass items, this is
+ automatically handled by the use of OP_CLASS or OP_NCLASS, but an
+ explicit range is needed for OP_XCLASS. Setting a flag here causes
+ the range to be generated later when it is known that OP_XCLASS is
+ required. */
default:
- if (!negate_class && local_negate &&
- (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
- match_all_wide_chars = TRUE;
+ match_all_or_no_wide_chars |= local_negate;
break;
}
}
@@ -4878,13 +4878,14 @@ for (;; ptr++)
(\p or \P), we have to compile an extended class, with its own opcode,
unless there were no property settings and there was a negated special such
as \S in the class, and PCRE2_UCP is not set, because in that case all
- characters > 255 are in the class, so any that were explicitly given as
- well can be ignored.
+ characters > 255 are in or not in the class, so any that were explicitly
+ given as well can be ignored.
In the UCP case, if certain negated POSIX classes ([:^ascii:] or
- {^:xdigit:]) were present in a non-negative class, we again have to match
- all wide characters, indicated by match_all_wide_chars being true. We do
- this by including an explicit range.
+ [:^xdigit:]) were present in a class, we either have to match or not match
+ all wide characters (they match when the whole class is not negated, and do
+ not match when it is). This requirement is indicated by
+ match_all_or_no_wide_chars being true. We do this by including an explicit
+ range, which works in both cases.
If, when generating an xclass, there are no characters < 256, we can omit
the bitmap in the actual compiled code. */
@@ -4897,12 +4898,11 @@ for (;; ptr++)
if (xclass && (xclass_has_prop || !should_flip_negation))
#endif
{
- if (match_all_wide_chars)
+ if (match_all_or_no_wide_chars)
{
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
- class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT,
- class_uchardata);
+ class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
}
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;