diff options
Diffstat (limited to 'pcre/pcre_compile.c')
-rw-r--r-- | pcre/pcre_compile.c | 80 |
1 files changed, 56 insertions, 24 deletions
diff --git a/pcre/pcre_compile.c b/pcre/pcre_compile.c index 7cd39501230..de92313e2f8 100644 --- a/pcre/pcre_compile.c +++ b/pcre/pcre_compile.c @@ -5579,6 +5579,34 @@ for (;; ptr++) #endif #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 { + /* For non-UCP wide characters, in a non-negative class containing \S or + similar (should_flip_negation is set), all characters greater than 255 + must be in the class. */ + + if ( +#if defined COMPILE_PCRE8 + utf && +#endif + should_flip_negation && !negate_class && (options & PCRE_UCP) == 0) + { + *class_uchardata++ = XCL_RANGE; + if (utf) /* Will always be utf in the 8-bit library */ + { + class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + } + else /* Can only happen for the 16-bit & 32-bit libraries */ + { +#if defined COMPILE_PCRE16 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffu; +#elif defined COMPILE_PCRE32 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffffffu; +#endif + } + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; code += LINK_SIZE; @@ -6923,7 +6951,8 @@ for (;; ptr++) slot = cd->name_table; for (i = 0; i < cd->names_found; i++) { - if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break; + if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && + slot[IMM2_SIZE+namelen] == 0) break; slot += cd->name_entry_size; } @@ -7889,15 +7918,17 @@ for (;; ptr++) } } - /* For a forward assertion, we take the reqchar, if set. This can be - helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. We can't set firstchar - for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead - of a firstchar. This is overcome by a scan at the end if there's no - firstchar, looking for an asserted first char. */ - - else if (bravalue == OP_ASSERT && subreqcharflags >= 0) + /* For a forward assertion, we take the reqchar, if set, provided that the + group has also set a first char. This can be helpful if the pattern that + follows the assertion doesn't set a different char. For example, it's + useful for /(?=abcde).+/. We can't set firstchar for an assertion, however + because it leads to incorrect effect for patterns such as /(?=a)a.+/ when + the "real" "a" would then become a reqchar instead of a firstchar. This is + overcome by a scan at the end if there's no firstchar, looking for an + asserted first char. */ + + else if (bravalue == OP_ASSERT && subreqcharflags >= 0 && + subfirstcharflags >= 0) { reqchar = subreqchar; reqcharflags = subreqcharflags; @@ -8686,8 +8717,8 @@ matching and for non-DOTALL patterns that start with .* (which must start at the beginning or after \n). As in the case of is_anchored() (see above), we have to take account of back references to capturing brackets that contain .* because in that case we can't make the assumption. Also, the appearance of .* -inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not -count, because once again the assumption no longer holds. +inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE +or *SKIP does not count, because once again the assumption no longer holds. Arguments: code points to start of expression (the bracket) @@ -8696,13 +8727,14 @@ Arguments: the less precise approach cd points to the compile data atomcount atomic group level + inassert TRUE if in an assertion Returns: TRUE or FALSE */ static BOOL is_startline(const pcre_uchar *code, unsigned int bracket_map, - compile_data *cd, int atomcount) + compile_data *cd, int atomcount, BOOL inassert) { do { const pcre_uchar *scode = first_significant_code( @@ -8729,7 +8761,7 @@ do { return FALSE; default: /* Assertion */ - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; + if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE; do scode += GET(scode, 1); while (*scode == OP_ALT); scode += 1 + LINK_SIZE; break; @@ -8743,7 +8775,7 @@ do { if (op == OP_BRA || op == OP_BRAPOS || op == OP_SBRA || op == OP_SBRAPOS) { - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; + if (!is_startline(scode, bracket_map, cd, atomcount, inassert)) return FALSE; } /* Capturing brackets */ @@ -8753,33 +8785,33 @@ do { { int n = GET2(scode, 1+LINK_SIZE); int new_map = bracket_map | ((n < 32)? (1 << n) : 1); - if (!is_startline(scode, new_map, cd, atomcount)) return FALSE; + if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE; } /* Positive forward assertions */ else if (op == OP_ASSERT) { - if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; + if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE; } /* Atomic brackets */ else if (op == OP_ONCE || op == OP_ONCE_NC) { - if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE; + if (!is_startline(scode, bracket_map, cd, atomcount + 1, inassert)) return FALSE; } /* .* means "start at start or after \n" if it isn't in atomic brackets or - brackets that may be referenced, as long as the pattern does not contain - *PRUNE or *SKIP, because these break the feature. Consider, for example, - /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the - start of a line. */ + brackets that may be referenced or an assertion, as long as the pattern does + not contain *PRUNE or *SKIP, because these break the feature. Consider, for + example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. + not at the start of a line. */ else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) { if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 || - atomcount > 0 || cd->had_pruneorskip) + atomcount > 0 || cd->had_pruneorskip || inassert) return FALSE; } @@ -9634,7 +9666,7 @@ if ((re->options & PCRE_ANCHORED) == 0) re->flags |= PCRE_FIRSTSET; } - else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE; + else if (is_startline(codestart, 0, cd, 0, FALSE)) re->flags |= PCRE_STARTLINE; } } |