summaryrefslogtreecommitdiff
path: root/pcre/pcre_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'pcre/pcre_compile.c')
-rw-r--r--pcre/pcre_compile.c80
1 files changed, 56 insertions, 24 deletions
diff --git a/pcre/pcre_compile.c b/pcre/pcre_compile.c
index 7cd39501230..de92313e2f8 100644
--- a/pcre/pcre_compile.c
+++ b/pcre/pcre_compile.c
@@ -5579,6 +5579,34 @@ for (;; ptr++)
#endif
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
{
+ /* For non-UCP wide characters, in a non-negative class containing \S or
+ similar (should_flip_negation is set), all characters greater than 255
+ must be in the class. */
+
+ if (
+#if defined COMPILE_PCRE8
+ utf &&
+#endif
+ should_flip_negation && !negate_class && (options & PCRE_UCP) == 0)
+ {
+ *class_uchardata++ = XCL_RANGE;
+ if (utf) /* Will always be utf in the 8-bit library */
+ {
+ class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ }
+ else /* Can only happen for the 16-bit & 32-bit libraries */
+ {
+#if defined COMPILE_PCRE16
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffu;
+#elif defined COMPILE_PCRE32
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffffffu;
+#endif
+ }
+ }
+
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;
code += LINK_SIZE;
@@ -6923,7 +6951,8 @@ for (;; ptr++)
slot = cd->name_table;
for (i = 0; i < cd->names_found; i++)
{
- if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
+ if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
+ slot[IMM2_SIZE+namelen] == 0) break;
slot += cd->name_entry_size;
}
@@ -7889,15 +7918,17 @@ for (;; ptr++)
}
}
- /* For a forward assertion, we take the reqchar, if set. This can be
- helpful if the pattern that follows the assertion doesn't set a different
- char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
- for an assertion, however because it leads to incorrect effect for patterns
- such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
- of a firstchar. This is overcome by a scan at the end if there's no
- firstchar, looking for an asserted first char. */
-
- else if (bravalue == OP_ASSERT && subreqcharflags >= 0)
+ /* For a forward assertion, we take the reqchar, if set, provided that the
+ group has also set a first char. This can be helpful if the pattern that
+ follows the assertion doesn't set a different char. For example, it's
+ useful for /(?=abcde).+/. We can't set firstchar for an assertion, however
+ because it leads to incorrect effect for patterns such as /(?=a)a.+/ when
+ the "real" "a" would then become a reqchar instead of a firstchar. This is
+ overcome by a scan at the end if there's no firstchar, looking for an
+ asserted first char. */
+
+ else if (bravalue == OP_ASSERT && subreqcharflags >= 0 &&
+ subfirstcharflags >= 0)
{
reqchar = subreqchar;
reqcharflags = subreqcharflags;
@@ -8686,8 +8717,8 @@ matching and for non-DOTALL patterns that start with .* (which must start at
the beginning or after \n). As in the case of is_anchored() (see above), we
have to take account of back references to capturing brackets that contain .*
because in that case we can't make the assumption. Also, the appearance of .*
-inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
-count, because once again the assumption no longer holds.
+inside atomic brackets or in an assertion, or in a pattern that contains *PRUNE
+or *SKIP does not count, because once again the assumption no longer holds.
Arguments:
code points to start of expression (the bracket)
@@ -8696,13 +8727,14 @@ Arguments:
the less precise approach
cd points to the compile data
atomcount atomic group level
+ inassert TRUE if in an assertion
Returns: TRUE or FALSE
*/
static BOOL
is_startline(const pcre_uchar *code, unsigned int bracket_map,
- compile_data *cd, int atomcount)
+ compile_data *cd, int atomcount, BOOL inassert)
{
do {
const pcre_uchar *scode = first_significant_code(
@@ -8729,7 +8761,7 @@ do {
return FALSE;
default: /* Assertion */
- if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE;
do scode += GET(scode, 1); while (*scode == OP_ALT);
scode += 1 + LINK_SIZE;
break;
@@ -8743,7 +8775,7 @@ do {
if (op == OP_BRA || op == OP_BRAPOS ||
op == OP_SBRA || op == OP_SBRAPOS)
{
- if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cd, atomcount, inassert)) return FALSE;
}
/* Capturing brackets */
@@ -8753,33 +8785,33 @@ do {
{
int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1 << n) : 1);
- if (!is_startline(scode, new_map, cd, atomcount)) return FALSE;
+ if (!is_startline(scode, new_map, cd, atomcount, inassert)) return FALSE;
}
/* Positive forward assertions */
else if (op == OP_ASSERT)
{
- if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE;
+ if (!is_startline(scode, bracket_map, cd, atomcount, TRUE)) return FALSE;
}
/* Atomic brackets */
else if (op == OP_ONCE || op == OP_ONCE_NC)
{
- if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE;
+ if (!is_startline(scode, bracket_map, cd, atomcount + 1, inassert)) return FALSE;
}
/* .* means "start at start or after \n" if it isn't in atomic brackets or
- brackets that may be referenced, as long as the pattern does not contain
- *PRUNE or *SKIP, because these break the feature. Consider, for example,
- /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
- start of a line. */
+ brackets that may be referenced or an assertion, as long as the pattern does
+ not contain *PRUNE or *SKIP, because these break the feature. Consider, for
+ example, /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e.
+ not at the start of a line. */
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
{
if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 ||
- atomcount > 0 || cd->had_pruneorskip)
+ atomcount > 0 || cd->had_pruneorskip || inassert)
return FALSE;
}
@@ -9634,7 +9666,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
re->flags |= PCRE_FIRSTSET;
}
- else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE;
+ else if (is_startline(codestart, 0, cd, 0, FALSE)) re->flags |= PCRE_STARTLINE;
}
}