summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2015-12-02 17:39:26 +0000
committerph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2015-12-02 17:39:26 +0000
commit461d23f4e56605c6db3e53720dcaf6fba26319c2 (patch)
tree199a21d1df662bfcac08cc9e28ee3563402a5e93
parentf6728aeb09032c682dbb8d08feca66d3a5b0a96c (diff)
downloadpcre2-461d23f4e56605c6db3e53720dcaf6fba26319c2.tar.gz
Fix issues with NULL characters in patterns.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@455 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--ChangeLog8
-rw-r--r--src/pcre2_compile.c18
-rw-r--r--src/pcre2_printint.c33
-rw-r--r--testdata/testinput220
-rw-r--r--testdata/testoutput262
5 files changed, 122 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 332b3c2..3bc48ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -358,6 +358,14 @@ other verb "name" ended with whitespace immediately before the closing
parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when
both those options were set.
+107. In a number of places pcre2_compile() was not handling NUL (binary zero)
+characters correctly, and pcre2test with the "bincode" modifier was not always
+correctly displaying fields containing NULs:
+
+ (a) Within /x extended #-comments
+ (b) Within the "name" part of (*MARK) and other *verbs
+ (c) Within the text argument of a callout
+
Version 10.20 30-June-2015
--------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 3e92436..373d3fd 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3017,12 +3017,12 @@ for (; ptr < cb->end_pattern; ptr++)
if ((options & PCRE2_EXTENDED) != 0)
{
- PCRE2_SPTR wscptr = ptr;
+ PCRE2_SPTR wscptr = ptr;
while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
if (x == CHAR_NUMBER_SIGN)
- {
+ {
ptr++;
- while (*ptr != CHAR_NULL)
+ while (*ptr != CHAR_NULL || ptr < cb->end_pattern)
{
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
{ /* IS_NEWLINE sets cb->nllen. */
@@ -3034,10 +3034,10 @@ for (; ptr < cb->end_pattern; ptr++)
if (utf) FORWARDCHAR(ptr);
#endif
}
- }
-
+ }
+
/* If we have skipped any characters, restart the loop. */
-
+
if (ptr > wscptr)
{
ptr--;
@@ -4008,7 +4008,7 @@ for (;; ptr++)
if (c == CHAR_NUMBER_SIGN)
{
ptr++;
- while (*ptr != CHAR_NULL)
+ while (ptr < cb->end_pattern)
{
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
{ /* IS_NEWLINE sets cb->nllen. */
@@ -5044,7 +5044,7 @@ for (;; ptr++)
while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++;
if (*p != CHAR_NUMBER_SIGN) break;
p++;
- while (*p != CHAR_NULL)
+ while (ptr < cb->end_pattern)
{
if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */
{ /* IS_NEWLINE sets cb->nllen. */
@@ -5832,7 +5832,7 @@ for (;; ptr++)
if ((options & PCRE2_ALT_VERBNAMES) == 0)
{
arglen = 0;
- while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
+ while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
{
ptr++; /* Check length as we go */
arglen++; /* along, to avoid the */
diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c
index 2cd01ab..6d4fe60 100644
--- a/src/pcre2_printint.c
+++ b/src/pcre2_printint.c
@@ -58,12 +58,13 @@ static const char *OP_names[] = { OP_NAME_LIST };
/* The functions and tables herein must all have mode-dependent names. */
-#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
-#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
-#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
-#define print_char PCRE2_SUFFIX(print_char_)
-#define print_custring PCRE2_SUFFIX(print_custring_)
-#define print_prop PCRE2_SUFFIX(print_prop_)
+#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
+#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
+#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
+#define print_char PCRE2_SUFFIX(print_char_)
+#define print_custring PCRE2_SUFFIX(print_custring_)
+#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
+#define print_prop PCRE2_SUFFIX(print_prop_)
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre2_internal.h.
@@ -188,12 +189,14 @@ return 0;
* Print string as a list of code units *
*************************************************/
-/* This takes no account of UTF as it always prints each individual code unit.
-The string is zero-terminated.
+/* These take no account of UTF as they always print each individual code unit.
+The string is zero-terminated for print_custring(); the length is given for
+print_custring_bylen().
Arguments:
f file to write to
ptr point to the string
+ len length for print_custring_bylen()
Returns: nothing
*/
@@ -208,6 +211,16 @@ while (*ptr != '\0')
}
}
+static void
+print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
+{
+for (; len > 0; len--)     /* Counted by len, so a zero unit does not stop it */
+  {
+  uint32_t unit = *ptr++;  /* One code unit at a time; no UTF decoding */
+  if (PRINTABLE(unit)) fprintf(f, "%c", unit); else fprintf(f, "\\x{%x}", unit);
+  }
+}
+
/*************************************************
@@ -603,7 +616,7 @@ for(;;)
c = code[1 + 4*LINK_SIZE];
fprintf(f, " %s %c", OP_names[*code], c);
extra = GET(code, 1 + 2*LINK_SIZE);
- print_custring(f, code + 2 + 4*LINK_SIZE);
+ print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
if (c == PRIV(callout_start_delims)[i])
{
@@ -791,7 +804,7 @@ for(;;)
case OP_SKIP_ARG:
case OP_THEN_ARG:
fprintf(f, " %s ", OP_names[*code]);
- print_custring(f, code + 2);
+ print_custring_bylen(f, code + 2, code[1]);
extra += code[1];
break;
diff --git a/testdata/testinput2 b/testdata/testinput2
index eb6b9e4..8b85d53 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4704,4 +4704,24 @@ a)"xI
/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended
+# Tests for NULL characters in comments and verb "names" and callouts
+
+# /A#B\x00C\x0aZ/
+/41 23 42 00 43 0a 5a/Bx,hex
+
+# /A+#B\x00C\x0a+/
+/41 2b 23 42 00 43 0a 2b/Bx,hex
+
+# /A(*:B\x00W#X\00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
+
+# /A(*:B\x00W#X\00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
+
+# /A(?C{X\x00Y})B/
+/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
+
+# /A(?#X\x00Y)B/
+/41 28 3f 23 7b 00 7d 29 42/B,hex
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 698c44d..50993c8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14998,4 +14998,66 @@ Subject length lower bound = 0
End
------------------------------------------------------------------
+# Tests for NULL characters in comments and verb "names" and callouts
+
+# /A#B\x00C\x0aZ/
+/41 23 42 00 43 0a 5a/Bx,hex
+------------------------------------------------------------------
+ Bra
+ AZ
+ Ket
+ End
+------------------------------------------------------------------
+
+# /A+#B\x00C\x0a+/
+/41 2b 23 42 00 43 0a 2b/Bx,hex
+------------------------------------------------------------------
+ Bra
+ A++
+ Ket
+ End
+------------------------------------------------------------------
+
+# /A(*:B\x00W#X\00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
+------------------------------------------------------------------
+ Bra
+ A
+ *MARK B\x{0}WC
+ Z
+ Ket
+ End
+------------------------------------------------------------------
+
+# /A(*:B\x00W#X\00Y\x0aC)Z/
+/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
+------------------------------------------------------------------
+ Bra
+ A
+ *MARK B\x{0}W#X\x{0}Y\x{a}C
+ Z
+ Ket
+ End
+------------------------------------------------------------------
+
+# /A(?C{X\x00Y})B/
+/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
+------------------------------------------------------------------
+ Bra
+ A
+ CalloutStr {X\x{0}Y} 5 10 1
+ B
+ Ket
+ End
+------------------------------------------------------------------
+
+# /A(?#X\x00Y)B/
+/41 28 3f 23 7b 00 7d 29 42/B,hex
+------------------------------------------------------------------
+ Bra
+ AB
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2