summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-05-04 13:03:39 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-05-04 13:03:39 +0000
commit 9ea15e49861810c711ba92ea3faaad38d3492015 (patch)
tree 50a03241f173399598bd7b321ac83a02c781adba
parent 0c6344b222dca94ccf3895e72035845c7abfba45 (diff)
download pcre-9ea15e49861810c711ba92ea3faaad38d3492015.tar.gz
Check for overlong name in (*MARK) etc.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@964 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 3
-rw-r--r-- doc/pcreapi.3 5
-rw-r--r-- doc/pcrelimits.3 7
-rw-r--r-- doc/pcrepattern.3 13
-rw-r--r-- pcre_compile.c 9
-rw-r--r-- pcre_internal.h 7
-rw-r--r-- pcreposix.c 4
-rw-r--r-- testdata/testinput14 6
-rw-r--r-- testdata/testinput17 6
-rw-r--r-- testdata/testoutput14 8
-rw-r--r-- testdata/testoutput17 10
11 files changed, 65 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 8cce64e..84d9ce6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -107,6 +107,9 @@ Version 8.31
28. To catch bugs like 27 using valgrind, when pcretest is asked to specify an
ovector size, it uses memory at the end of the block that it has got.
+
+29. Check for an overlong MARK name and give an error at compile time. The
+ limit is 255 for the 8-bit library and 65535 for the 16-bit library.
Version 8.30 04-February-2012
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 87e7faa..59c9a34 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "19 April 2012" "PCRE 8.31"
+.TH PCREAPI 3 "04 May 2012" "PCRE 8.31"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -926,6 +926,7 @@ fallen out of use. To avoid confusion, they have not been re-used.
72 too many forward references
73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
74 invalid UTF-16 string (specifically UTF-16)
+ 75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
.sp
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
@@ -2665,6 +2666,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 19 April 2012
+Last updated: 04 May 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrelimits.3 b/doc/pcrelimits.3
index 83ff53b..0e25f82 100644
--- a/doc/pcrelimits.3
+++ b/doc/pcrelimits.3
@@ -1,4 +1,4 @@
-.TH PCRELIMITS 3 "13 January 2012" "PCRE 8.30"
+.TH PCRELIMITS 3 "04 May 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "SIZE AND OTHER LIMITATIONS"
@@ -32,6 +32,9 @@ the count. There is no limit to the number of backward references.
The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
.P
+The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
+is 255 for the 8-bit library and 65535 for the 16-bit library.
+.P
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
@@ -58,6 +61,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 08 January 2012
+Last updated: 04 May 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 4560c6d..d9ae47d 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "14 April 2012" "PCRE 8.31"
+.TH PCREPATTERN 3 "04 May 2012" "PCRE 8.31"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -2605,10 +2605,11 @@ The new verbs make use of what was previously invalid syntax: an opening
parenthesis followed by an asterisk. They are generally of the form
(*VERB) or (*VERB:NAME). Some may take either form, with differing behaviour,
depending on whether or not an argument is present. A name is any sequence of
-characters that does not include a closing parenthesis. If the name is empty,
-that is, if the closing parenthesis immediately follows the colon, the effect
-is as if the colon were not there. Any number of these verbs may occur in a
-pattern.
+characters that does not include a closing parenthesis. The maximum length of a
+name is 255 in the 8-bit library and 65535 in the 16-bit library. If the name
+is empty, that is, if the closing parenthesis immediately follows the colon,
+the effect is as if the colon were not there. Any number of these verbs may
+occur in a pattern.
.
.
.\" HTML <a name="nooptimize"></a>
@@ -2910,6 +2911,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 14 April 2012
+Last updated: 04 May 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/pcre_compile.c b/pcre_compile.c
index 07b8a00..3a11ada 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -489,6 +489,8 @@ static const char error_texts[] =
"too many forward references\0"
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
"invalid UTF-16 string\0"
+ /* 75 */
+ "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -5591,7 +5593,7 @@ for (;; ptr++)
ptr++;
while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
namelen = (int)(ptr - name);
-
+
/* It appears that Perl allows any characters whatsoever, other than
a closing parenthesis, to appear in arguments, so we no longer insist on
letters, digits, and underscores. */
@@ -5601,6 +5603,11 @@ for (;; ptr++)
arg = ++ptr;
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
arglen = (int)(ptr - arg);
+ if (arglen > (int)MAX_MARK)
+ {
+ *errorcodeptr = ERR75;
+ goto FAILED;
+ }
}
if (*ptr != CHAR_RIGHT_PARENTHESIS)
diff --git a/pcre_internal.h b/pcre_internal.h
index 3e434ae..a599e52 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
+/* The length of a MARK name must currently fit in one data unit; the limit may
+be changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
+
+#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
+
/* When UTF encoding is being used, a character is no longer just a single
character. The macros for character handling generate simple sequences when
used in character-mode, and more complicated ones for UTF characters.
@@ -1940,7 +1945,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
diff --git a/pcreposix.c b/pcreposix.c
index c1a9d1c..99cc410 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -158,7 +158,9 @@ static const int eint[] = {
REG_BADPAT, /* \N is not supported in a class */
REG_BADPAT, /* too many forward references */
REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
- REG_BADPAT /* invalid UTF-16 string (should not occur) */
+ REG_BADPAT, /* invalid UTF-16 string (should not occur) */
+ /* 75 */
+ REG_BADPAT /* overlong MARK name */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput14 b/testdata/testinput14
index 0db5346..5564276 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -314,4 +314,10 @@ not matter. --/
/\777/I
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
+ XX
+
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
+ XX
+
/-- End of testinput14 --/
diff --git a/testdata/testinput17 b/testdata/testinput17
index 9600d70..691a49f 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -280,4 +280,10 @@
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
+ XX
+
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
+ XX
+
/-- End of testinput17 --/
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index 7c889d0..6133b6e 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -453,4 +453,12 @@ Starting byte set: \x0a \x0b \x0c \x0d \x85
/\777/I
Failed: octal value is greater than \377 in 8-bit non-UTF-8 mode at offset 3
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
+Failed: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) at offset 259
+
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
+ XX
+ 0: XX
+MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+
/-- End of testinput14 --/
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index 32fb295..e1a20d9 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -506,4 +506,14 @@ Need char = \x{dd00}
End
------------------------------------------------------------------
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K
+ XX
+ 0: XX
+MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
+
+/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
+ XX
+ 0: XX
+MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+
/-- End of testinput17 --/