diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-05-04 13:03:39 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-05-04 13:03:39 +0000 |
commit | 9ea15e49861810c711ba92ea3faaad38d3492015 (patch) | |
tree | 50a03241f173399598bd7b321ac83a02c781adba | |
parent | 0c6344b222dca94ccf3895e72035845c7abfba45 (diff) | |
download | pcre-9ea15e49861810c711ba92ea3faaad38d3492015.tar.gz |
Check for overlong name in (*MARK) etc.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@964 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | doc/pcreapi.3 | 5 | ||||
-rw-r--r-- | doc/pcrelimits.3 | 7 | ||||
-rw-r--r-- | doc/pcrepattern.3 | 13 | ||||
-rw-r--r-- | pcre_compile.c | 9 | ||||
-rw-r--r-- | pcre_internal.h | 7 | ||||
-rw-r--r-- | pcreposix.c | 4 | ||||
-rw-r--r-- | testdata/testinput14 | 6 | ||||
-rw-r--r-- | testdata/testinput17 | 6 | ||||
-rw-r--r-- | testdata/testoutput14 | 8 | ||||
-rw-r--r-- | testdata/testoutput17 | 10 |
11 files changed, 65 insertions, 13 deletions
@@ -107,6 +107,9 @@ Version 8.31 28. To catch bugs like 27 using valgrind, when pcretest is asked to specify an ovector size, it uses memory at the end of the block that it has got. + +29. Check for an overlong MARK name and give an error at compile time. The + limit is 255 for the 8-bit library and 65535 for the 16-bit library. Version 8.30 04-February-2012 diff --git a/doc/pcreapi.3 b/doc/pcreapi.3 index 87e7faa..59c9a34 100644 --- a/doc/pcreapi.3 +++ b/doc/pcreapi.3 @@ -1,4 +1,4 @@ -.TH PCREAPI 3 "19 April 2012" "PCRE 8.31" +.TH PCREAPI 3 "04 May 2012" "PCRE 8.31" .SH NAME PCRE - Perl-compatible regular expressions .sp @@ -926,6 +926,7 @@ fallen out of use. To avoid confusion, they have not been re-used. 72 too many forward references 73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff) 74 invalid UTF-16 string (specifically UTF-16) + 75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) .sp The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may be used if the limits were changed when PCRE was built. @@ -2665,6 +2666,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 19 April 2012 +Last updated: 04 May 2012 Copyright (c) 1997-2012 University of Cambridge. .fi diff --git a/doc/pcrelimits.3 b/doc/pcrelimits.3 index 83ff53b..0e25f82 100644 --- a/doc/pcrelimits.3 +++ b/doc/pcrelimits.3 @@ -1,4 +1,4 @@ -.TH PCRELIMITS 3 "13 January 2012" "PCRE 8.30" +.TH PCRELIMITS 3 "04 May 2012" "PCRE 8.30" .SH NAME PCRE - Perl-compatible regular expressions .SH "SIZE AND OTHER LIMITATIONS" @@ -32,6 +32,9 @@ the count. There is no limit to the number of backward references. The maximum length of name for a named subpattern is 32 characters, and the maximum number of named subpatterns is 10000. .P +The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 for the 8-bit library and 65535 for the 16-bit library. +.P The maximum length of a subject string is the largest positive number that an integer variable can hold. However, when using the traditional matching function, PCRE uses recursion to handle subpatterns and indefinite repetition. @@ -58,6 +61,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 08 January 2012 +Last updated: 04 May 2012 Copyright (c) 1997-2012 University of Cambridge. .fi diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index 4560c6d..d9ae47d 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -1,4 +1,4 @@ -.TH PCREPATTERN 3 "14 April 2012" "PCRE 8.31" +.TH PCREPATTERN 3 "04 May 2012" "PCRE 8.31" .SH NAME PCRE - Perl-compatible regular expressions .SH "PCRE REGULAR EXPRESSION DETAILS" @@ -2605,10 +2605,11 @@ The new verbs make use of what was previously invalid syntax: an opening parenthesis followed by an asterisk. They are generally of the form (*VERB) or (*VERB:NAME). Some may take either form, with differing behaviour, depending on whether or not an argument is present. A name is any sequence of -characters that does not include a closing parenthesis. If the name is empty, -that is, if the closing parenthesis immediately follows the colon, the effect -is as if the colon were not there. Any number of these verbs may occur in a -pattern. +characters that does not include a closing parenthesis. The maximum length of +name is 255 in the 8-bit library and 65535 in the 16-bit library. If the name +is empty, that is, if the closing parenthesis immediately follows the colon, +the effect is as if the colon were not there. Any number of these verbs may +occur in a pattern. . . .\" HTML <a name="nooptimize"></a> @@ -2910,6 +2911,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 14 April 2012 +Last updated: 04 May 2012 Copyright (c) 1997-2012 University of Cambridge. .fi diff --git a/pcre_compile.c b/pcre_compile.c index 07b8a00..3a11ada 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -489,6 +489,8 @@ static const char error_texts[] = "too many forward references\0" "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" "invalid UTF-16 string\0" + /* 75 */ + "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -5591,7 +5593,7 @@ for (;; ptr++) ptr++; while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; namelen = (int)(ptr - name); - + /* It appears that Perl allows any characters whatsoever, other than a closing parenthesis, to appear in arguments, so we no longer insist on letters, digits, and underscores. */ @@ -5601,6 +5603,11 @@ for (;; ptr++) arg = ++ptr; while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; arglen = (int)(ptr - arg); + if (arglen > (int)MAX_MARK) + { + *errorcodeptr = ERR75; + goto FAILED; + } } if (*ptr != CHAR_RIGHT_PARENTHESIS) diff --git a/pcre_internal.h b/pcre_internal.h index 3e434ae..a599e52 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -523,6 +523,11 @@ capturing parenthesis numbers in back references. */ #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE +/* The maximum length of a MARK name is currently one data unit; it may be +changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */ + +#define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1) + /* When UTF encoding is being used, a character is no longer just a single character. The macros for character handling generate simple sequences when used in character-mode, and more complicated ones for UTF characters. @@ -1940,7 +1945,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT }; + ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERRCOUNT }; /* JIT compiling modes. The function list is indexed by them. */ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, diff --git a/pcreposix.c b/pcreposix.c index c1a9d1c..99cc410 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -158,7 +158,9 @@ static const int eint[] = { REG_BADPAT, /* \N is not supported in a class */ REG_BADPAT, /* too many forward references */ REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */ - REG_BADPAT /* invalid UTF-16 string (should not occur) */ + REG_BADPAT, /* invalid UTF-16 string (should not occur) */ + /* 75 */ + REG_BADPAT /* overlong MARK name */ }; /* Table of texts corresponding to POSIX error codes */ diff --git a/testdata/testinput14 b/testdata/testinput14 index 0db5346..5564276 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -314,4 +314,10 @@ not matter. --/ /\777/I +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K + XX + /-- End of testinput14 --/ diff --git a/testdata/testinput17 b/testdata/testinput17 index 9600d70..691a49f 100644 --- a/testdata/testinput17 +++ b/testdata/testinput17 @@ -280,4 +280,10 @@ /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/BZi +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K + XX + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K + XX + /-- End of testinput17 --/ diff --git a/testdata/testoutput14 b/testdata/testoutput14 index 7c889d0..6133b6e 100644 --- a/testdata/testoutput14 +++ b/testdata/testoutput14 @@ -453,4 +453,12 @@ Starting byte set: \x0a \x0b \x0c \x0d \x85 /\777/I Failed: octal value is greater than \377 in 8-bit non-UTF-8 mode at offset 3 +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K +Failed: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) at offset 259 + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + /-- End of testinput14 --/ diff --git a/testdata/testoutput17 b/testdata/testoutput17 index 32fb295..e1a20d9 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -506,4 +506,14 @@ Need char = \x{dd00} End ------------------------------------------------------------------ +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/K + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF + +/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K + XX + 0: XX +MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE + /-- End of testinput17 --/ |