diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-21 12:05:36 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-21 12:05:36 +0000 |
commit | 130da81a5142a5cef80543ae5b64167229c23432 (patch) | |
tree | affe096c119e1bf0f17b0bc7be667fca876cc7b9 | |
parent | 8f0ed27a10ee3efb8b11a044637144fb8fc6641f (diff) | |
download | pcre-130da81a5142a5cef80543ae5b64167229c23432.tar.gz |
Disallow \N in character classes, for Perl compatibility.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@758 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | doc/pcrepattern.3 | 14 | ||||
-rw-r--r-- | pcre_compile.c | 6 | ||||
-rw-r--r-- | pcre_internal.h | 2 | ||||
-rw-r--r-- | pcreposix.c | 1 | ||||
-rw-r--r-- | testdata/testinput2 | 5 | ||||
-rw-r--r-- | testdata/testoutput2 | 6 |
7 files changed, 30 insertions, 7 deletions
@@ -52,6 +52,9 @@ Version 8.21
 13. In non-UTF-8 mode, \C is now supported in lookbehinds and DFA matching.
 
+14. Perl does not support \N without a following name in a [] class; PCRE now
+ also gives an error.
+
 
 Version 8.20 21-Oct-2011
 ------------------------
 
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 18def50..100ae9e 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -328,12 +328,14 @@ Note that octal values of 100 or greater must not be introduced by a leading
 zero, because no more than three octal digits are ever read.
 .P
 All the sequences that define a single character value can be used both inside
-and outside character classes. In addition, inside a character class, the
-sequence \eb is interpreted as the backspace character (hex 08). The sequences
-\eB, \eN, \eR, and \eX are not special inside a character class. Like any other
-unrecognized escape sequences, they are treated as the literal characters "B",
-"N", "R", and "X" by default, but cause an error if the PCRE_EXTRA option is
-set. Outside a character class, these sequences have different meanings.
+and outside character classes. In addition, inside a character class, \eb is
+interpreted as the backspace character (hex 08).
+.P
+\eN is not allowed in a character class. \eB, \eR, and \eX are not special
+inside a character class. Like other unrecognized escape sequences, they are
+treated as the literal characters "B", "R", and "X" by default, but cause an
+error if the PCRE_EXTRA option is set. Outside a character class, these
+sequences have different meanings.
 .
 .
 .SS "Unsupported escape sequences"
diff --git a/pcre_compile.c b/pcre_compile.c
index 0252261..4a63e69 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -412,6 +412,7 @@ static const char error_texts[] =
 "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
 /* 70 */
 "internal error: unknown opcode in find_fixedlength()\0"
+ "\\N is not supported in a class\0"
 ;
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -3770,6 +3771,11 @@ for (;; ptr++)
 if (*errorcodeptr != 0) goto FAILED;
 
 if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
+ else if (-c == ESC_N) /* \N is not supported in a class */
+ {
+ *errorcodeptr = ERR71;
+ goto FAILED;
+ }
 else if (-c == ESC_Q) /* Handle start of quoted string */
 {
 if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
diff --git a/pcre_internal.h b/pcre_internal.h
index 0c5d676..3655349 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1665,7 +1665,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
 ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
 ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
 ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERRCOUNT };
+ ERR70, ERR71, ERRCOUNT };
 
 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end.
We store an explicit
diff --git a/pcreposix.c b/pcreposix.c
index 648254b..b5f9d0e 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -155,6 +155,7 @@ static const int eint[] = {
 REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */
 /* 70 */
 REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
+ REG_BADPAT, /* \N is not supported in a class */
 };
 
 /* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput2 b/testdata/testinput2
index b673fef..c7c5c16 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4009,4 +4009,9 @@ AbcdCBefgBhiBqz
 
 /(?<=ab\Cde)X/8
 
+/a[\NB]c/
+ aNc
+
+/a[B-\Nc]/
+
 /-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 417225a..ba2f53c 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12594,4 +12594,10 @@ No match
 /(?<=ab\Cde)X/8
 Failed: \C not allowed in lookbehind assertion at offset 10
 
+/a[\NB]c/
+Failed: \N is not supported in a class at offset 3
+
+/a[B-\Nc]/
+Failed: \N is not supported in a class at offset 5
+
 /-- End of testinput2 --/ |