summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-21 12:05:36 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-21 12:05:36 +0000
commit 130da81a5142a5cef80543ae5b64167229c23432 (patch)
tree affe096c119e1bf0f17b0bc7be667fca876cc7b9
parent 8f0ed27a10ee3efb8b11a044637144fb8fc6641f (diff)
download pcre-130da81a5142a5cef80543ae5b64167229c23432.tar.gz
Disallow \N in character classes, for Perl compatibility.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@758 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 3
-rw-r--r-- doc/pcrepattern.3 14
-rw-r--r-- pcre_compile.c 6
-rw-r--r-- pcre_internal.h 2
-rw-r--r-- pcreposix.c 1
-rw-r--r-- testdata/testinput2 5
-rw-r--r-- testdata/testoutput2 6
7 files changed, 30 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 1cc0d94..d9aed78 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -52,6 +52,9 @@ Version 8.21
13. In non-UTF-8 mode, \C is now supported in lookbehinds and DFA matching.
+14. Perl does not support \N without a following name in a [] class; PCRE now
+ also gives an error.
+
Version 8.20 21-Oct-2011
------------------------
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 18def50..100ae9e 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -328,12 +328,14 @@ Note that octal values of 100 or greater must not be introduced by a leading
zero, because no more than three octal digits are ever read.
.P
All the sequences that define a single character value can be used both inside
-and outside character classes. In addition, inside a character class, the
-sequence \eb is interpreted as the backspace character (hex 08). The sequences
-\eB, \eN, \eR, and \eX are not special inside a character class. Like any other
-unrecognized escape sequences, they are treated as the literal characters "B",
-"N", "R", and "X" by default, but cause an error if the PCRE_EXTRA option is
-set. Outside a character class, these sequences have different meanings.
+and outside character classes. In addition, inside a character class, \eb is
+interpreted as the backspace character (hex 08).
+.P
+\eN is not allowed in a character class. \eB, \eR, and \eX are not special
+inside a character class. Like other unrecognized escape sequences, they are
+treated as the literal characters "B", "R", and "X" by default, but cause an
+error if the PCRE_EXTRA option is set. Outside a character class, these
+sequences have different meanings.
.
.
.SS "Unsupported escape sequences"
diff --git a/pcre_compile.c b/pcre_compile.c
index 0252261..4a63e69 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -412,6 +412,7 @@ static const char error_texts[] =
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
/* 70 */
"internal error: unknown opcode in find_fixedlength()\0"
+ "\\N is not supported in a class\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -3770,6 +3771,11 @@ for (;; ptr++)
if (*errorcodeptr != 0) goto FAILED;
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
+ else if (-c == ESC_N) /* \N is not supported in a class */
+ {
+ *errorcodeptr = ERR71;
+ goto FAILED;
+ }
else if (-c == ESC_Q) /* Handle start of quoted string */
{
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
diff --git a/pcre_internal.h b/pcre_internal.h
index 0c5d676..3655349 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1665,7 +1665,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERRCOUNT };
+ ERR70, ERR71, ERRCOUNT };
/* The real format of the start of the pcre block; the index of names and the
code vector run on as long as necessary after the end. We store an explicit
diff --git a/pcreposix.c b/pcreposix.c
index 648254b..b5f9d0e 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -155,6 +155,7 @@ static const int eint[] = {
REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */
/* 70 */
REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
+ REG_BADPAT, /* \N is not supported in a class */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput2 b/testdata/testinput2
index b673fef..c7c5c16 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4009,4 +4009,9 @@ AbcdCBefgBhiBqz
/(?<=ab\Cde)X/8
+/a[\NB]c/
+ aNc
+
+/a[B-\Nc]/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 417225a..ba2f53c 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12594,4 +12594,10 @@ No match
/(?<=ab\Cde)X/8
Failed: \C not allowed in lookbehind assertion at offset 10
+/a[\NB]c/
+Failed: \N is not supported in a class at offset 3
+
+/a[B-\Nc]/
+Failed: \N is not supported in a class at offset 5
+
/-- End of testinput2 --/