summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-06-17 16:55:07 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-06-17 16:55:07 +0000
commit 9a61463ee5099a08610fb08ada15b3c8fc3e0ad5 (patch)
tree 66d801a879dcf0a81299d5961d3e923368b8c021
parent 4b5e82c43d6529a00a98be0351493d3f56d5e4eb (diff)
download pcre-9a61463ee5099a08610fb08ada15b3c8fc3e0ad5.tar.gz
Apply character value checks to \u.... in JavaScript mode, for compatibility
with \x{....} in non-JavaScript mode.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@978 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- doc/pcreapi.3 3
-rw-r--r-- doc/pcrepattern.3 4
-rw-r--r-- pcre_compile.c 13
-rw-r--r-- pcre_internal.h 2
-rw-r--r-- pcreposix.c 3
-rw-r--r-- testdata/testinput14 4
-rw-r--r-- testdata/testinput17 6
-rw-r--r-- testdata/testinput5 6
-rw-r--r-- testdata/testoutput14 6
-rw-r--r-- testdata/testoutput17 24
-rw-r--r-- testdata/testoutput5 19
12 files changed, 91 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 93905df..a29548d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -132,6 +132,11 @@ Version 8.31 02-June-2012
37. Optimizing single character iterators in JIT.
+38. Wide characters specified with \uxxxx in JavaScript mode are now subject to
+ the same checks as \x{...} characters in non-JavaScript mode. Specifically,
+ codepoints that are too big for the mode are faulted, and in a UTF mode,
+ disallowed codepoints are also faulted.
+
Version 8.30 04-February-2012
-----------------------------
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index f67a241..633f311 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -927,6 +927,7 @@ fallen out of use. To avoid confusion, they have not been re-used.
73 disallowed Unicode code point (>= 0xd800 && <= 0xdfff)
74 invalid UTF-16 string (specifically UTF-16)
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+ 76 character value in \eu.... sequence is too large
.sp
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
@@ -2666,6 +2667,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 04 May 2012
+Last updated: 17 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 220d7cf..0d19f6d 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -277,6 +277,8 @@ as just described only when it is followed by two hexadecimal digits.
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
code points greater than 256 is provided by \eu, which must be followed by
four hexadecimal digits; otherwise it matches a literal "u" character.
+Character codes specified by \eu in JavaScript mode are constrained in the same
+way as those specified by \ex in non-JavaScript mode.
.P
Characters whose value is less than 256 can be defined by either of the two
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
@@ -2911,6 +2913,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 01 June 2012
+Last updated: 17 June 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi
diff --git a/pcre_compile.c b/pcre_compile.c
index 1d2c003..30c2171 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -491,6 +491,7 @@ static const char error_texts[] =
"invalid UTF-16 string\0"
/* 75 */
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
+ "character value in \\u.... sequence is too large\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -831,6 +832,18 @@ else
c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
+
+#ifdef COMPILE_PCRE8
+ if (c > (utf ? 0x10ffff : 0xff))
+#else
+#ifdef COMPILE_PCRE16
+ if (c > (utf ? 0x10ffff : 0xffff))
+#endif
+#endif
+ {
+ *errorcodeptr = ERR76;
+ }
+ else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
}
}
else
diff --git a/pcre_internal.h b/pcre_internal.h
index 181c312..b8f40ec 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1945,7 +1945,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
diff --git a/pcreposix.c b/pcreposix.c
index 6c1ca25..b148f08 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -160,7 +160,8 @@ static const int eint[] = {
REG_BADPAT, /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
REG_BADPAT, /* invalid UTF-16 string (should not occur) */
/* 75 */
- REG_BADPAT /* overlong MARK name */
+ REG_BADPAT, /* overlong MARK name */
+ REG_BADPAT /* character value in \u.... sequence is too large */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/testdata/testinput14 b/testdata/testinput14
index 5564276..689f168 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -320,4 +320,8 @@ not matter. --/
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
XX
+/\u0100/<JS>
+
+/[\u0100-\u0200]/<JS>
+
/-- End of testinput14 --/
diff --git a/testdata/testinput17 b/testdata/testinput17
index 691a49f..154846e 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -286,4 +286,10 @@
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/K
XX
+/\u0100/<JS>BZ
+
+/[\u0100-\u0200]/<JS>BZ
+
+/\ud800/<JS>BZ
+
/-- End of testinput17 --/
diff --git a/testdata/testinput5 b/testdata/testinput5
index 008e660..642749c 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -763,4 +763,10 @@
/(?<!^)ETA/8
ETA
+/\u0100/<JS>8BZ
+
+/[\u0100-\u0200]/<JS>8BZ
+
+/\ud800/<JS>8
+
/-- End of testinput5 --/
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index 6133b6e..d5b456d 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -461,4 +461,10 @@ Failed: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) at offset 259
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+/\u0100/<JS>
+Failed: character value in \u.... sequence is too large at offset 5
+
+/[\u0100-\u0200]/<JS>
+Failed: character value in \u.... sequence is too large at offset 6
+
/-- End of testinput14 --/
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index e1a20d9..950882c 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -516,4 +516,28 @@ MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789AB
0: XX
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
+/\u0100/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ [\x{100}-\x{200}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/\ud800/<JS>BZ
+------------------------------------------------------------------
+ Bra
+ \x{d800}
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput17 --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index dfabd5a..c4f9a0c 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1827,4 +1827,23 @@ Max lookbehind = 2
ETA
No match
+/\u0100/<JS>8BZ
+------------------------------------------------------------------
+ Bra
+ \x{100}
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\u0100-\u0200]/<JS>8BZ
+------------------------------------------------------------------
+ Bra
+ [\x{100}-\x{200}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/\ud800/<JS>8
+Failed: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) at offset 5
+
/-- End of testinput5 --/