summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-11-13 16:31:38 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-11-13 16:31:38 +0000
commitfce480ed2031901b511711ff50ca67afe06080f0 (patch)
tree5e881c3c0cb46de8a3d83a84538cbc18f04e7f5b
parent0cba29be59c8ca542c0982fa506d813970cdb84f (diff)
downloadpcre-fce480ed2031901b511711ff50ca67afe06080f0.tar.gz
Correctly supporting \x and \u in JavaScript compatibility mode
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@744 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog9
-rw-r--r--pcre_compile.c55
-rw-r--r--testdata/testinput234
-rw-r--r--testdata/testoutput252
4 files changed, 146 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 2abf3f8..97afcc3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -12,11 +12,14 @@ Version 8.21
3. Fix cache-flush issue on PowerPC (It is still an experimental JIT port).
PCRE_EXTRA_TABLES is not supported by JIT, and should be checked before
calling _pcre_jit_exec. Some extra comments are added.
-
-4. Mark settings inside atomic groups that do not contain any capturing
- parentheses, for example, (?>a(*:m)), were not being passed out. This bug
+
+4. Mark settings inside atomic groups that do not contain any capturing
+ parentheses, for example, (?>a(*:m)), were not being passed out. This bug
was introduced by change 18 for 8.20.
+5. Support \x and \u in JavaScript compatibility mode, following the
+ ECMA-262 standard.
+
Version 8.20 21-Oct-2011
------------------------
diff --git a/pcre_compile.c b/pcre_compile.c
index 3fc7c82..2687c0b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -676,9 +676,39 @@ else
case CHAR_l:
case CHAR_L:
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_u:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \u must be followed by exactly four hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter u. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
+ && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 4; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ }
+ else
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_U:
- *errorcodeptr = ERR37;
+ /* In JavaScript, \U is treated as a literal uppercase letter U. */
+ if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
break;
/* In a character class, \g is just a literal "g". Outside a character
@@ -828,6 +858,29 @@ else
treated as a data character. */
case CHAR_x:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \x must be followed by exactly two hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter x. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 2; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ break;
+ }
+
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
const uschar *pt = ptr + 2;
diff --git a/testdata/testinput2 b/testdata/testinput2
index b101097..ae822de 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3969,4 +3969,38 @@ AbcdCBefgBhiBqz
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
\Maabbccddee
+/^a\x41z/<JS>
+ aAz
+ *** Failers
+ ax41z
+
+/^a[m\x41]z/<JS>
+ aAz
+
+/^a\x1z/<JS>
+ ax1z
+
+/^a\X41z/<JS>
+ aX41z
+ *** Failers
+ aAz
+
+/^a\u0041z/<JS>
+ aAz
+ *** Failers
+ au0041z
+
+/^a[m\u0041]z/<JS>
+ aAz
+
+/^a\u041z/<JS>
+ au041z
+ *** Failers
+ aAz
+
+/^a\U0041z/<JS>
+ aU0041z
+ *** Failers
+ aAz
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index f3ea88a..17e28bb 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12502,4 +12502,56 @@ Minimum match() recursion limit = 13
2: cc
3: ee
+/^a\x41z/<JS>
+ aAz
+ 0: aAz
+ *** Failers
+No match
+ ax41z
+No match
+
+/^a[m\x41]z/<JS>
+ aAz
+ 0: aAz
+
+/^a\x1z/<JS>
+ ax1z
+ 0: ax1z
+
+/^a\X41z/<JS>
+ aX41z
+ 0: aX41z
+ *** Failers
+No match
+ aAz
+No match
+
+/^a\u0041z/<JS>
+ aAz
+ 0: aAz
+ *** Failers
+No match
+ au0041z
+No match
+
+/^a[m\u0041]z/<JS>
+ aAz
+ 0: aAz
+
+/^a\u041z/<JS>
+ au041z
+ 0: au041z
+ *** Failers
+No match
+ aAz
+No match
+
+/^a\U0041z/<JS>
+ aU0041z
+ 0: aU0041z
+ *** Failers
+No match
+ aAz
+No match
+
/-- End of testinput2 --/