summary | refs | log | tree | commit | diff
path: root/pcre_compile.c
diff options
context:
space:
mode:
author: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-13 16:31:38 +0000
committer: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-13 16:31:38 +0000
commit: fce480ed2031901b511711ff50ca67afe06080f0 (patch)
tree: 5e881c3c0cb46de8a3d83a84538cbc18f04e7f5b /pcre_compile.c
parent: 0cba29be59c8ca542c0982fa506d813970cdb84f (diff)
download: pcre-fce480ed2031901b511711ff50ca67afe06080f0.tar.gz
Correctly supporting \x and \u in JavaScript compatibility mode
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@744 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- pcre_compile.c 55
1 file changed, 54 insertions, 1 deletion
diff --git a/pcre_compile.c b/pcre_compile.c
index 3fc7c82..2687c0b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -676,9 +676,39 @@ else
case CHAR_l:
case CHAR_L:
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_u:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \u must be followed by exactly four hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter u. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
+ && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 4; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ }
+ else
+ *errorcodeptr = ERR37;
+ break;
+
case CHAR_U:
- *errorcodeptr = ERR37;
+ /* In JavaScript, \U is an uppercase U letter. */
+ if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37;
break;
/* In a character class, \g is just a literal "g". Outside a character
@@ -828,6 +858,29 @@ else
treated as a data character. */
case CHAR_x:
+ if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ {
+ /* In JavaScript, \x must be followed by exactly two hexadecimal digits.
+ Otherwise it is treated as a literal lowercase letter x. */
+ if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+ {
+ int i;
+ c = 0;
+ for (i = 0; i < 2; ++i)
+ {
+ register int cc = *(++ptr);
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
+#else /* EBCDIC coding */
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
+#endif
+ }
+ }
+ break;
+ }
+
if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
const uschar *pt = ptr + 2;