diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-13 16:31:38 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-13 16:31:38 +0000 |
commit | fce480ed2031901b511711ff50ca67afe06080f0 (patch) | |
tree | 5e881c3c0cb46de8a3d83a84538cbc18f04e7f5b /pcre_compile.c | |
parent | 0cba29be59c8ca542c0982fa506d813970cdb84f (diff) | |
download | pcre-fce480ed2031901b511711ff50ca67afe06080f0.tar.gz |
Correctly supporting \x and \u in JavaScript compatibility mode
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@744 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- | pcre_compile.c | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index 3fc7c82..2687c0b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -676,9 +676,39 @@ else case CHAR_l: case CHAR_L: + *errorcodeptr = ERR37; + break; + case CHAR_u: + if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) + { + /* In JavaScript, \u must be followed by four hexadecimal numbers. + Otherwise it is a lowercase u letter. */ + if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0 + && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0) + { + int i; + c = 0; + for (i = 0; i < 4; ++i) + { + register int cc = *(++ptr); +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ + c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); +#else /* EBCDIC coding */ + if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ + c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); +#endif + } + } + } + else + *errorcodeptr = ERR37; + break; + case CHAR_U: - *errorcodeptr = ERR37; + /* In JavaScript, \U is an uppercase U letter. */ + if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37; break; /* In a character class, \g is just a literal "g". Outside a character @@ -828,6 +858,29 @@ else treated as a data character. */ case CHAR_x: + if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) + { + /* In JavaScript, \x must be followed by two hexadecimal numbers. + Otherwise it is a lowercase x letter. */ + if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0) + { + int i; + c = 0; + for (i = 0; i < 2; ++i) + { + register int cc = *(++ptr); +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ + c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); +#else /* EBCDIC coding */ + if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ + c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); +#endif + } + } + break; + } + if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) { const uschar *pt = ptr + 2; |