diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-13 16:31:38 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-13 16:31:38 +0000 |
commit | fce480ed2031901b511711ff50ca67afe06080f0 (patch) | |
tree | 5e881c3c0cb46de8a3d83a84538cbc18f04e7f5b | |
parent | 0cba29be59c8ca542c0982fa506d813970cdb84f (diff) | |
download | pcre-fce480ed2031901b511711ff50ca67afe06080f0.tar.gz |
Correctly supporting \x and \u in JavaScript compatibility mode
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@744 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | pcre_compile.c | 55 | ||||
-rw-r--r-- | testdata/testinput2 | 34 | ||||
-rw-r--r-- | testdata/testoutput2 | 52 |
4 files changed, 146 insertions, 4 deletions
@@ -12,11 +12,14 @@ Version 8.21 3. Fix cache-flush issue on PowerPC (It is still an experimental JIT port). PCRE_EXTRA_TABLES is not supported by JIT, and should be checked before calling _pcre_jit_exec. Some extra comments are added. - -4. Mark settings inside atomic groups that do not contain any capturing - parentheses, for example, (?>a(*:m)), were not being passed out. This bug + +4. Mark settings inside atomic groups that do not contain any capturing + parentheses, for example, (?>a(*:m)), were not being passed out. This bug was introduced by change 18 for 8.20. +5. Supporting of \x and \u in JavaScript compatibility mode based on the + ECMA-262 standard. + Version 8.20 21-Oct-2011 ------------------------ diff --git a/pcre_compile.c b/pcre_compile.c index 3fc7c82..2687c0b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -676,9 +676,39 @@ else case CHAR_l: case CHAR_L: + *errorcodeptr = ERR37; + break; + case CHAR_u: + if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) + { + /* In JavaScript, \u must be followed by four hexadecimal numbers. + Otherwise it is a lowercase u letter. */ + if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0 + && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0) + { + int i; + c = 0; + for (i = 0; i < 4; ++i) + { + register int cc = *(++ptr); +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ + c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); +#else /* EBCDIC coding */ + if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ + c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); +#endif + } + } + } + else + *errorcodeptr = ERR37; + break; + case CHAR_U: - *errorcodeptr = ERR37; + /* In JavaScript, \U is an uppercase U letter. */ + if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37; break; /* In a character class, \g is just a literal "g". 
Outside a character @@ -828,6 +858,29 @@ else treated as a data character. */ case CHAR_x: + if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) + { + /* In JavaScript, \x must be followed by two hexadecimal numbers. + Otherwise it is a lowercase x letter. */ + if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0) + { + int i; + c = 0; + for (i = 0; i < 2; ++i) + { + register int cc = *(++ptr); +#ifndef EBCDIC /* ASCII/UTF-8 coding */ + if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ + c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); +#else /* EBCDIC coding */ + if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ + c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); +#endif + } + } + break; + } + if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) { const uschar *pt = ptr + 2; diff --git a/testdata/testinput2 b/testdata/testinput2 index b101097..ae822de 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3969,4 +3969,38 @@ AbcdCBefgBhiBqz /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ \Maabbccddee +/^a\x41z/<JS> + aAz + *** Failers + ax41z + +/^a[m\x41]z/<JS> + aAz + +/^a\x1z/<JS> + ax1z + +/^a\X41z/<JS> + aX41z + *** Failers + aAz + +/^a\u0041z/<JS> + aAz + *** Failers + au0041z + +/^a[m\u0041]z/<JS> + aAz + +/^a\u041z/<JS> + au041z + *** Failers + aAz + +/^a\U0041z/<JS> + aU0041z + *** Failers + aAz + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index f3ea88a..17e28bb 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -12502,4 +12502,56 @@ Minimum match() recursion limit = 13 2: cc 3: ee +/^a\x41z/<JS> + aAz + 0: aAz + *** Failers +No match + ax41z +No match + +/^a[m\x41]z/<JS> + aAz + 0: aAz + +/^a\x1z/<JS> + ax1z + 0: ax1z + +/^a\X41z/<JS> + aX41z + 0: aX41z + *** Failers +No match + aAz +No match + +/^a\u0041z/<JS> + aAz + 0: aAz + *** Failers +No match + au0041z +No match + +/^a[m\u0041]z/<JS> + aAz + 0: aAz + +/^a\u041z/<JS> + au041z + 0: au041z 
+ *** Failers +No match + aAz +No match + +/^a\U0041z/<JS> + aU0041z + 0: aU0041z + *** Failers +No match + aAz +No match + /-- End of testinput2 --/ |