diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-04-18 20:00:21 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-04-18 20:00:21 +0000 |
commit | 3bbe280000d68a6360bae77fdf03737822bf020d (patch) | |
tree | d4dfaa11abe2864170bce1d09733828913c6924f | |
parent | e91ca7c93fb58dbf6186de78f340729337043cd7 (diff) | |
download | pcre-3bbe280000d68a6360bae77fdf03737822bf020d.tar.gz |
Fix incorrect error for patterns like /(?2)[]a()b](abc)/
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@340 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | pcre_compile.c | 25 | ||||
-rw-r--r-- | testdata/testinput2 | 25 | ||||
-rw-r--r-- | testdata/testoutput2 | 36 |
4 files changed, 94 insertions, 1 deletions
@@ -62,6 +62,15 @@ Version 7.7 05-Mar-08 (a) A lone ] character is dis-allowed (Perl treats it as data). (b) A back reference to an unmatched subpattern matches an empty string (Perl fails the current match path). + +14. A pattern such as /(?2)[]a()b](abc)/ which had a forward reference to a + non-existent subpattern following a character class starting with ']' and + containing () gave an internal compiling error instead of "reference to + non-existent subpattern". Fortunately, when the pattern did exist, the + compiled code was correct. (When scanning forwards to check for the + existence of the subpattern, it was treating the data ']' as terminating + the class, so got the count wrong. When actually compiling, the reference + was subsequently set up correctly.) Version 7.6 28-Jan-08 diff --git a/pcre_compile.c b/pcre_compile.c index 9b10356..81a76b2 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -1008,10 +1008,33 @@ for (; *ptr != 0; ptr++) continue; } - /* Skip over character classes */ + /* Skip over character classes; this logic must be similar to the way they + are handled for real. If the first character is '^', skip it. Also, if the + first few characters (either before or after ^) are \Q\E or \E we skip them + too. This makes for compatibility with Perl. */ if (*ptr == '[') { + BOOL negate_class = FALSE; + for (;;) + { + int c = *(++ptr); + if (c == '\\') + { + if (ptr[1] == 'E') ptr++; + else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3; + else break; + } + else if (!negate_class && c == '^') + negate_class = TRUE; + else break; + } + + /* If the next character is ']', it is a data character that must be + skipped. */ + + if (ptr[1] == ']') ptr++; + while (*(++ptr) != ']') { if (*ptr == 0) return -1; diff --git a/testdata/testinput2 b/testdata/testinput2 index 52d4ef8..6c29b39 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2667,4 +2667,29 @@ a random value. 
/Ix /TA]/<JS> The ACTA] comes +/(?2)[]a()b](abc)/ + abcbabc + +/(?2)[^]a()b](abc)/ + abcbabc + +/(?1)[]a()b](abc)/ + abcbabc + ** Failers + abcXabc + +/(?1)[^]a()b](abc)/ + abcXabc + ** Failers + abcbabc + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + +/(?&N)[]a(?<N>)](?<M>abc)/ + abc<abc + +/(?&N)[]a(?<N>)](abc)/ + abc<abc + / End of testinput2 / diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 783e383..a2b9b8b 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -9545,4 +9545,40 @@ No match /TA]/<JS> Failed: ] is an invalid data character in JavaScript compatibility mode at offset 2 +/(?2)[]a()b](abc)/ +Failed: reference to non-existent subpattern at offset 3 + +/(?2)[^]a()b](abc)/ +Failed: reference to non-existent subpattern at offset 3 + +/(?1)[]a()b](abc)/ + abcbabc + 0: abcbabc + 1: abc + ** Failers +No match + abcXabc +No match + +/(?1)[^]a()b](abc)/ + abcXabc + 0: abcXabc + 1: abc + ** Failers +No match + abcbabc +No match + +/(?2)[]a()b](abc)(xyz)/ + xyzbabcxyz + 0: xyzbabcxyz + 1: abc + 2: xyz + +/(?&N)[]a(?<N>)](?<M>abc)/ +Failed: reference to non-existent subpattern at offset 4 + +/(?&N)[]a(?<N>)](abc)/ +Failed: reference to non-existent subpattern at offset 4 + / End of testinput2 / |