summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-04-18 20:00:21 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-04-18 20:00:21 +0000
commit: 3bbe280000d68a6360bae77fdf03737822bf020d (patch)
tree: d4dfaa11abe2864170bce1d09733828913c6924f
parent: e91ca7c93fb58dbf6186de78f340729337043cd7 (diff)
download: pcre-3bbe280000d68a6360bae77fdf03737822bf020d.tar.gz
Fix incorrect error for patterns like /(?2)[]a()b](abc)/
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@340 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 9
-rw-r--r-- pcre_compile.c 25
-rw-r--r-- testdata/testinput2 25
-rw-r--r-- testdata/testoutput2 36
4 files changed, 94 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 8199744..5b55099 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -62,6 +62,15 @@ Version 7.7 05-Mar-08
(a) A lone ] character is dis-allowed (Perl treats it as data).
(b) A back reference to an unmatched subpattern matches an empty string
(Perl fails the current match path).
+
+14. A pattern such as /(?2)[]a()b](abc)/ which had a forward reference to a
+ non-existent subpattern following a character class starting with ']' and
+ containing () gave an internal compiling error instead of "reference to
+ non-existent subpattern". Fortunately, when the subpattern did exist, the
+ compiled code was correct. (When scanning forwards to check for the
+ existence of the subpattern, it was treating the data ']' as terminating
+ the class, so got the count wrong. When actually compiling, the reference
+ was subsequently set up correctly.)
Version 7.6 28-Jan-08
diff --git a/pcre_compile.c b/pcre_compile.c
index 9b10356..81a76b2 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1008,10 +1008,33 @@ for (; *ptr != 0; ptr++)
continue;
}
- /* Skip over character classes */
+ /* Skip over character classes; this logic must be similar to the way they
+ are handled for real. If the first character is '^', skip it. Also, if the
+ first few characters (either before or after ^) are \Q\E or \E we skip them
+ too. This makes for compatibility with Perl. */
if (*ptr == '[')
{
+ BOOL negate_class = FALSE;
+ for (;;)
+ {
+ int c = *(++ptr);
+ if (c == '\\')
+ {
+ if (ptr[1] == 'E') ptr++;
+ else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
+ else break;
+ }
+ else if (!negate_class && c == '^')
+ negate_class = TRUE;
+ else break;
+ }
+
+ /* If the next character is ']', it is a data character that must be
+ skipped. */
+
+ if (ptr[1] == ']') ptr++;
+
while (*(++ptr) != ']')
{
if (*ptr == 0) return -1;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 52d4ef8..6c29b39 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2667,4 +2667,29 @@ a random value. /Ix
/TA]/<JS>
The ACTA] comes
+/(?2)[]a()b](abc)/
+ abcbabc
+
+/(?2)[^]a()b](abc)/
+ abcbabc
+
+/(?1)[]a()b](abc)/
+ abcbabc
+ ** Failers
+ abcXabc
+
+/(?1)[^]a()b](abc)/
+ abcXabc
+ ** Failers
+ abcbabc
+
+/(?2)[]a()b](abc)(xyz)/
+ xyzbabcxyz
+
+/(?&N)[]a(?<N>)](?<M>abc)/
+ abc<abc
+
+/(?&N)[]a(?<N>)](abc)/
+ abc<abc
+
/ End of testinput2 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 783e383..a2b9b8b 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -9545,4 +9545,40 @@ No match
/TA]/<JS>
Failed: ] is an invalid data character in JavaScript compatibility mode at offset 2
+/(?2)[]a()b](abc)/
+Failed: reference to non-existent subpattern at offset 3
+
+/(?2)[^]a()b](abc)/
+Failed: reference to non-existent subpattern at offset 3
+
+/(?1)[]a()b](abc)/
+ abcbabc
+ 0: abcbabc
+ 1: abc
+ ** Failers
+No match
+ abcXabc
+No match
+
+/(?1)[^]a()b](abc)/
+ abcXabc
+ 0: abcXabc
+ 1: abc
+ ** Failers
+No match
+ abcbabc
+No match
+
+/(?2)[]a()b](abc)(xyz)/
+ xyzbabcxyz
+ 0: xyzbabcxyz
+ 1: abc
+ 2: xyz
+
+/(?&N)[]a(?<N>)](?<M>abc)/
+Failed: reference to non-existent subpattern at offset 4
+
+/(?&N)[]a(?<N>)](abc)/
+Failed: reference to non-existent subpattern at offset 4
+
/ End of testinput2 /