summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-11-16 19:55:16 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-11-16 19:55:16 +0000
commit e94b4c5ecbd32d8d425fae1bb82c41dfd9ce5cee (patch)
tree 5b834a6d97ef822cef1378f25955ae37983a4bf5
parent 0d089a4646853f3c4d3fa08ccbb555684a9ccd0c (diff)
download pcre-e94b4c5ecbd32d8d425fae1bb82c41dfd9ce5cee.tar.gz
Diagnose conditional numerical reference to a non-existent subpattern.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@270 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 6
-rw-r--r-- pcre_compile.c 21
-rw-r--r-- pcre_internal.h 3
-rw-r--r-- testdata/testinput1 5
-rw-r--r-- testdata/testinput2 24
-rw-r--r-- testdata/testoutput1 6
-rw-r--r-- testdata/testoutput2 49
7 files changed, 54 insertions, 60 deletions
diff --git a/ChangeLog b/ChangeLog
index 7e3fbf9..bb67f68 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -26,6 +26,12 @@ Version 7.5 12-Nov-07
6. The erroneous patterns (?+-a) and (?-+a) give different error messages;
this is right because (?- can be followed by option settings as well as by
digits. I have, however, made the messages clearer.
+
+7. Patterns such as (?(1)a|b), in which the conditional refers to a
+ subpattern number greater than the number of capturing subpatterns in the
+ pattern, now cause a compile-time error. This is not compatible with Perl,
+ which accepts such patterns and treats the condition as always FALSE (as
+ PCRE used to), but it seems to me that giving a diagnostic is better.
Version 7.4 21-Sep-07
diff --git a/pcre_compile.c b/pcre_compile.c
index 0d579db..21b1a8b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4248,16 +4248,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*errorcodeptr = ERR58;
goto FAILED;
}
- if (refsign == '-')
+ recno = (refsign == '-')?
+ cd->bracount - recno + 1 : recno +cd->bracount;
+ if (recno <= 0 || recno > cd->final_bracount)
{
- recno = cd->bracount - recno + 1;
- if (recno <= 0)
- {
- *errorcodeptr = ERR15;
- goto FAILED;
- }
+ *errorcodeptr = ERR15;
+ goto FAILED;
}
- else recno += cd->bracount;
PUT2(code, 2+LINK_SIZE, recno);
break;
}
@@ -4329,9 +4326,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
skipbytes = 1;
}
- /* Check for the "name" actually being a subpattern number. */
+ /* Check for the "name" actually being a subpattern number. We are
+ in the second pass here, so final_bracount is set. */
- else if (recno > 0)
+ else if (recno > 0 && recno <= cd->final_bracount)
{
PUT2(code, 2+LINK_SIZE, recno);
}
@@ -5939,7 +5937,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is
no longer needed, so hopefully this workspace will never overflow, though there
is a test for its doing so. */
-cd->bracount = 0;
+cd->bracount = cd->final_bracount = 0;
cd->names_found = 0;
cd->name_entry_size = 0;
cd->name_table = NULL;
@@ -6016,6 +6014,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm
field; this time it's used for remembering forward references to subpatterns.
*/
+cd->final_bracount = cd->bracount; /* Save for checking forward references */
cd->bracount = 0;
cd->names_found = 0;
cd->name_table = (uschar *)re + re->name_table_offset;
diff --git a/pcre_internal.h b/pcre_internal.h
index 7ffae2f..c72af6e 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -934,7 +934,8 @@ typedef struct compile_data {
uschar *name_table; /* The name/number table */
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
- int bracount; /* Count of capturing parens */
+ int bracount; /* Count of capturing parens as we compile */
+ int final_bracount; /* Saved value after first pass */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
int external_options; /* External (initial) options */
diff --git a/testdata/testinput1 b/testdata/testinput1
index 79c98fa..c7f264a 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -3421,11 +3421,6 @@
/((?m)^b)/
a\nb\nc\n
-/(?(1)a|b)/
-
-/(?(1)b|a)/
- a
-
/(x)?(?(1)a|b)/
*** Failers
a
diff --git a/testdata/testinput2 b/testdata/testinput2
index 2a180e3..da3b01e 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -398,8 +398,6 @@
/(?(1?)a|b)/
-/(?(1)a|b|c)/
-
/[a[:xyz:/
/(?<=x+)y/
@@ -568,15 +566,15 @@
/ab\d+/I
-/a(?(1)b)/I
+/a(?(1)b)(.)/I
-/a(?(1)bag|big)/I
+/a(?(1)bag|big)(.)/I
-/a(?(1)bag|big)*/I
+/a(?(1)bag|big)*(.)/I
-/a(?(1)bag|big)+/I
+/a(?(1)bag|big)+(.)/I
-/a(?(1)b..|b..)/I
+/a(?(1)b..|b..)(.)/I
/ab\d{0}e/I
@@ -977,13 +975,13 @@
/()a/I
-/(?(1)ab|ac)/I
+/(?(1)ab|ac)(.)/I
-/(?(1)abz|acz)/I
+/(?(1)abz|acz)(.)/I
-/(?(1)abz)/I
+/(?(1)abz)(.)/I
-/(?(1)abz)123/I
+/(?(1)abz)(1)23/I
/(a)+/I
@@ -2190,8 +2188,8 @@ a random value. /Ix
/((?(-2)a))/BZ
-/^(?(+1)X|Y)/BZ
- Y
+/^(?(+1)X|Y)(.)/BZ
+ Y!
/(foo)\Kbar/
foobar
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 4c0e680..83682f7 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -5551,12 +5551,6 @@ No match
0: b
1: b
-/(?(1)a|b)/
-
-/(?(1)b|a)/
- a
- 0: a
-
/(x)?(?(1)a|b)/
*** Failers
No match
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 85d931a..cb69551 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -963,9 +963,6 @@ Failed: unrecognized character after (? or (?- at offset 3
/(?(1?)a|b)/
Failed: malformed number or name after (?( at offset 4
-/(?(1)a|b|c)/
-Failed: conditional group contains more than two branches at offset 10
-
/[a[:xyz:/
Failed: missing terminating ] for character class at offset 8
@@ -1599,32 +1596,32 @@ No options
First char = 'a'
Need char = 'b'
-/a(?(1)b)/I
-Capturing subpattern count = 0
+/a(?(1)b)(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
No need char
-/a(?(1)bag|big)/I
-Capturing subpattern count = 0
+/a(?(1)bag|big)(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
Need char = 'g'
-/a(?(1)bag|big)*/I
-Capturing subpattern count = 0
+/a(?(1)bag|big)*(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
No need char
-/a(?(1)bag|big)+/I
-Capturing subpattern count = 0
+/a(?(1)bag|big)+(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
Need char = 'g'
-/a(?(1)b..|b..)/I
-Capturing subpattern count = 0
+/a(?(1)b..|b..)(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
Need char = 'b'
@@ -3441,26 +3438,26 @@ No options
No first char
Need char = 'a'
-/(?(1)ab|ac)/I
-Capturing subpattern count = 0
+/(?(1)ab|ac)(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
No need char
-/(?(1)abz|acz)/I
-Capturing subpattern count = 0
+/(?(1)abz|acz)(.)/I
+Capturing subpattern count = 1
No options
First char = 'a'
Need char = 'z'
-/(?(1)abz)/I
-Capturing subpattern count = 0
+/(?(1)abz)(.)/I
+Capturing subpattern count = 1
No options
No first char
No need char
-/(?(1)abz)123/I
-Capturing subpattern count = 0
+/(?(1)abz)(1)23/I
+Capturing subpattern count = 1
No options
No first char
Need char = '3'
@@ -8308,7 +8305,7 @@ Failed: reference to non-existent subpattern at offset 6
/((?(-2)a))/BZ
Failed: reference to non-existent subpattern at offset 7
-/^(?(+1)X|Y)/BZ
+/^(?(+1)X|Y)(.)/BZ
------------------------------------------------------------------
Bra
^
@@ -8318,11 +8315,15 @@ Failed: reference to non-existent subpattern at offset 7
Alt
Y
Ket
+ CBra 1
+ Any
+ Ket
Ket
End
------------------------------------------------------------------
- Y
- 0: Y
+ Y!
+ 0: Y!
+ 1: !
/(foo)\Kbar/
foobar