From e94b4c5ecbd32d8d425fae1bb82c41dfd9ce5cee Mon Sep 17 00:00:00 2001 From: ph10 Date: Fri, 16 Nov 2007 19:55:16 +0000 Subject: Diagnose conditional numerical reference to a non-existent subpattern. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@270 2f5784b3-3f2a-0410-8824-cb99058d5e15 --- ChangeLog | 6 ++++++ pcre_compile.c | 21 ++++++++++----------- pcre_internal.h | 3 ++- testdata/testinput1 | 5 ----- testdata/testinput2 | 24 +++++++++++------------- testdata/testoutput1 | 6 ------ testdata/testoutput2 | 49 +++++++++++++++++++++++++------------------------ 7 files changed, 54 insertions(+), 60 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e3fbf9..bb67f68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -26,6 +26,12 @@ Version 7.5 12-Nov-07 6. The erroneous patterns (?+-a) and (?-+a) give different error messages; this is right because (?- can be followed by option settings as well as by digits. I have, however, made the messages clearer. + +7. Patterns such as (?(1)a|b) (a pattern that contains fewer subpatterns + than the number used in the conditional) now cause a compile-time error. + This is actually not compatible with Perl, which accepts such patterns, but + treats the conditional as always being FALSE (as PCRE used to), but it + seems to me that giving a diagnostic is better. Version 7.4 21-Sep-07 diff --git a/pcre_compile.c b/pcre_compile.c index 0d579db..21b1a8b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4248,16 +4248,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ *errorcodeptr = ERR58; goto FAILED; } - if (refsign == '-') + recno = (refsign == '-')? + cd->bracount - recno + 1 : recno +cd->bracount; + if (recno <= 0 || recno > cd->final_bracount) { - recno = cd->bracount - recno + 1; - if (recno <= 0) - { - *errorcodeptr = ERR15; - goto FAILED; - } + *errorcodeptr = ERR15; + goto FAILED; } - else recno += cd->bracount; PUT2(code, 2+LINK_SIZE, recno); break; } @@ -4329,9 +4326,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ skipbytes = 1; } - /* Check for the "name" actually being a subpattern number. */ + /* Check for the "name" actually being a subpattern number. We are + in the second pass here, so final_bracount is set. */ - else if (recno > 0) + else if (recno > 0 && recno <= cd->final_bracount) { PUT2(code, 2+LINK_SIZE, recno); } @@ -5939,7 +5937,7 @@ to compile parts of the pattern into; the compiled code is discarded when it is no longer needed, so hopefully this workspace will never overflow, though there is a test for its doing so. */ -cd->bracount = 0; +cd->bracount = cd->final_bracount = 0; cd->names_found = 0; cd->name_entry_size = 0; cd->name_table = NULL; @@ -6016,6 +6014,7 @@ field. Reset the bracket count and the names_found field. Also reset the hwm field; this time it's used for remembering forward references to subpatterns. */ +cd->final_bracount = cd->bracount; /* Save for checking forward references */ cd->bracount = 0; cd->names_found = 0; cd->name_table = (uschar *)re + re->name_table_offset; diff --git a/pcre_internal.h b/pcre_internal.h index 7ffae2f..c72af6e 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -934,7 +934,8 @@ typedef struct compile_data { uschar *name_table; /* The name/number table */ int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ - int bracount; /* Count of capturing parens */ + int bracount; /* Count of capturing parens as we compile */ + int final_bracount; /* Saved value after first pass */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int external_options; /* External (initial) options */ diff --git a/testdata/testinput1 b/testdata/testinput1 index 79c98fa..c7f264a 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -3421,11 +3421,6 @@ /((?m)^b)/ a\nb\nc\n -/(?(1)a|b)/ - -/(?(1)b|a)/ - a - /(x)?(?(1)a|b)/ *** Failers a diff --git a/testdata/testinput2 b/testdata/testinput2 index 2a180e3..da3b01e 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -398,8 +398,6 @@ /(?(1?)a|b)/ -/(?(1)a|b|c)/ - /[a[:xyz:/ /(?<=x+)y/ @@ -568,15 +566,15 @@ /ab\d+/I -/a(?(1)b)/I +/a(?(1)b)(.)/I -/a(?(1)bag|big)/I +/a(?(1)bag|big)(.)/I -/a(?(1)bag|big)*/I +/a(?(1)bag|big)*(.)/I -/a(?(1)bag|big)+/I +/a(?(1)bag|big)+(.)/I -/a(?(1)b..|b..)/I +/a(?(1)b..|b..)(.)/I /ab\d{0}e/I @@ -977,13 +975,13 @@ /()a/I -/(?(1)ab|ac)/I +/(?(1)ab|ac)(.)/I -/(?(1)abz|acz)/I +/(?(1)abz|acz)(.)/I -/(?(1)abz)/I +/(?(1)abz)(.)/I -/(?(1)abz)123/I +/(?(1)abz)(1)23/I /(a)+/I @@ -2190,8 +2188,8 @@ a random value. /Ix /((?(-2)a))/BZ -/^(?(+1)X|Y)/BZ - Y +/^(?(+1)X|Y)(.)/BZ + Y! /(foo)\Kbar/ foobar diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 4c0e680..83682f7 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -5551,12 +5551,6 @@ No match 0: b 1: b -/(?(1)a|b)/ - -/(?(1)b|a)/ - a - 0: a - /(x)?(?(1)a|b)/ *** Failers No match diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 85d931a..cb69551 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -963,9 +963,6 @@ Failed: unrecognized character after (? or (?- at offset 3 /(?(1?)a|b)/ Failed: malformed number or name after (?( at offset 4 -/(?(1)a|b|c)/ -Failed: conditional group contains more than two branches at offset 10 - /[a[:xyz:/ Failed: missing terminating ] for character class at offset 8 @@ -1599,32 +1596,32 @@ No options First char = 'a' Need char = 'b' -/a(?(1)b)/I -Capturing subpattern count = 0 +/a(?(1)b)(.)/I +Capturing subpattern count = 1 No options First char = 'a' No need char -/a(?(1)bag|big)/I -Capturing subpattern count = 0 +/a(?(1)bag|big)(.)/I +Capturing subpattern count = 1 No options First char = 'a' Need char = 'g' -/a(?(1)bag|big)*/I -Capturing subpattern count = 0 +/a(?(1)bag|big)*(.)/I +Capturing subpattern count = 1 No options First char = 'a' No need char -/a(?(1)bag|big)+/I -Capturing subpattern count = 0 +/a(?(1)bag|big)+(.)/I +Capturing subpattern count = 1 No options First char = 'a' Need char = 'g' -/a(?(1)b..|b..)/I -Capturing subpattern count = 0 +/a(?(1)b..|b..)(.)/I +Capturing subpattern count = 1 No options First char = 'a' Need char = 'b' @@ -3441,26 +3438,26 @@ No options No first char Need char = 'a' -/(?(1)ab|ac)/I -Capturing subpattern count = 0 +/(?(1)ab|ac)(.)/I +Capturing subpattern count = 1 No options First char = 'a' No need char -/(?(1)abz|acz)/I -Capturing subpattern count = 0 +/(?(1)abz|acz)(.)/I +Capturing subpattern count = 1 No options First char = 'a' Need char = 'z' -/(?(1)abz)/I -Capturing subpattern count = 0 +/(?(1)abz)(.)/I +Capturing subpattern count = 1 No options No first char No need char -/(?(1)abz)123/I -Capturing subpattern count = 0 +/(?(1)abz)(1)23/I +Capturing subpattern count = 1 No options No first char Need char = '3' @@ -8308,7 +8305,7 @@ Failed: reference to non-existent subpattern at offset 6 /((?(-2)a))/BZ Failed: reference to non-existent subpattern at offset 7 -/^(?(+1)X|Y)/BZ +/^(?(+1)X|Y)(.)/BZ ------------------------------------------------------------------ Bra ^ @@ -8318,11 +8315,15 @@ Failed: reference to non-existent subpattern at offset 7 Alt Y Ket + CBra 1 + Any + Ket Ket End ------------------------------------------------------------------ - Y - 0: Y + Y! + 0: Y! + 1: ! /(foo)\Kbar/ foobar -- cgit v1.2.1