summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: M.J.T. Guy <mjtg@cus.cam.ac.uk> 1997-07-07 18:02:29 +1200
committer: Tim Bunce <Tim.Bunce@ig.co.uk> 1997-08-07 00:00:00 +1200
commit: 1b1626e441fa9750f3953e46530f2e95046bb007 (patch)
tree: a3acc09c28a1f9ae185de6c85fd96366acd33cee
parent: 706a304b44357647b233945e4e432234718ab515 (diff)
download: perl-1b1626e441fa9750f3953e46530f2e95046bb007.tar.gz
Re: Bug in Regular Expressions when using colon as delimiter
Andreas Klussmann <andreas@infosys.heitec.net> wrote > using > $x =~ m:(?:xx):; > instead of > $x =~ m:(?\:xx):; > terminates perl immediately (not in the debugger) and gives > Sequence (? > and nothing more as error message. This illustrates at least three bugs: i) The message is truncated because of the "NUL in argument to die" problem which I reported some time ago (and provided a kludge for in Carp.pm). ii) In any case, it would have produced an incorrect error message. iii) This error and many (most? all?) other parsing errors in regular expressions cause compilation to be terminated. The attached patch fixes (i) and (ii) but not (iii). It also extends the regexp tests to test the error messages generated rather than just note that an error has occurred. Additional points which I'll leave to someone else: a) (iii) needs fixing. b) I note that many regexp error messages are incorrect, as they quote the regexp as /(?/ rather than as (e.g.) m:(?: or s:(?::. c) My understanding of Chip's rework of sprintf was that it now provided a mechanism for including strings with embedded NULs. Could this be used to provide a complete fix for (i) rather than kludging each case as it turns up? d) I strongly suspect that the regexp tests a\ and 'a\'i are not doing what the author intended. I've left them so they say "ok" regardless. (Hint: \' is recognised in a '' string.) p5p-msgid: E0wtbhv-0005Mm-00@ursa.cus.cam.ac.uk
-rw-r--r-- pod/perldiag.pod 4
-rw-r--r-- regcomp.c 3
-rw-r--r-- t/op/re_tests 41
-rwxr-xr-x t/op/regexp.t 6
4 files changed, 31 insertions, 23 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index a5527ddd82..d3ea144e0e 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2070,6 +2070,10 @@ or possibly some other missing operator, such as a comma.
(W) The filehandle you're sending to got itself closed sometime before now.
Check your logic flow.
+=item Sequence (? incomplete
+(F) A regular expression ended with an incomplete extension (?.
+See L<perlre>.
+
=item Sequence (?#... not terminated
(F) A regular expression comment must be terminated by a closing
diff --git a/regcomp.c b/regcomp.c
index 0e9846c2e6..3e302531e6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -467,6 +467,9 @@ I32 *flagp;
nextchar();
*flagp = TRYAGAIN;
return NULL;
+ case 0:
+ croak("Sequence (? incomplete");
+ break;
default:
--regparse;
while (*regparse && strchr("iogcmsx", *regparse))
diff --git a/t/op/re_tests b/t/op/re_tests
index 77d97e2aeb..1f035c720d 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -42,9 +42,9 @@ a[b-d]e ace y $& ace
a[b-d] aac y $& ac
a[-b] a- y $& a-
a[b-] a- y $& a-
-a[b-a] - c - -
-a[]b - c - -
-a[ - c - -
+a[b-a] - c - /a[b-a]/: invalid [] range in regexp
+a[]b - c - /a[]b/: unmatched [] in regexp
+a[ - c - /a[/: unmatched [] in regexp
a] a] y $& a]
a[]]b a]b y $& a]b
a[^bc]d aed y $& aed
@@ -92,21 +92,21 @@ a[\S]b a-b y - -
ab|cd abc y $& ab
ab|cd abcd y $& ab
()ef def y $&-$1 ef-
-*a - c - -
-(*)b - c - -
+*a - c - /*a/: ?+*{} follows nothing in regexp
+(*)b - c - /(*)b/: ?+*{} follows nothing in regexp
$b b n - -
-a\ - c - -
+a\ - c - Search pattern not terminated
a\(b a(b y $&-$1 a(b-
a\(*b ab y $& ab
a\(*b a((b y $& a((b
a\\b a\b y $& a\b
-abc) - c - -
-(abc - c - -
+abc) - c - /abc)/: unmatched () in regexp
+(abc - c - /(abc/: unmatched () in regexp
((a)) abc y $&-$1-$2 a-a-a
(a)b(c) abc y $&-$1-$2 abc-a-c
a+b+c aabbabc y $& abc
a{1,}b{1,}c aabbabc y $& abc
-a** - c - -
+a** - c - /a**/: nested *?+ in regexp
a.+?c abcabc y $& abc
(a+|b)* ab y $&-$1 ab-b
(a+|b){0,} ab y $&-$1 ab-b
@@ -114,7 +114,7 @@ a.+?c abcabc y $& abc
(a+|b){1,} ab y $&-$1 ab-b
(a+|b)? ab y $&-$1 a-a
(a+|b){0,1} ab y $&-$1 a-a
-)( - c - -
+)( - c - /)(/: unmatched () in regexp
[^ab]* cde y $& cde
abc n - -
a* y $&
@@ -205,9 +205,9 @@ a[-]?c ac y $& ac
'a[b-d]'i AAC y $& AC
'a[-b]'i A- y $& A-
'a[b-]'i A- y $& A-
-'a[b-a]'i - c - -
-'a[]b'i - c - -
-'a['i - c - -
+'a[b-a]'i - c - /a[b-a]/: invalid [] range in regexp
+'a[]b'i - c - /a[]b/: unmatched [] in regexp
+'a['i - c - /a[/: unmatched [] in regexp
'a]'i A] y $& A]
'a[]]b'i A]B y $& A]B
'a[^bc]d'i AED y $& AED
@@ -219,21 +219,21 @@ a[-]?c ac y $& ac
'ab|cd'i ABC y $& AB
'ab|cd'i ABCD y $& AB
'()ef'i DEF y $&-$1 EF-
-'*a'i - c - -
-'(*)b'i - c - -
+'*a'i - c - /*a/: ?+*{} follows nothing in regexp
+'(*)b'i - c - /(*)b/: ?+*{} follows nothing in regexp
'$b'i B n - -
-'a\'i - c - -
+'a\'i - c - Search pattern not terminated
'a\(b'i A(B y $&-$1 A(B-
'a\(*b'i AB y $& AB
'a\(*b'i A((B y $& A((B
'a\\b'i A\B y $& A\B
-'abc)'i - c - -
-'(abc'i - c - -
+'abc)'i - c - /abc)/: unmatched () in regexp
+'(abc'i - c - /(abc/: unmatched () in regexp
'((a))'i ABC y $&-$1-$2 A-A-A
'(a)b(c)'i ABC y $&-$1-$2 ABC-A-C
'a+b+c'i AABBABC y $& ABC
'a{1,}b{1,}c'i AABBABC y $& ABC
-'a**'i - c - -
+'a**'i - c - /a**/: nested *?+ in regexp
'a.+?c'i ABCABC y $& ABC
'a.*?c'i ABCABC y $& ABC
'a.{0,5}?c'i ABCABC y $& ABC
@@ -244,7 +244,7 @@ a[-]?c ac y $& ac
'(a+|b)?'i AB y $&-$1 A-A
'(a+|b){0,1}'i AB y $&-$1 A-A
'(a+|b){0,1}?'i AB y $&-$1 -
-')('i - c - -
+')('i - c - /)(/: unmatched () in regexp
'[^ab]*'i CDE y $& CDE
'abc'i n - -
'a*'i y $&
@@ -304,3 +304,4 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce
'([a-z]+)\s\1'i Aa aa y $&-$1 Aa aa-Aa
'([a-z]+)\s\1'i Ab ab y $&-$1 Ab ab-Ab
foo\w*\d{4}baz foobar1234baz y $& foobar1234baz
+:(?: - c - Sequence (? incomplete
diff --git a/t/op/regexp.t b/t/op/regexp.t
index ea470f879b..803f1d0dab 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -14,7 +14,7 @@
# n expect no match
# c expect an error
#
-# Columns 4 and 5 are used only of column 3 contains C<y>.
+# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
#
# Column 4 contains a string, usually C<$&>.
#
@@ -35,11 +35,11 @@ TEST:
while (<TESTS>) {
($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_);
$input = join(':',$pat,$subject,$result,$repl,$expect);
- $pat = "'$pat'" unless $pat =~ /^'/;
+ $pat = "'$pat'" unless $pat =~ /^[:']/;
for $study ("", "study \$subject") {
eval "$study; \$match = (\$subject =~ m$pat); \$got = \"$repl\";";
if ($result eq 'c') {
- if ($@ eq '') { print "not ok $.\n"; next TEST }
+ if ($@ !~ m!^\Q$expect!) { print "not ok $.\n"; next TEST }
last; # no need to study a syntax error
}
elsif ($result eq 'n') {