summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pod/perldiag.pod27
-rw-r--r--pod/perlrecharclass.pod4
-rw-r--r--regcomp.c28
-rw-r--r--t/lib/warnings/regcomp6
-rw-r--r--t/re/reg_mesg.t29
-rw-r--r--t/re/regex_sets.t6
6 files changed, 68 insertions, 32 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 106fe41121..c29925a2a4 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -5904,7 +5904,7 @@ yourself.
a perl4 interpreter, especially if the next 2 tokens are "use strict"
or "my $var" or "our $var".
-=item Syntax error in (?[...]) in regex m/%s/
+=item Syntax error in (?[...]) in regex; marked by <-- HERE in m/%s/
(F) Perl could not figure out what you meant inside this construct; this
notifies you that it is giving up trying.
@@ -6402,6 +6402,31 @@ to find out why that isn't happening.
(F) The unexec() routine failed for some reason. See your local FSF
representative, who probably put it there in the first place.
+=item Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing an extended character class a ']' character was encountered
+at a point in the definition where the only legal use of ']' is to close the
+character class definition as part of a '])', you may have forgotten the close
+paren, or otherwise confused the parser.
+
+=item Expecting close paren for nested extended charclass in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing a nested extended character class like:
+
+ (?[ ... (?flags:(?[ ... ])) ... ])
+ ^
+
+we expected to see a close paren ')' (marked by ^) but did not.
+
+=item Expecting close paren for wrapper for nested extended charclass in regex; marked by <-- HERE in m/%s/
+
+(F) While parsing a nested extended character class like:
+
+ (?[ ... (?flags:(?[ ... ])) ... ])
+ ^
+
+we expected to see a close paren ')' (marked by ^) but did not.
+
=item Unexpected binary operator '%c' with no preceding operand in regex;
marked by S<<-- HERE> in m/%s/
diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
index 79480e4131..8c008507d1 100644
--- a/pod/perlrecharclass.pod
+++ b/pod/perlrecharclass.pod
@@ -1128,8 +1128,8 @@ hence both of the following work:
Any contained POSIX character classes, including things like C<\w> and C<\D>
respect the C<E<sol>a> (and C<E<sol>aa>) modifiers.
-C<< (?[ ]) >> is a regex-compile-time construct. Any attempt to use
-something which isn't knowable at the time the containing regular
+Note that C<< (?[ ]) >> is a regex-compile-time construct. Any attempt
+to use something which isn't knowable at the time the containing regular
expression is compiled is a fatal error. In practice, this means
just three limitations:
diff --git a/regcomp.c b/regcomp.c
index 4ee48ede42..ddac290d2b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -14840,8 +14840,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
TRUE /* Force /x */ );
switch (*RExC_parse) {
- case '?':
- if (RExC_parse[1] == '[') depth++, RExC_parse++;
+ case '(':
+ if (RExC_parse[1] == '?' && RExC_parse[2] == '[')
+ depth++, RExC_parse+=2;
/* FALLTHROUGH */
default:
break;
@@ -14898,9 +14899,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
}
case ']':
- if (depth--) break;
- RExC_parse++;
- if (*RExC_parse == ')') {
+ if (RExC_parse[1] == ')') {
+ RExC_parse++;
+ if (depth--) break;
node = reganode(pRExC_state, ANYOF, 0);
RExC_size += ANYOF_SKIP;
nextchar(pRExC_state);
@@ -14912,20 +14913,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
return node;
}
- goto no_close;
+ /* We output the messages even if warnings are off, because we'll fail
+ * the very next thing, and these give a likely diagnosis for that */
+ if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
+ output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
+ }
+ RExC_parse++;
+ vFAIL("Unexpected ']' with no following ')' in (?[...");
}
RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
}
- no_close:
/* We output the messages even if warnings are off, because we'll fail
* the very next thing, and these give a likely diagnosis for that */
if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL);
}
- FAIL("Syntax error in (?[...])");
+ vFAIL("Syntax error in (?[...])");
}
/* Pass 2 only after this. */
@@ -15105,12 +15111,14 @@ redo_curchar:
* inversion list, and RExC_parse points to the trailing
* ']'; the next character should be the ')' */
RExC_parse++;
- assert(UCHARAT(RExC_parse) == ')');
+ if (UCHARAT(RExC_parse) != ')')
+ vFAIL("Expecting close paren for nested extended charclass");
/* Then the ')' matching the original '(' handled by this
* case: statement */
RExC_parse++;
- assert(UCHARAT(RExC_parse) == ')');
+ if (UCHARAT(RExC_parse) != ')')
+ vFAIL("Expecting close paren for wrapper for nested extended charclass");
RExC_flags = save_flags;
goto handle_operand;
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index 2b084c59b0..51ad57ccbe 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -59,21 +59,21 @@ Unmatched [ in regex; marked by <-- HERE in m/abc[ <-- HERE fi[.00./ at - line
qr/(?[[[:word]]])/;
EXPECT
Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[[:word]]])/ at - line 2.
+Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 2.
########
# NAME qr/(?[ [[:digit: ])/
# OPTION fatal
qr/(?[[[:digit: ])/;
EXPECT
Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[[:digit: ])/ at - line 2.
+syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 2.
########
# NAME qr/(?[ [:digit: ])/
# OPTION fatal
qr/(?[[:digit: ])/
EXPECT
Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 2.
-syntax error in (?[...]) in regex m/(?[[:digit: ])/ at - line 2.
+syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 2.
########
# NAME [perl #126141]
# OPTION fatal
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index d26a7caf37..5194d93751 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -215,8 +215,9 @@ my @death =
'/\b{gc}/' => "'gc' is an unknown bound type {#} m/\\b{gc{#}}/",
'/\B{gc}/' => "'gc' is an unknown bound type {#} m/\\B{gc{#}}/",
- '/(?[[[::]]])/' => "Syntax error in (?[...]) in regex m/(?[[[::]]])/",
- '/(?[[[:w:]]])/' => "Syntax error in (?[...]) in regex m/(?[[[:w:]]])/",
+
+ '/(?[[[::]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[::]]{#}])/",
+ '/(?[[[:w:]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[:w:]]{#}])/",
'/(?[[:w:]])/' => "",
'/([.].*)[.]/' => "", # [perl #127582]
'/[.].*[.]/' => "", # [perl #127604]
@@ -239,11 +240,12 @@ my @death =
'/(?[ \p{foo} ])/' => 'Can\'t find Unicode property definition "foo" {#} m/(?[ \p{foo}{#} ])/',
'/(?[ \p{ foo = bar } ])/' => 'Can\'t find Unicode property definition "foo = bar" {#} m/(?[ \p{ foo = bar }{#} ])/',
'/(?[ \8 ])/' => 'Unrecognized escape \8 in character class {#} m/(?[ \8{#} ])/',
- '/(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ]/',
- '/(?[ [ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ \t ]/',
- '/(?[ \t ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ] ]/',
- '/(?[ [ ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ ] ]/',
- '/(?[ \t + \e # This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # This was supposed to be a comment ])/',
+ '/(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#}/",
+ '/(?[ [ \t ]/' => "Syntax error in (?[...]) {#} m/(?[ [ \\t ]{#}/",
+ '/(?[ \t ] ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#} ]/",
+ '/(?[ [ ] ]/' => "Syntax error in (?[...]) {#} m/(?[ [ ] ]{#}/",
+ '/(?[ \t + \e # This was supposed to be a comment ])/' =>
+ "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # This was supposed to be a comment ]){#}/",
'/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ {#}])/',
'm/(?[[a-\d]])/' => 'False [] range "a-\d" {#} m/(?[[a-\d{#}]])/',
'm/(?[[\w-x]])/' => 'False [] range "\w-" {#} m/(?[[\w-{#}x]])/',
@@ -431,10 +433,10 @@ my @death_utf8 = mark_as_utf8(
'/ネ\p{}ネ/' => 'Empty \p{} {#} m/ネ\p{{#}}ネ/',
- '/ネ(?[[[:ネ]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ]]])ネ/",
- '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ: ])ネ/",
- '/ネ(?[[[::]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[::]]])ネ/",
- '/ネ(?[[[:ネ:]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ:]]])ネ/",
+ '/ネ(?[[[:ネ]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ]]{#}])ネ/",
+ '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) {#} m/ネ(?[[[:ネ: ])ネ{#}/",
+ '/ネ(?[[[::]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[::]]{#}])ネ/",
+ '/ネ(?[[[:ネ:]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ:]]{#}])ネ/",
'/ネ(?[[:ネ:]])ネ/' => "",
'/ネ(?[ネ])ネ/' => 'Unexpected character {#} m/ネ(?[ネ{#}])ネ/',
'/ネ(?[ + [ネ] ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ +{#} [ネ] ])/',
@@ -447,8 +449,9 @@ my @death_utf8 = mark_as_utf8(
'/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/',
'/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/',
'/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/',
- '/ネ(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/ネ(?[ \t ]/',
- '/(?[ \t + \e # ネ This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # ネ This was supposed to be a comment ])/',
+ '/ネ(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[ \\t ]{#}/",
+ '/(?[ \t + \e # ネ This was supposed to be a comment ])/' =>
+ "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # ネ This was supposed to be a comment ]){#}/",
'm/(*ネ)ネ/' => q<Unknown verb pattern 'ネ' {#} m/(*ネ){#}ネ/>,
'/\cネ/' => "Character following \"\\c\" must be printable ASCII",
'/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/",
diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t
index 6a79f9d692..e9644bd4e6 100644
--- a/t/re/regex_sets.t
+++ b/t/re/regex_sets.t
@@ -158,13 +158,13 @@ for my $char ("٠", "٥", "٩") {
eval { $_ = '/(?[(\c]) /'; qr/$_/ };
like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
eval { $_ = '(?[\c#]' . "\n])"; qr/$_/ };
- like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic');
+ like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic');
eval { $_ = '(?[(\c])'; qr/$_/ };
like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error');
eval { $_ = '(?[(\c]) ]\b'; qr/$_/ };
- like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error');
+ like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error');
eval { $_ = '(?[\c[]](])'; qr/$_/ };
- like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error');
+ like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error');
like("\c#", qr/(?[\c#])/, '\c# should match itself');
like("\c[", qr/(?[\c[])/, '\c[ should match itself');
like("\c\ ", qr/(?[\c\])/, '\c\ should match itself');