diff options
-rw-r--r-- | pod/perldiag.pod | 27 | ||||
-rw-r--r-- | pod/perlrecharclass.pod | 4 | ||||
-rw-r--r-- | regcomp.c | 28 | ||||
-rw-r--r-- | t/lib/warnings/regcomp | 6 | ||||
-rw-r--r-- | t/re/reg_mesg.t | 29 | ||||
-rw-r--r-- | t/re/regex_sets.t | 6 |
6 files changed, 68 insertions, 32 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod index c38fc0f032..77726f54a1 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -5945,7 +5945,7 @@ yourself. a perl4 interpreter, especially if the next 2 tokens are "use strict" or "my $var" or "our $var". -=item Syntax error in (?[...]) in regex m/%s/ +=item Syntax error in (?[...]) in regex; marked by <-- HERE in m/%s/ (F) Perl could not figure out what you meant inside this construct; this notifies you that it is giving up trying. @@ -6441,6 +6441,31 @@ to find out why that isn't happening. (F) The unexec() routine failed for some reason. See your local FSF representative, who probably put it there in the first place. +=item Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/%s/ + +(F) While parsing an extended character class, a ']' character was encountered +at a point in the definition where the only legal use of ']' is to close the +character class definition as part of a '])'. You may have forgotten the close +paren, or otherwise confused the parser. + +=item Expecting close paren for nested extended charclass in regex; marked by <-- HERE in m/%s/ + +(F) While parsing a nested extended character class like: + + (?[ ... (?flags:(?[ ... ])) ... ]) + ^ + +we expected to see a close paren ')' (marked by ^) but did not. + +=item Expecting close paren for wrapper for nested extended charclass in regex; marked by <-- HERE in m/%s/ + +(F) While parsing a nested extended character class like: + + (?[ ... (?flags:(?[ ... ])) ... ]) + ^ + +we expected to see a close paren ')' (marked by ^) but did not. 
+ =item Unexpected binary operator '%c' with no preceding operand in regex; marked by S<<-- HERE> in m/%s/ diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod index 79480e4131..8c008507d1 100644 --- a/pod/perlrecharclass.pod +++ b/pod/perlrecharclass.pod @@ -1128,8 +1128,8 @@ hence both of the following work: Any contained POSIX character classes, including things like C<\w> and C<\D> respect the C<E<sol>a> (and C<E<sol>aa>) modifiers. -C<< (?[ ]) >> is a regex-compile-time construct. Any attempt to use -something which isn't knowable at the time the containing regular +Note that C<< (?[ ]) >> is a regex-compile-time construct. Any attempt +to use something which isn't knowable at the time the containing regular expression is compiled is a fatal error. In practice, this means just three limitations: @@ -14947,8 +14947,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, TRUE /* Force /x */ ); switch (*RExC_parse) { - case '?': - if (RExC_parse[1] == '[') nest_depth++, RExC_parse++; + case '(': + if (RExC_parse[1] == '?' 
&& RExC_parse[2] == '[') + nest_depth++, RExC_parse+=2; /* FALLTHROUGH */ default: break; @@ -15005,9 +15006,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, } case ']': - if (nest_depth--) break; - RExC_parse++; - if (*RExC_parse == ')') { + if (RExC_parse[1] == ')') { + RExC_parse++; + if (nest_depth--) break; node = reganode(pRExC_state, ANYOF, 0); RExC_size += ANYOF_SKIP; nextchar(pRExC_state); @@ -15019,20 +15020,25 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, return node; } - goto no_close; + /* We output the messages even if warnings are off, because we'll fail + * the very next thing, and these give a likely diagnosis for that */ + if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) { + output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL); + } + RExC_parse++; + vFAIL("Unexpected ']' with no following ')' in (?[..."); } RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1; } - no_close: /* We output the messages even if warnings are off, because we'll fail * the very next thing, and these give a likely diagnosis for that */ if (posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) { output_or_return_posix_warnings(pRExC_state, posix_warnings, NULL); } - FAIL("Syntax error in (?[...])"); + vFAIL("Syntax error in (?[...])"); } /* Pass 2 only after this. 
*/ @@ -15212,12 +15218,14 @@ redo_curchar: * inversion list, and RExC_parse points to the trailing * ']'; the next character should be the ')' */ RExC_parse++; - assert(UCHARAT(RExC_parse) == ')'); + if (UCHARAT(RExC_parse) != ')') + vFAIL("Expecting close paren for nested extended charclass"); /* Then the ')' matching the original '(' handled by this * case: statement */ RExC_parse++; - assert(UCHARAT(RExC_parse) == ')'); + if (UCHARAT(RExC_parse) != ')') + vFAIL("Expecting close paren for wrapper for nested extended charclass"); RExC_parse++; RExC_flags = save_flags; diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 55265315e5..516de41911 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -59,21 +59,21 @@ Unmatched [ in regex; marked by <-- HERE in m/abc[ <-- HERE fi[.00./ at - line qr/(?[[[:word]]])/; EXPECT Assuming NOT a POSIX class since there is no terminating ':' in regex; marked by <-- HERE in m/(?[[[:word <-- HERE ]]])/ at - line 2. -syntax error in (?[...]) in regex m/(?[[[:word]]])/ at - line 2. +Unexpected ']' with no following ')' in (?[... in regex; marked by <-- HERE in m/(?[[[:word]] <-- HERE ])/ at - line 2. ######## # NAME qr/(?[ [[:digit: ])/ # OPTION fatal qr/(?[[[:digit: ])/; EXPECT Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[[:digit: ] <-- HERE )/ at - line 2. -syntax error in (?[...]) in regex m/(?[[[:digit: ])/ at - line 2. +syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[[:digit: ]) <-- HERE / at - line 2. ######## # NAME qr/(?[ [:digit: ])/ # OPTION fatal qr/(?[[:digit: ])/ EXPECT Assuming NOT a POSIX class since no blanks are allowed in one in regex; marked by <-- HERE in m/(?[[:digit: ] <-- HERE )/ at - line 2. -syntax error in (?[...]) in regex m/(?[[:digit: ])/ at - line 2. +syntax error in (?[...]) in regex; marked by <-- HERE in m/(?[[:digit: ]) <-- HERE / at - line 2. 
######## # NAME [perl #126141] # OPTION fatal diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 86986193d0..24cd986960 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -234,8 +234,9 @@ my @death = '/\b{gc}/' => "'gc' is an unknown bound type {#} m/\\b{gc{#}}/", '/\B{gc}/' => "'gc' is an unknown bound type {#} m/\\B{gc{#}}/", - '/(?[[[::]]])/' => "Syntax error in (?[...]) in regex m/(?[[[::]]])/", - '/(?[[[:w:]]])/' => "Syntax error in (?[...]) in regex m/(?[[[:w:]]])/", + + '/(?[[[::]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[::]]{#}])/", + '/(?[[[:w:]]])/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[[[:w:]]{#}])/", '/(?[[:w:]])/' => "", '/([.].*)[.]/' => "", # [perl #127582] '/[.].*[.]/' => "", # [perl #127604] @@ -258,11 +259,12 @@ my @death = '/(?[ \p{foo} ])/' => 'Can\'t find Unicode property definition "foo" {#} m/(?[ \p{foo}{#} ])/', '/(?[ \p{ foo = bar } ])/' => 'Can\'t find Unicode property definition "foo = bar" {#} m/(?[ \p{ foo = bar }{#} ])/', '/(?[ \8 ])/' => 'Unrecognized escape \8 in character class {#} m/(?[ \8{#} ])/', - '/(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ]/', - '/(?[ [ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ \t ]/', - '/(?[ \t ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ] ]/', - '/(?[ [ ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ ] ]/', - '/(?[ \t + \e # This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # This was supposed to be a comment ])/', + '/(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/(?[ \\t ]{#}/", + '/(?[ [ \t ]/' => "Syntax error in (?[...]) {#} m/(?[ [ \\t ]{#}/", + '/(?[ \t ] ]/' => "Unexpected ']' with no following ')' in (?[... 
{#} m/(?[ \\t ]{#} ]/", + '/(?[ [ ] ]/' => "Syntax error in (?[...]) {#} m/(?[ [ ] ]{#}/", + '/(?[ \t + \e # This was supposed to be a comment ])/' => + "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # This was supposed to be a comment ]){#}/", '/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ {#}])/', 'm/(?[[a-\d]])/' => 'False [] range "a-\d" {#} m/(?[[a-\d{#}]])/', 'm/(?[[\w-x]])/' => 'False [] range "\w-" {#} m/(?[[\w-{#}x]])/', @@ -452,10 +454,10 @@ my @death_utf8 = mark_as_utf8( '/ネ\p{}ネ/' => 'Empty \p{} {#} m/ネ\p{{#}}ネ/', - '/ネ(?[[[:ネ]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ]]])ネ/", - '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ: ])ネ/", - '/ネ(?[[[::]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[::]]])ネ/", - '/ネ(?[[[:ネ:]]])ネ/' => "Syntax error in (?[...]) in regex m/ネ(?[[[:ネ:]]])ネ/", + '/ネ(?[[[:ネ]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[:ネ]]{#}])ネ/", + '/ネ(?[[[:ネ: ])ネ/' => "Syntax error in (?[...]) {#} m/ネ(?[[[:ネ: ])ネ{#}/", + '/ネ(?[[[::]]])ネ/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[[[::]]{#}])ネ/", + '/ネ(?[[[:ネ:]]])ネ/' => "Unexpected ']' with no following ')' in (?[... 
{#} m/ネ(?[[[:ネ:]]{#}])ネ/", '/ネ(?[[:ネ:]])ネ/' => "", '/ネ(?[ネ])ネ/' => 'Unexpected character {#} m/ネ(?[ネ{#}])ネ/', '/ネ(?[ + [ネ] ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/ネ(?[ +{#} [ネ] ])/', @@ -468,8 +470,9 @@ my @death_utf8 = mark_as_utf8( '/(?[ \x{ネ} ])ネ/' => 'Non-hex character {#} m/(?[ \x{ネ{#}} ])ネ/', '/(?[ \p{ネ} ])/' => 'Can\'t find Unicode property definition "ネ" {#} m/(?[ \p{ネ}{#} ])/', '/(?[ \p{ ネ = bar } ])/' => 'Can\'t find Unicode property definition "ネ = bar" {#} m/(?[ \p{ ネ = bar }{#} ])/', - '/ネ(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/ネ(?[ \t ]/', - '/(?[ \t + \e # ネ This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # ネ This was supposed to be a comment ])/', + '/ネ(?[ \t ]/' => "Unexpected ']' with no following ')' in (?[... {#} m/ネ(?[ \\t ]{#}/", + '/(?[ \t + \e # ネ This was supposed to be a comment ])/' => + "Syntax error in (?[...]) {#} m/(?[ \\t + \\e # ネ This was supposed to be a comment ]){#}/", 'm/(*ネ)ネ/' => q<Unknown verb pattern 'ネ' {#} m/(*ネ){#}ネ/>, '/\cネ/' => "Character following \"\\c\" must be printable ASCII", '/\b{ネ}/' => "'ネ' is an unknown bound type {#} m/\\b{ネ{#}}/", diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t index 6a79f9d692..e9644bd4e6 100644 --- a/t/re/regex_sets.t +++ b/t/re/regex_sets.t @@ -158,13 +158,13 @@ for my $char ("٠", "٥", "٩") { eval { $_ = '/(?[(\c]) /'; qr/$_/ }; like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic'); eval { $_ = '(?[\c#]' . 
"\n])"; qr/$_/ }; - like($@, qr/^Syntax error/, '/(?[(\c]) / should not panic'); + like($@, qr/^Unexpected/, '/(?[(\c]) / should not panic'); eval { $_ = '(?[(\c])'; qr/$_/ }; like($@, qr/^Syntax error/, '/(?[(\c])/ should be a syntax error'); eval { $_ = '(?[(\c]) ]\b'; qr/$_/ }; - like($@, qr/^Syntax error/, '/(?[(\c]) ]\b/ should be a syntax error'); + like($@, qr/^Unexpected/, '/(?[(\c]) ]\b/ should be a syntax error'); eval { $_ = '(?[\c[]](])'; qr/$_/ }; - like($@, qr/^Syntax error/, '/(?[\c[]](])/ should be a syntax error'); + like($@, qr/^Unexpected/, '/(?[\c[]](])/ should be a syntax error'); like("\c#", qr/(?[\c#])/, '\c# should match itself'); like("\c[", qr/(?[\c[])/, '\c[ should match itself'); like("\c\ ", qr/(?[\c\])/, '\c\ should match itself'); |