diff options
-rw-r--r-- | pod/perldelta.pod | 22 | ||||
-rw-r--r-- | pod/perldiag.pod | 12 | ||||
-rw-r--r-- | regexp.h | 6 | ||||
-rw-r--r-- | t/op/eval.t | 2 | ||||
-rw-r--r-- | t/re/re.t | 26 | ||||
-rw-r--r-- | toke.c | 60 |
6 files changed, 119 insertions, 9 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod index f0677c7f87..fdc6df18e4 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -28,6 +28,24 @@ here, but most should go in the L</Performance Enhancements> section. [ List each enhancement as a =head2 entry ] +=head2 The new regular expression modifiers available in suffix form + +Various releases of the 5.13.x series have added new regular expression +modifiers, C</a>, C</d>, C</l>, and C</u>. They were only available in +infix form (e.g., C<(?a:...)>) until this release; now they are usable +in suffix form. This change was made too late to change all the +affected documentation, so there are a number of places that erroneously +say these must be used in infix form. + +However, there is an ambiguity with the construct, C<s/foo/bar/le...>. Due +to backward compatibility constraints, in Perl 5.14 only, it will be +resolved as C<s/foo/bar/ le...>, that is, as meaning to take the result +of the substitution, and see if it is stringwise less-than-or-equal-to +what follows. In Perl 5.16 and later, it will instead be resolved as +meaning to do the pattern match using the rules of the current locale, +and evaluate the rhs as an expression when doing the substitution. In +5.14, if you want the latter interpretation, you can write "el" instead. + =head2 Add C<\p{Titlecase}> as a synonym for C<\p{Title}> This synonym is added for symmetry with the Unicode property names @@ -41,9 +59,7 @@ non-ASCII character. For example, normally, 'k' =~ /\N{KELVIN SIGN}/ -will match; it won't under C</aa>. Note that like C</a>, C</aa> -in 5.14 will not actually be able to be used as a suffix at the end of a -regular expression. +will match; it won't under C</aa>. =head2 New warnings categories for problematic (non-)Unicode code points. 
diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 614276ad10..2d9a9ac541 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -146,6 +146,18 @@ string C<"-foo">, or a call to the function C<foo>, negated. If you meant the string, just write C<"-foo">. If you meant the function call, write C<-foo()>. +=item Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way + +(W deprecated, ambiguous) You wrote a pattern match with substitution +immediately followed by "le". In Perl 5.14 and earlier, this is +resolved as meaning to take the result of the substitution, and see if +it is stringwise less-than-or-equal-to what follows in the expression. +Having the "le" immediately following a pattern is deprecated behavior, +so in Perl 5.16, this expression will be resolved as meaning to do the +pattern match using the rules of the current locale, and evaluate the +rhs as an expression when doing the substitution. In 5.14, if you want +the latter interpretation, you can simply write "el" instead. + =item '|' and '<' may not both be specified on command line (F) An error peculiar to VMS. Perl does its own command line @@ -233,7 +233,7 @@ and check for NULL. case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \ case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break -/* Note, includes locale, unicode */ +/* Note, includes charset ones, assumes 0 is the default for them */ #define STD_PMMOD_FLAGS_CLEAR(pmfl) \ *(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_CHARSET) @@ -276,13 +276,15 @@ and check for NULL. * character is bit +1, etc. 
*/ #define STD_PAT_MODS "msix" +#define CHARSET_PAT_MODS ASCII_RESTRICT_PAT_MODS DEPENDS_PAT_MODS LOCALE_PAT_MODS UNICODE_PAT_MODS + /* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered * so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of * extflags; the next character is in bit +1, etc. */ #define INT_PAT_MODS STD_PAT_MODS KEEPCOPY_PAT_MODS #define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS -#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS +#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS CHARSET_PAT_MODS #define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS #define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS NONDESTRUCT_PAT_MODS diff --git a/t/op/eval.t b/t/op/eval.t index 5ef30096c8..f0fa0f25f1 100644 --- a/t/op/eval.t +++ b/t/op/eval.t @@ -462,7 +462,7 @@ print "ok $test - eval and last\n"; $test++; { no warnings; - eval "/ /a;"; + eval "/ /b;"; print "not " unless $@ =~ /^syntax error/; print "ok $test # eval syntax error, no warnings \n"; $test++; } @@ -55,6 +55,30 @@ if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ } { + my ($pat, $mods); + $|=1; + + my $re = qr/a/d; + ($pat, $mods) = regexp_pattern($re); + is($mods, "", "Verify /d results in default mod"); + $re = qr/a/u; + ($pat, $mods) = regexp_pattern($re); + is($mods, "u", "Verify /u is understood"); + $re = qr/a/l; + ($pat, $mods) = regexp_pattern($re); + is($mods, "l", "Verify /l is understood"); + $re = qr/a/a; + ($pat, $mods) = regexp_pattern($re); + is($mods, "a", "Verify /a is understood"); + $re = qr/a/aa; + ($pat, $mods) = regexp_pattern($re); + is($mods, "aa", "Verify /aa is understood"); + diag($mods); + $pat = regexp_pattern($re); + diag($pat); +} + +{ # tests for new regexp flags my $text = "\xE4"; my $check; @@ -110,5 +134,5 @@ if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ } # New tests above this line, don't forget to update the test count below! -BEGIN { plan tests => 28 } +BEGIN { plan tests => 33 } # No tests here! 
@@ -8765,24 +8765,80 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) { * otherwise FALSE */ const char c = **s; + if (! strchr(valid_flags, c)) { if (isALNUM(c)) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), - "Having no space between pattern and following word is deprecated"); + goto deprecate; } return FALSE; } switch (c) { + CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl); case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break; case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break; case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; + case LOCALE_PAT_MOD: + + /* In 5.14, qr//lt is legal but deprecated; the 't' means they + * can't be regex modifiers. + * In 5.14, s///le is legal and ambiguous. Try to disambiguate as + * much as easily done. s///lei, for example, has to mean regex + * modifiers if it's not an error (as does any word character + * following the 'e'). Otherwise, we resolve to the backwards- + * compatible, but less likely 's/// le ...', i.e. as meaning + * less-than-or-equal. The reason it's not likely is that s// + * returns a number, and so '<=' should be used for comparing, not + * 'le'. */ + if (*((*s) + 1) == 't') { + goto deprecate; + } + else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) + { + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. 
In Perl 5.16, it will be resolved the other way"); + return FALSE; + } + set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); + break; + case UNICODE_PAT_MOD: + /* In 5.14, qr//unless and qr//until are legal but deprecated; the + * 'n' means they can't be regex modifiers */ + if (*((*s) + 1) == 'n') { + goto deprecate; + } + set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); + break; + case ASCII_RESTRICT_PAT_MOD: + /* In 5.14, qr//and is legal but deprecated; the 'n' means they + * can't be regex modifiers */ + if (*((*s) + 1) == 'n') { + goto deprecate; + } + if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) { + /* Doubled modifier implies more restricted */ + set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); + (*s)++; + } + else { + set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); + } + break; + case DEPENDS_PAT_MOD: + set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); + break; } + (*s)++; return TRUE; + + deprecate: + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), + "Having no space between pattern and following word is deprecated"); + return FALSE; } STATIC char * |