summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pod/perldelta.pod22
-rw-r--r--pod/perldiag.pod12
-rw-r--r--regexp.h6
-rw-r--r--t/op/eval.t2
-rw-r--r--t/re/re.t26
-rw-r--r--toke.c60
6 files changed, 119 insertions, 9 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index f0677c7f87..fdc6df18e4 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -28,6 +28,24 @@ here, but most should go in the L</Performance Enhancements> section.
[ List each enhancement as a =head2 entry ]
+=head2 The new regular expression modifiers available in suffix form
+
+Various releases of the 5.13.x series have added new regular expression
+modifiers, C</a>, C</d>, C</l>, and C</u>. They were only available in
+infix form (e.g., C<(?a:...)>) until this release; now they are usable
+in suffix form. This change was made too late to change all the
+affected documentation, so there are a number of places that erroneously
+say these must be used in infix form.
+
+However, there is an ambiguity with the construct, C<s/foo/bar/le...>. Due
+to backward compatibility constraints, in Perl 5.14 only, it will be
+resolved as C<s/foo/bar/ le...>, that is, as meaning to take the result
+of the substitution, and see if it is stringwise less-than-or-equal-to
+what follows. In Perl 5.16 and later, it will instead be resolved as
+meaning to do the pattern match using the rules of the current locale,
+and evaluate the rhs as an expression when doing the substitution. In
+5.14, if you want the latter interpretation, you can write "el" instead.
+
=head2 Add C<\p{Titlecase}> as a synonym for C<\p{Title}>
This synonym is added for symmetry with the Unicode property names
@@ -41,9 +59,7 @@ non-ASCII character. For example, normally,
'k' =~ /\N{KELVIN SIGN}/
-will match; it won't under C</aa>. Note that like C</a>, C</aa>
-in 5.14 will not actually be able to be used as a suffix at the end of a
-regular expression.
+will match; it won't under C</aa>.
=head2 New warnings categories for problematic (non-)Unicode code points.
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 614276ad10..2d9a9ac541 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -146,6 +146,18 @@ string C<"-foo">, or a call to the function C<foo>, negated. If you meant
the string, just write C<"-foo">. If you meant the function call,
write C<-foo()>.
+=item Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way
+
+(W deprecated, ambiguous) You wrote a pattern match with substitution
+immediately followed by "le". In Perl 5.14 and earlier, this is
+resolved as meaning to take the result of the substitution, and see if
+it is stringwise less-than-or-equal-to what follows in the expression.
+Having the "le" immediately following a pattern is deprecated behavior,
+so in Perl 5.16, this expression will be resolved as meaning to do the
+pattern match using the rules of the current locale, and evaluate the
+rhs as an expression when doing the substitution. In 5.14, if you want
+the latter interpretation, you can simply write "el" instead.
+
=item '|' and '<' may not both be specified on command line
(F) An error peculiar to VMS. Perl does its own command line
diff --git a/regexp.h b/regexp.h
index 92c9ccedfd..c4fa6090ed 100644
--- a/regexp.h
+++ b/regexp.h
@@ -233,7 +233,7 @@ and check for NULL.
case SINGLE_PAT_MOD: *(pmfl) |= RXf_PMf_SINGLELINE; break; \
case XTENDED_PAT_MOD: *(pmfl) |= RXf_PMf_EXTENDED; break
-/* Note, includes locale, unicode */
+/* Note, includes charset ones, assumes 0 is the default for them */
#define STD_PMMOD_FLAGS_CLEAR(pmfl) \
*(pmfl) &= ~(RXf_PMf_FOLD|RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_EXTENDED|RXf_PMf_CHARSET)
@@ -276,13 +276,15 @@ and check for NULL.
* character is bit +1, etc. */
#define STD_PAT_MODS "msix"
+#define CHARSET_PAT_MODS ASCII_RESTRICT_PAT_MODS DEPENDS_PAT_MODS LOCALE_PAT_MODS UNICODE_PAT_MODS
+
/* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered
* so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of
* extflags; the next character is in bit +1, etc. */
#define INT_PAT_MODS STD_PAT_MODS KEEPCOPY_PAT_MODS
#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
-#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
+#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS CHARSET_PAT_MODS
#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS NONDESTRUCT_PAT_MODS
diff --git a/t/op/eval.t b/t/op/eval.t
index 5ef30096c8..f0fa0f25f1 100644
--- a/t/op/eval.t
+++ b/t/op/eval.t
@@ -462,7 +462,7 @@ print "ok $test - eval and last\n"; $test++;
{
no warnings;
- eval "/ /a;";
+ eval "/ /b;";
print "not " unless $@ =~ /^syntax error/;
print "ok $test # eval syntax error, no warnings \n"; $test++;
}
diff --git a/t/re/re.t b/t/re/re.t
index 67c21813c0..cf6cdffe5b 100644
--- a/t/re/re.t
+++ b/t/re/re.t
@@ -55,6 +55,30 @@ if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){
}
{
+ my ($pat, $mods);
+ $|=1;
+
+ my $re = qr/a/d;
+ ($pat, $mods) = regexp_pattern($re);
+ is($mods, "", "Verify /d results in default mod");
+ $re = qr/a/u;
+ ($pat, $mods) = regexp_pattern($re);
+ is($mods, "u", "Verify /u is understood");
+ $re = qr/a/l;
+ ($pat, $mods) = regexp_pattern($re);
+ is($mods, "l", "Verify /l is understood");
+ $re = qr/a/a;
+ ($pat, $mods) = regexp_pattern($re);
+ is($mods, "a", "Verify /a is understood");
+ $re = qr/a/aa;
+ ($pat, $mods) = regexp_pattern($re);
+ is($mods, "aa", "Verify /aa is understood");
+ diag($mods);
+ $pat = regexp_pattern($re);
+ diag($pat);
+}
+
+{
# tests for new regexp flags
my $text = "\xE4";
my $check;
@@ -110,5 +134,5 @@ if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){
}
# New tests above this line, don't forget to update the test count below!
-BEGIN { plan tests => 28 }
+BEGIN { plan tests => 33 }
# No tests here!
diff --git a/toke.c b/toke.c
index a4a279fe07..ddd50cf301 100644
--- a/toke.c
+++ b/toke.c
@@ -8765,24 +8765,80 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
* otherwise FALSE */
const char c = **s;
+
if (! strchr(valid_flags, c)) {
if (isALNUM(c)) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
- "Having no space between pattern and following word is deprecated");
+ goto deprecate;
}
return FALSE;
}
switch (c) {
+
CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
+ case LOCALE_PAT_MOD:
+
+ /* In 5.14, qr//lt is legal but deprecated; the 't' means they
+ * can't be regex modifiers.
+ * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
+ * much as easily done. s///lei, for example, has to mean regex
+ * modifiers if it's not an error (as does any word character
+ * following the 'e'). Otherwise, we resolve to the backwards-
+ * compatible, but less likely 's/// le ...', i.e. as meaning
+ * less-than-or-equal. The reason it's not likely is that s//
+ * returns a number, and so '<=' should be used for comparing, not
+ * 'le'. */
+ if (*((*s) + 1) == 't') {
+ goto deprecate;
+ }
+ else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2)))
+ {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
+ "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
+ return FALSE;
+ }
+ set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+ break;
+ case UNICODE_PAT_MOD:
+ /* In 5.14, qr//unless and qr//until are legal but deprecated; the
+ * 'n' means they can't be regex modifiers */
+ if (*((*s) + 1) == 'n') {
+ goto deprecate;
+ }
+ set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+ break;
+ case ASCII_RESTRICT_PAT_MOD:
+ /* In 5.14, qr//and is legal but deprecated; the 'n' means they
+ * can't be regex modifiers */
+ if (*((*s) + 1) == 'n') {
+ goto deprecate;
+ }
+ if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
+ /* Doubled modifier implies more restricted */
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
+ (*s)++;
+ }
+ else {
+ set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+ }
+ break;
+ case DEPENDS_PAT_MOD:
+ set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+ break;
}
+
(*s)++;
return TRUE;
+
+ deprecate:
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
+ "Having no space between pattern and following word is deprecated");
+ return FALSE;
}
STATIC char *