summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-03-13 18:46:41 +0100
committerYves Orton <demerphq@gmail.com>2023-03-14 20:09:01 +0800
commitd80a076d049633e9d4622d7f4561cd0cc13177b5 (patch)
treedc564783fd7b92d6c6239115ec64c94a81ce7572
parent6c12e0ee216a4005debef2334035abf818726587 (diff)
downloadperl-d80a076d049633e9d4622d7f4561cd0cc13177b5.tar.gz
mg.c - add support for ${^LAST_SUCCESSFUL_PATTERN}
This exposes the "last successful pattern" as a variable that can be printed, or used in patterns, or tested for definedness, etc. Many regex magical variables relate to PL_curpm, which contains the last successful match. We never exposed the *pattern* directly, although it was implicitly available via the "empty pattern". With this patch it is exposed explicitly. This means that if someone embeds a pattern as a match operator it can then be accessed after the fact much like a qr// variable would be. @ether asked if we had this, and I had to say "no", which was a shame as obviously the code involved isn't very complicated (the docs from this patch are far larger than the code involved!). At the very least this can be useful for debugging and probably testing. It can also be useful to test if the /is/ a "last successful pattern", by checking if the var is defined.
-rw-r--r--gv.c5
-rw-r--r--mg.c8
-rw-r--r--pod/perldelta.pod19
-rw-r--r--pod/perlop.pod21
-rw-r--r--pod/perlvar.pod21
-rw-r--r--t/re/pat.t45
6 files changed, 112 insertions, 7 deletions
diff --git a/gv.c b/gv.c
index 3b3e984da4..0fb23995a6 100644
--- a/gv.c
+++ b/gv.c
@@ -2219,8 +2219,9 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len,
if (memEQs(name, len, "\007LOBAL_PHASE"))
goto ro_magicalize;
break;
- case '\014': /* ${^LAST_FH} */
- if (memEQs(name, len, "\014AST_FH"))
+ case '\014':
+ if ( memEQs(name, len, "\014AST_FH") || /* ${^LAST_FH} */
+ memEQs(name, len, "\014AST_SUCCESSFUL_PATTERN")) /* ${^LAST_SUCCESSFUL_PATTERN} */
goto ro_magicalize;
break;
case '\015': /* ${^MATCH} */
diff --git a/mg.c b/mg.c
index fa473a2e12..6e911de897 100644
--- a/mg.c
+++ b/mg.c
@@ -1066,6 +1066,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
else
sv_set_undef(sv);
}
+ else if (strEQ(remaining, "AST_SUCCESSFUL_PATTERN")) {
+ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+ sv_setrv_inc(sv, MUTABLE_SV(rx));
+ sv_rvweaken(sv);
+ }
+ else
+ sv_set_undef(sv);
+ }
break;
case '\017': /* ^O & ^OPEN */
if (nextchar == '\0') {
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index ce1a2adec5..4183474c53 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -43,6 +43,25 @@ have a constant target label, and that label is found within the block.
LABEL: print "This does\n";
}
+=head2 New regexp variable ${^LAST_SUCCESSFUL_PATTERN}
+
+This allows access to the last succesful pattern that matched in the current scope.
+Many aspects of the regex engine refer to the "last successful pattern". The empty
+pattern reuses it, and all of the magic regex vars relate to it. This allows
+access to its pattern. The following code
+
+ if (m/foo/ || m/bar/) {
+ s//PQR/;
+ }
+
+can be rewritten as follows
+
+ if (m/foo/ || m/bar/) {
+ s/${^LAST_SUCCESSFUL_PATTERN}/PQR/;
+ }
+
+and it will do the exactly same thing.
+
=head1 Security
XXX Any security-related notices go here. In particular, any security
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 35b2fe4d7b..07c01a5279 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -2055,11 +2055,24 @@ The bottom line is that using C</o> is almost never a good idea.
=item The empty pattern C<//>
If the I<PATTERN> evaluates to the empty string, the last
-I<successfully> matched regular expression is used instead. In this
-case, only the C<g> and C<c> flags on the empty pattern are honored;
-the other flags are taken from the original pattern. If no match has
+I<successfully> matched regular expression is used instead. In this
+case, only the C<g> and C<c> flags on the empty pattern are honored; the
+other flags are taken from the original pattern. If no match has
previously succeeded, this will (silently) act instead as a genuine
-empty pattern (which will always match).
+empty pattern (which will always match). Using a user supplied string as
+a pattern has the risk that if the string is empty that it triggers the
+"last successful match" behavior, which can be very confusing. In such
+cases you are recommended to replace C<m/$pattern/> with
+C<m/(?:$pattern)/> to avoid this behavior.
+
+The last successful pattern may be accessed as a variable via
+C<${^LAST_SUCCESSFUL_PATTERN}>. Matching against it, or the empty
+pattern should have the same effect, with the exception that when there
+is no last successful pattern the empty pattern will silently match,
+whereas using the C<${^LAST_SUCCESSFUL_PATTERN}> variable will produce
+undefined warnings (if warnings are enabled). You can check
+C<defined(${^LAST_SUCCESSFUL_PATTERN})> to test if there is a "last
+successful match" in the current scope.
Note that it's possible to confuse Perl into thinking C<//> (the empty
regex) is really C<//> (the defined-or operator). Perl is usually pretty
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index 565d0206f5..aebd6a45bd 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -1372,6 +1372,27 @@ added in 5.25.7.
This variable is read-only, and its value is dynamically scoped.
+=item ${^LAST_SUCCESSFUL_PATTERN}
+
+The last successful pattern that matched in the current scope. The empty
+pattern defaults to matching to this. For instance:
+
+ if (m/foo/ || m/bar/) {
+ s//BLAH/;
+ }
+
+and
+
+ if (m/foo/ || m/bar/) {
+ s/${^LAST_SUCCESSFUL_PATTERN}/BLAH/;
+ }
+
+are equivalent.
+
+You can use this to debug which pattern matched last, or to match with it again.
+
+Added in Perl 5.37.10.
+
=item $LAST_REGEXP_CODE_RESULT
=item $^R
diff --git a/t/re/pat.t b/t/re/pat.t
index 95070b2290..3e3b9e6bf6 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -27,7 +27,7 @@ skip_all_without_unicode_tables();
my $has_locales = locales_enabled('LC_CTYPE');
-plan tests => 1240; # Update this when adding/deleting tests.
+plan tests => 1260; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -2462,6 +2462,49 @@ SKIP:
"GH Issue #18865 'XaaXbbXb' - test optimization");
}
}
+ {
+ # Test that ${^LAST_SUCCESSFUL_PATTERN} works as expected.
+ # It should match like the empty pattern does, and it should be dynamic
+ # in the same was as $1 is dynamic.
+ my ($str,$pat);
+ $str = "ABCD";
+ $str =~/(D)/;
+ is("$1", "D", '$1 is "D"');
+ $pat = "${^LAST_SUCCESSFUL_PATTERN}";
+ is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} is as expected');
+ {
+ if ($str=~/BX/ || $str=~/(BC)/) {
+ is("$1", "BC",'$1 is now "BC"');
+ $pat = "${^LAST_SUCCESSFUL_PATTERN}";
+ ok($str =~ s//ZZ/, "Empty pattern matched as expected");
+ is($str, "AZZD", "Empty pattern in s/// has result we expected");
+ }
+ }
+ is("$1", "D", '$1 should now be "D" again');
+ is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected');
+ ok($str=~s//Q/, 'Empty pattern to "Q" was successful');
+ is($str, "AZZQ", "Empty pattern in s/// has result we expected (try2)");
+ $pat = "${^LAST_SUCCESSFUL_PATTERN}";
+ is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} restored to its previous value as expected');
+
+ $str = "ABCD";
+ {
+ if ($str=~/BX/ || $str=~/(BC)/) {
+ is("$1", "BC",'$1 is now "BC"');
+ $pat = "${^LAST_SUCCESSFUL_PATTERN}";
+ ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/ZZ/, '${^LAST_SUCCESSFUL_PATTERN} matched as expected');
+ is($str, "AZZD", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected');
+ }
+ }
+ is("$1", "D", '$1 should now be "D" again');
+ is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected');
+ is($str, "AZZD", 'Using ${^LAST_SUCCESSFUL_PATTERN} as a pattern has same result as empty pattern');
+ ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/Q/, '${^LAST_SUCCESSFUL_PATTERN} to "Q" was successful');
+ is($str, "AZZQ", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected');
+ ok($str=~/ZQ/, "/ZQ/ matched as expected");
+ $pat = "${^LAST_SUCCESSFUL_PATTERN}";
+ is($pat, "(?^:ZQ)", '${^LAST_SUCCESSFUL_PATTERN} changed as expected');
+ }
} # End of sub run_tests
1;