summaryrefslogtreecommitdiff
path: root/dist
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2021-06-30 14:49:34 +0000
committerNicholas Clark <nick@ccl4.org>2021-07-05 06:11:04 +0000
commitd4756df0a5b50be2739c8d596375f2b6ed1c6d93 (patch)
treeff5a129262eea0cd08754765bf9992e4d52de586 /dist
parentee5117509fdb6ed3324e9d129ac0de08d9873ea3 (diff)
downloadperl-d4756df0a5b50be2739c8d596375f2b6ed1c6d93.tar.gz
Data::Dumper was erroneously stringifying qr'$foo' as qr/$foo/
qr// vs qr'' only affects whether variable interpolation happens in the parser. The compiled regex doesn't record which quote style it used. It turns out that $ always interpolates, unless it is at the end of the string or followed by ) or |. Otherwise, except in qr'', dollar-anything will interpolate that punctuation or regular variable. (Possibly violating strict) Meaning that if we see an unescaped $ that isn't at the end of the string, isn't followed by ) and isn't followed by |, then either the regular expression was written as qr'', *or* there was variable interpolation where the interpolated value was that literal dollar sign. We can exploit variable interpolation to generate a regex in qr// form equivalent to one written without interpolation in qr'' form, which is very useful as the XS code needs to use \x{} style escapes for non-ASCII literals in the regex. This approach suggested by Eirik Berg Hanssen. Bug reported as CPAN #84569
Diffstat (limited to 'dist')
-rw-r--r--dist/Data-Dumper/Dumper.pm11
-rw-r--r--dist/Data-Dumper/Dumper.xs19
-rw-r--r--dist/Data-Dumper/t/dumper.t51
3 files changed, 77 insertions, 4 deletions
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 15589a33cf..9c56d4ffa1 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -364,7 +364,16 @@ sub _dump {
else {
$pat = "$val";
}
- $pat =~ s <(\\.)|/> { $1 || '\\/' }ge;
+ $pat =~ s <
+ (\\.) # anything backslash escaped
+ | (\$)(?![)|]|\z) # any unescaped $, except $| $) and end
+ | / # any unescaped /
+ >
+ {
+ $1 ? $1
+ : $2 ? '${\q($)}'
+ : '\\/'
+ }gex;
$out .= "qr/$pat/$flags";
}
elsif ($realtype eq 'SCALAR' || $realtype eq 'REF'
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 6d73beca30..7afb4a5a22 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -570,6 +570,10 @@ dump_regexp(pTHX_ SV *retval, SV *val)
*
* Of course, to add to the fun, we also need to escape Unicode characters
* to \x{...} notation (whether they are "escaped" by \ or stand alone).
+ *
+ * which means we need to output qr// notation
+ * even if the input was expressed as q'' (eg q'$foo')
+ *
* We can do all this in one pass if we are careful...
*/
@@ -591,8 +595,14 @@ dump_regexp(pTHX_ SV *retval, SV *val)
k = *p;
}
- if ((k == '/' && !saw_backslash) || ( do_utf8
- && ! UTF8_IS_INVARIANT(k)))
+ if (/* / that was not backslashed */
+ (k == '/' && !saw_backslash)
+ /* $ that was not backslashed, unless it is at the end of the regex
+ or it is followed by | or it is followed by ) */
+ || (k == '$' && !saw_backslash
+ && (p + 1 != rend && p[1] != '|' && p[1] != ')'))
+ /* or need to use \x{} notation. */
+ || (do_utf8 && ! UTF8_IS_INVARIANT(k)))
{
STRLEN to_copy = p - (U8 *) rval;
if (to_copy) {
@@ -603,6 +613,11 @@ dump_regexp(pTHX_ SV *retval, SV *val)
sv_catpvs(retval, "\\/");
++p;
}
+ else if (k == '$') {
+ /* this approach suggested by Eirik Berg Hanssen: */
+ sv_catpvs(retval, "${\\q($)}");
+ ++p;
+ }
else {
/* If there was a \, we have copied it already, so all that is
* left to do here is the \x{...} escaping.
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index 137ceb1bb1..176a12731a 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -15,7 +15,7 @@ $Data::Dumper::Sortkeys = 1;
$Data::Dumper::Pad = "#";
my $XS;
-my $TMAX = 492;
+my $TMAX = 498;
# Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
# it direct. Out here it lets us knobble the next if to test that the perl
@@ -1740,6 +1740,55 @@ EOW
}
#############
{
+ # [CPAN #84569]
+ my $dollar = '${\q($)}';
+ my $want = <<"EOW";
+#\$VAR1 = [
+# "\\x{2e18}",
+# qr/^\$/,
+# qr/^\$/,
+# qr/${dollar}foo/,
+# qr/\\\$foo/,
+# qr/$dollar \x{A3} /u,
+# qr/$dollar \x{203d} /u,
+# qr/\\\$ \x{203d} /u,
+# qr/\\\\$dollar \x{203d} /u,
+# qr/ \$| \x{203d} /u,
+# qr/ (\$) \x{203d} /u,
+# '\xA3'
+#];
+EOW
+ if ($] lt '5.014') {
+ $want =~ s{/u,$}{/,}mg;
+ }
+ if ($] lt '5.010001') {
+ $want =~ s!qr/!qr/(?-xism:!g;
+ $want =~ s!/,!)/,!g;
+ }
+ my $want_xs = $want;
+ $want_xs =~ s/'\x{A3}'/"\\x{a3}"/;
+ $want_xs =~ s/\x{A3}/\\x{a3}/;
+ $want_xs =~ s/\x{203D}/\\x{203d}/g;
+ my $have = <<"EOT";
+Data::Dumper->Dumpxs([ [
+ "\\x{2e18}",
+ qr/^\$/,
+ qr'^\$',
+ qr'\$foo',
+ qr/\\\$foo/,
+ qr'\$ \x{A3} ',
+ qr'\$ \x{203d} ',
+ qr/\\\$ \x{203d} /,
+ qr'\\\\\$ \x{203d} ',
+ qr/ \$| \x{203d} /,
+ qr/ (\$) \x{203d} /,
+ '\xA3'
+] ]);
+EOT
+ TEST_BOTH($have, "CPAN #84569", $want, $want_xs);
+}
+#############
+{
# [perl #82948]
# re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2
# and apparently backported to maint-5.10