diff options
author | Nicholas Clark <nick@ccl4.org> | 2021-06-30 14:49:34 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2021-07-05 06:11:04 +0000 |
commit | d4756df0a5b50be2739c8d596375f2b6ed1c6d93 (patch) | |
tree | ff5a129262eea0cd08754765bf9992e4d52de586 /dist | |
parent | ee5117509fdb6ed3324e9d129ac0de08d9873ea3 (diff) | |
download | perl-d4756df0a5b50be2739c8d596375f2b6ed1c6d93.tar.gz |
Data::Dumper was erroneously stringifying qr'$foo' as qr/$foo/
qr// vs qr'' only affects whether variable interpolation happens in the
parser. The compiled regex doesn't record which quote style it used. It
turns out that $ always interpolates, unless it is at the end of the string
or followed by ) or |. Otherwise, except in qr'', dollar-anything will
interpolate that punctuation or regular variable. (Possibly violating
strict)
Meaning that if we see an unescaped $ that isn't at the end of the string,
isn't followed by ) and isn't followed by |, then either the regular
expression was written as qr'', *or* there was variable interpolation
where the interpolated value was that literal dollar sign.
We can exploit variable interpolation to generate a regex in qr// form
equivalent to one written without interpolation in qr'' form, which is
very useful as the XS code needs to use \x{} style escapes for non-ASCII
literals in the regex. This approach suggested by Eirik Berg Hanssen.
Bug reported as CPAN #84569
Diffstat (limited to 'dist')
-rw-r--r-- | dist/Data-Dumper/Dumper.pm | 11 | ||||
-rw-r--r-- | dist/Data-Dumper/Dumper.xs | 19 | ||||
-rw-r--r-- | dist/Data-Dumper/t/dumper.t | 51 |
3 files changed, 77 insertions, 4 deletions
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm index 15589a33cf..9c56d4ffa1 100644 --- a/dist/Data-Dumper/Dumper.pm +++ b/dist/Data-Dumper/Dumper.pm @@ -364,7 +364,16 @@ sub _dump { else { $pat = "$val"; } - $pat =~ s <(\\.)|/> { $1 || '\\/' }ge; + $pat =~ s < + (\\.) # anything backslash escaped + | (\$)(?![)|]|\z) # any unescaped $, except $| $) and end + | / # any unescaped / + > + { + $1 ? $1 + : $2 ? '${\q($)}' + : '\\/' + }gex; $out .= "qr/$pat/$flags"; } elsif ($realtype eq 'SCALAR' || $realtype eq 'REF' diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index 6d73beca30..7afb4a5a22 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -570,6 +570,10 @@ dump_regexp(pTHX_ SV *retval, SV *val) * * Of course, to add to the fun, we also need to escape Unicode characters * to \x{...} notation (whether they are "escaped" by \ or stand alone). + * + * which means we need to output qr// notation + * even if the input was expressed as q'' (eg q'$foo') + * * We can do all this in one pass if we are careful... */ @@ -591,8 +595,14 @@ dump_regexp(pTHX_ SV *retval, SV *val) k = *p; } - if ((k == '/' && !saw_backslash) || ( do_utf8 - && ! UTF8_IS_INVARIANT(k))) + if (/* / that was not backslashed */ + (k == '/' && !saw_backslash) + /* $ that was not backslashed, unless it is at the end of the regex + or it is followed by | or it is followed by ) */ + || (k == '$' && !saw_backslash + && (p + 1 != rend && p[1] != '|' && p[1] != ')')) + /* or need to use \x{} notation. */ + || (do_utf8 && ! UTF8_IS_INVARIANT(k))) { STRLEN to_copy = p - (U8 *) rval; if (to_copy) { @@ -603,6 +613,11 @@ dump_regexp(pTHX_ SV *retval, SV *val) sv_catpvs(retval, "\\/"); ++p; } + else if (k == '$') { + /* this approach suggested by Eirik Berg Hanssen: */ + sv_catpvs(retval, "${\\q($)}"); + ++p; + } else { /* If there was a \, we have copied it already, so all that is * left to do here is the \x{...} escaping. diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t index 137ceb1bb1..176a12731a 100644 --- a/dist/Data-Dumper/t/dumper.t +++ b/dist/Data-Dumper/t/dumper.t @@ -15,7 +15,7 @@ $Data::Dumper::Sortkeys = 1; $Data::Dumper::Pad = "#"; my $XS; -my $TMAX = 492; +my $TMAX = 498; # Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling # it direct. Out here it lets us knobble the next if to test that the perl @@ -1740,6 +1740,55 @@ EOW } ############# { + # [CPAN #84569] + my $dollar = '${\q($)}'; + my $want = <<"EOW"; +#\$VAR1 = [ +# "\\x{2e18}", +# qr/^\$/, +# qr/^\$/, +# qr/${dollar}foo/, +# qr/\\\$foo/, +# qr/$dollar \x{A3} /u, +# qr/$dollar \x{203d} /u, +# qr/\\\$ \x{203d} /u, +# qr/\\\\$dollar \x{203d} /u, +# qr/ \$| \x{203d} /u, +# qr/ (\$) \x{203d} /u, +# '\xA3' +#]; +EOW + if ($] lt '5.014') { + $want =~ s{/u,$}{/,}mg; + } + if ($] lt '5.010001') { + $want =~ s!qr/!qr/(?-xism:!g; + $want =~ s!/,!)/,!g; + } + my $want_xs = $want; + $want_xs =~ s/'\x{A3}'/"\\x{a3}"/; + $want_xs =~ s/\x{A3}/\\x{a3}/; + $want_xs =~ s/\x{203D}/\\x{203d}/g; + my $have = <<"EOT"; +Data::Dumper->Dumpxs([ [ + "\\x{2e18}", + qr/^\$/, + qr'^\$', + qr'\$foo', + qr/\\\$foo/, + qr'\$ \x{A3} ', + qr'\$ \x{203d} ', + qr/\\\$ \x{203d} /, + qr'\\\\\$ \x{203d} ', + qr/ \$| \x{203d} /, + qr/ (\$) \x{203d} /, + '\xA3' +] ]); +EOT + TEST_BOTH($have, "CPAN #84569", $want, $want_xs); +} +############# +{ # [perl #82948] # re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2 # and apparently backported to maint-5.10 |