diff options
author | Tony Cook <tony@develop-help.com> | 2013-12-18 15:32:20 +1100 |
---|---|---|
committer | Tony Cook <tony@develop-help.com> | 2013-12-18 15:32:20 +1100 |
commit | 5b50ddc0fecad7c18ba9a41a6996121a236a36f0 (patch) | |
tree | 1330e4215a5f9aedf66271ec1618c60e27cd4932 /dist | |
parent | 09c658110f06839ac776873506c8541cccfd0739 (diff) | |
download | perl-5b50ddc0fecad7c18ba9a41a6996121a236a36f0.tar.gz |
[perl #120384] make hash key quoting compatible between perl and XS
In particular:
- if quotekeys is set all hash keys are now quoted, previously the perl
code didn't quote "safe" numeric keys
- keys of the form ::foo are now quoted by XS as the perl code always did
- XS code quoted "safe" numbers, while the perl code didn't
- perl code didn't quote strings like "1\x{660}", since \x{660}
matches \d
Diffstat (limited to 'dist')
-rw-r--r-- | dist/Data-Dumper/Dumper.pm | 16 | ||||
-rw-r--r-- | dist/Data-Dumper/Dumper.xs | 100 | ||||
-rw-r--r-- | dist/Data-Dumper/t/dumper.t | 14 | ||||
-rw-r--r-- | dist/Data-Dumper/t/quotekeys.t | 43 |
4 files changed, 119 insertions, 54 deletions
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm index 96ff4926ff..7acf6faaa5 100644 --- a/dist/Data-Dumper/Dumper.pm +++ b/dist/Data-Dumper/Dumper.pm @@ -440,8 +440,15 @@ sub _dump { () ) { my $nk = $s->_dump($k, ""); - $nk = $1 - if !$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/; + + # _dump doesn't quote numbers of this form + if ($s->{quotekeys} && $nk =~ /^(?:0|-?[1-9][0-9]{0,8})\z/) { + $nk = $s->{useqq} ? qq("$nk") : qq('$nk'); + } + elsif (!$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/) { + $nk = $1 + } + $sname = $mname . '{' . $nk . '}'; $out .= $pad . $ipad . $nk . $pair; @@ -546,7 +553,8 @@ sub _dump { and ref $ref eq 'VSTRING' || eval{Scalar::Util::isvstring($val)}) { $out .= sprintf "%vd", $val; } - elsif ($val =~ /^(?:0|-?[1-9]\d{0,8})\z/) { # safe decimal number + # \d here would treat "1\x{660}" as a safe decimal number + elsif ($val =~ /^(?:0|-?[1-9][0-9]{0,8})\z/) { # safe decimal number $out .= $val; } else { # string @@ -1390,7 +1398,7 @@ modify it under the same terms as Perl itself. =head1 VERSION -Version 2.150 (November 21 2013) +Version 2.150 (December 18 2013) =head1 SEE ALSO diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs index 0bdcbe06f3..12c4ebd9f6 100644 --- a/dist/Data-Dumper/Dumper.xs +++ b/dist/Data-Dumper/Dumper.xs @@ -19,7 +19,9 @@ static I32 num_q (const char *s, STRLEN slen); static I32 esc_q (char *dest, const char *src, STRLEN slen); static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq); -static I32 needs_quote(const char *s, STRLEN len); +static bool globname_needs_quote(const char *s, STRLEN len); +static bool key_needs_quote(const char *s, STRLEN len); +static bool safe_decimal_number(const char *p, STRLEN len); static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, @@ -91,19 +93,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen) #define DD_is_integer(sv) SvIOK(sv) #endif -/* does a string need to be protected? */ -static I32 -needs_quote(const char *s, STRLEN len) +/* does a glob name need to be protected? */ +static bool +globname_needs_quote(const char *s, STRLEN len) { const char *send = s+len; TOP: if (s[0] == ':') { if (++s<send) { if (*s++ != ':') - return 1; + return TRUE; } else - return 1; + return TRUE; } if (isIDFIRST(*s)) { while (++s<send) @@ -111,12 +113,35 @@ TOP: if (*s == ':') goto TOP; else - return 1; + return TRUE; } } else - return 1; - return 0; + return TRUE; + + return FALSE; +} + +/* does a hash key need to be quoted (to the left of => ). + Previously this used (globname_)needs_quote() which accepted strings + like '::foo', but these aren't safe as unquoted keys under strict. +*/ +static bool +key_needs_quote(const char *s, STRLEN len) { + const char *send = s+len; + + if (safe_decimal_number(s, len)) { + return FALSE; + } + else if (isIDFIRST(*s)) { + while (++s<send) + if (!isWORDCHAR(*s)) + return TRUE; + } + else + return TRUE; + + return FALSE; } /* Check that the SV can be represented as a simple decimal integer. @@ -124,10 +149,7 @@ TOP: * The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/ */ static bool -safe_decimal_number(pTHX_ SV *val) { - STRLEN len; - const char *p = SvPV(val, len); - +safe_decimal_number(const char *p, STRLEN len) { if (len == 1 && *p == '0') return TRUE; @@ -883,28 +905,24 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, sv_catsv(retval, totpad); sv_catsv(retval, ipad); - /* old logic was first to check utf8 flag, and if utf8 always + /* The (very) + old logic was first to check utf8 flag, and if utf8 always call esc_q_utf8. This caused test to break under -Mutf8, because there even strings like 'c' have utf8 flag on. Hence with quotekeys == 0 the XS code would still '' quote them based on flags, whereas the perl code would not, based on regexps. - The perl code is correct. - needs_quote() decides that anything that isn't a valid - perl identifier needs to be quoted, hence only correctly - formed strings with no characters outside [A-Za-z0-9_:] - won't need quoting. None of those characters are used in - the byte encoding of utf8, so anything with utf8 - encoded characters in will need quoting. Hence strings - with utf8 encoded characters in will end up inside do_utf8 - just like before, but now strings with utf8 flag set but - only ascii characters will end up in the unquoted section. - - There should also be less tests for the (probably currently) - more common doesn't need quoting case. - The code is also smaller (22044 vs 22260) because I've been - able to pull the common logic out to both sides. */ - if (quotekeys || needs_quote(key,keylen)) { + + The old logic checked that the string was a valid + perl glob name (foo::bar), which isn't safe under + strict, and differs from the perl code which only + accepts simple identifiers. + + With the fix for [perl #120384] I chose to make + their handling of key quoting compatible between XS + and perl. + */ + if (quotekeys || key_needs_quote(key,keylen)) { if (do_utf8 || useqq) { STRLEN ocur = SvCUR(retval); nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq); @@ -1094,7 +1112,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, #endif i = 0; else i -= 4; } - if (needs_quote(c,i)) { + if (globname_needs_quote(c,i)) { #ifdef GvNAMEUTF8 if (GvNAMEUTF8(val)) { sv_grow(retval, SvCUR(retval)+2); @@ -1188,18 +1206,18 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv, } #endif - /* the pure perl and XS non-qq outputs have historically been - * different in this case, but for useqq, let's try to match - * the pure perl code. - * see [perl #74798] - */ - else if (useqq && safe_decimal_number(aTHX_ val)) { - sv_catsv(retval, val); - } else { integer_came_from_string: - c = SvPV(val, i); - if (DO_UTF8(val) || useqq) + c = SvPV(val, i); + /* the pure perl and XS non-qq outputs have historically been + * different in this case, but for useqq, let's try to match + * the pure perl code. + * see [perl #74798] + */ + if (useqq && safe_decimal_number(c, i)) { + sv_catsv(retval, val); + } + else if (DO_UTF8(val) || useqq) i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq); else { sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */ diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t index 85609f1a0a..4cca53f791 100644 --- a/dist/Data-Dumper/t/dumper.t +++ b/dist/Data-Dumper/t/dumper.t @@ -899,11 +899,8 @@ TEST q(Data::Dumper->new([$a])->Dumpxs;) #}; EOT -# perl code does keys and values as numbers if possible -TEST q(Data::Dumper->new([$c])->Dump;); -# XS code always does them as strings -$WANT =~ s/ (\d+)/ '$1'/gs; -TEST q(Data::Dumper->new([$c])->Dumpxs;) +TEST q(Data::Dumper->new([$c])->Dump;), "sortkeys sub"; +TEST q(Data::Dumper->new([$c])->Dumpxs;), "sort keys sub (XS)" if $XS; } @@ -949,9 +946,10 @@ TEST q(Data::Dumper->new([$c])->Dumpxs;) #]; EOT -TEST q(Data::Dumper->new([[$c, $d]])->Dump;); -$WANT =~ s/ (\d+)/ '$1'/gs; -TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;) +TEST q(Data::Dumper->new([[$c, $d]])->Dump;), "more sortkeys sub"; +# the XS code does number values as strings +$WANT =~ s/ (\d+)(,?)$/ '$1'$2/gm; +TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;), "more sortkeys sub (XS)" if $XS; } diff --git a/dist/Data-Dumper/t/quotekeys.t b/dist/Data-Dumper/t/quotekeys.t index c633d56d64..a85882890b 100644 --- a/dist/Data-Dumper/t/quotekeys.t +++ b/dist/Data-Dumper/t/quotekeys.t @@ -15,7 +15,7 @@ BEGIN { use strict; use Data::Dumper; -use Test::More tests => 10; +use Test::More tests => 18; use lib qw( ./t/lib ); use Testing qw( _dumptostr ); @@ -90,5 +90,46 @@ sub run_tests_for_quotekeys { isnt($dumps{'ddqkzero'}, $dumps{'objqkundef'}, "\$Data::Dumper::Quotekeys = undef and = 0 are equivalent"); %dumps = (); + + local $Data::Dumper::Quotekeys = 1; + local $Data::Dumper::Sortkeys = 1; + local $Data::Dumper::Indent = 0; + local $Data::Dumper::Useqq = 0; + + my %qkdata = + ( + 0 => 1, + '012345' => 1, + 12 => 1, + 123456789 => 1, + 1234567890 => 1, + '::de::fg' => 1, + ab => 1, + 'hi::12' => 1, + "1\x{660}" => 1, + ); + + is(Dumper(\%qkdata), + q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};), + "always quote when quotekeys true"); + + { + local $Data::Dumper::Useqq = 1; + is(Dumper(\%qkdata), + q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};), + "always quote when quotekeys true (useqq)"); + } + + local $Data::Dumper::Quotekeys = 0; + + is(Dumper(\%qkdata), + q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};), + "avoid quotes when quotekeys false"); + { + local $Data::Dumper::Useqq = 1; + is(Dumper(\%qkdata), + q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};), + "avoid quotes when quotekeys false (useqq)"); + } } |