summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Cook <tony@develop-help.com>2013-12-18 15:32:20 +1100
committerTony Cook <tony@develop-help.com>2013-12-18 15:32:20 +1100
commit5b50ddc0fecad7c18ba9a41a6996121a236a36f0 (patch)
tree1330e4215a5f9aedf66271ec1618c60e27cd4932
parent09c658110f06839ac776873506c8541cccfd0739 (diff)
downloadperl-5b50ddc0fecad7c18ba9a41a6996121a236a36f0.tar.gz
[perl #120384] make hash key quoting compatible between perl and XS
In particular: - if quotekeys is set all hash keys are now quoted, previously the perl code didn't quote "safe" numeric keys - keys of the form ::foo are now quoted by XS as the perl code always did - XS code quoted "safe" numbers, while the perl code didn't - perl code didn't quote strings like "1\x{660}", since \x{660} matches \d
-rw-r--r--dist/Data-Dumper/Dumper.pm16
-rw-r--r--dist/Data-Dumper/Dumper.xs100
-rw-r--r--dist/Data-Dumper/t/dumper.t14
-rw-r--r--dist/Data-Dumper/t/quotekeys.t43
4 files changed, 119 insertions, 54 deletions
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 96ff4926ff..7acf6faaa5 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -440,8 +440,15 @@ sub _dump {
() )
{
my $nk = $s->_dump($k, "");
- $nk = $1
- if !$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/;
+
+ # _dump doesn't quote numbers of this form
+ if ($s->{quotekeys} && $nk =~ /^(?:0|-?[1-9][0-9]{0,8})\z/) {
+ $nk = $s->{useqq} ? qq("$nk") : qq('$nk');
+ }
+ elsif (!$s->{quotekeys} and $nk =~ /^[\"\']([A-Za-z_]\w*)[\"\']$/) {
+ $nk = $1
+ }
+
$sname = $mname . '{' . $nk . '}';
$out .= $pad . $ipad . $nk . $pair;
@@ -546,7 +553,8 @@ sub _dump {
and ref $ref eq 'VSTRING' || eval{Scalar::Util::isvstring($val)}) {
$out .= sprintf "%vd", $val;
}
- elsif ($val =~ /^(?:0|-?[1-9]\d{0,8})\z/) { # safe decimal number
+ # \d here would treat "1\x{660}" as a safe decimal number
+ elsif ($val =~ /^(?:0|-?[1-9][0-9]{0,8})\z/) { # safe decimal number
$out .= $val;
}
else { # string
@@ -1390,7 +1398,7 @@ modify it under the same terms as Perl itself.
=head1 VERSION
-Version 2.150 (November 21 2013)
+Version 2.150 (December 18 2013)
=head1 SEE ALSO
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 0bdcbe06f3..12c4ebd9f6 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -19,7 +19,9 @@
static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
-static I32 needs_quote(const char *s, STRLEN len);
+static bool globname_needs_quote(const char *s, STRLEN len);
+static bool key_needs_quote(const char *s, STRLEN len);
+static bool safe_decimal_number(const char *p, STRLEN len);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
HV *seenhv, AV *postav, I32 *levelp, I32 indent,
@@ -91,19 +93,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
#define DD_is_integer(sv) SvIOK(sv)
#endif
-/* does a string need to be protected? */
-static I32
-needs_quote(const char *s, STRLEN len)
+/* does a glob name need to be protected? */
+static bool
+globname_needs_quote(const char *s, STRLEN len)
{
const char *send = s+len;
TOP:
if (s[0] == ':') {
if (++s<send) {
if (*s++ != ':')
- return 1;
+ return TRUE;
}
else
- return 1;
+ return TRUE;
}
if (isIDFIRST(*s)) {
while (++s<send)
@@ -111,12 +113,35 @@ TOP:
if (*s == ':')
goto TOP;
else
- return 1;
+ return TRUE;
}
}
else
- return 1;
- return 0;
+ return TRUE;
+
+ return FALSE;
+}
+
+/* does a hash key need to be quoted (to the left of => ).
+ Previously this used (globname_)needs_quote() which accepted strings
+ like '::foo', but these aren't safe as unquoted keys under strict.
+*/
+static bool
+key_needs_quote(const char *s, STRLEN len) {
+ const char *send = s+len;
+
+ if (safe_decimal_number(s, len)) {
+ return FALSE;
+ }
+ else if (isIDFIRST(*s)) {
+ while (++s<send)
+ if (!isWORDCHAR(*s))
+ return TRUE;
+ }
+ else
+ return TRUE;
+
+ return FALSE;
}
/* Check that the SV can be represented as a simple decimal integer.
@@ -124,10 +149,7 @@ TOP:
* The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/
*/
static bool
-safe_decimal_number(pTHX_ SV *val) {
- STRLEN len;
- const char *p = SvPV(val, len);
-
+safe_decimal_number(const char *p, STRLEN len) {
if (len == 1 && *p == '0')
return TRUE;
@@ -883,28 +905,24 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
sv_catsv(retval, totpad);
sv_catsv(retval, ipad);
- /* old logic was first to check utf8 flag, and if utf8 always
+ /* The (very)
+ old logic was first to check utf8 flag, and if utf8 always
call esc_q_utf8. This caused test to break under -Mutf8,
because there even strings like 'c' have utf8 flag on.
Hence with quotekeys == 0 the XS code would still '' quote
them based on flags, whereas the perl code would not,
based on regexps.
- The perl code is correct.
- needs_quote() decides that anything that isn't a valid
- perl identifier needs to be quoted, hence only correctly
- formed strings with no characters outside [A-Za-z0-9_:]
- won't need quoting. None of those characters are used in
- the byte encoding of utf8, so anything with utf8
- encoded characters in will need quoting. Hence strings
- with utf8 encoded characters in will end up inside do_utf8
- just like before, but now strings with utf8 flag set but
- only ascii characters will end up in the unquoted section.
-
- There should also be less tests for the (probably currently)
- more common doesn't need quoting case.
- The code is also smaller (22044 vs 22260) because I've been
- able to pull the common logic out to both sides. */
- if (quotekeys || needs_quote(key,keylen)) {
+
+ The old logic checked that the string was a valid
+ perl glob name (foo::bar), which isn't safe under
+ strict, and differs from the perl code which only
+ accepts simple identifiers.
+
+ With the fix for [perl #120384] I chose to make
+ their handling of key quoting compatible between XS
+ and perl.
+ */
+ if (quotekeys || key_needs_quote(key,keylen)) {
if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
@@ -1094,7 +1112,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
#endif
i = 0; else i -= 4;
}
- if (needs_quote(c,i)) {
+ if (globname_needs_quote(c,i)) {
#ifdef GvNAMEUTF8
if (GvNAMEUTF8(val)) {
sv_grow(retval, SvCUR(retval)+2);
@@ -1188,18 +1206,18 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
}
#endif
- /* the pure perl and XS non-qq outputs have historically been
- * different in this case, but for useqq, let's try to match
- * the pure perl code.
- * see [perl #74798]
- */
- else if (useqq && safe_decimal_number(aTHX_ val)) {
- sv_catsv(retval, val);
- }
else {
integer_came_from_string:
- c = SvPV(val, i);
- if (DO_UTF8(val) || useqq)
+ c = SvPV(val, i);
+ /* the pure perl and XS non-qq outputs have historically been
+ * different in this case, but for useqq, let's try to match
+ * the pure perl code.
+ * see [perl #74798]
+ */
+ if (useqq && safe_decimal_number(c, i)) {
+ sv_catsv(retval, val);
+ }
+ else if (DO_UTF8(val) || useqq)
i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index 85609f1a0a..4cca53f791 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -899,11 +899,8 @@ TEST q(Data::Dumper->new([$a])->Dumpxs;)
#};
EOT
-# perl code does keys and values as numbers if possible
-TEST q(Data::Dumper->new([$c])->Dump;);
-# XS code always does them as strings
-$WANT =~ s/ (\d+)/ '$1'/gs;
-TEST q(Data::Dumper->new([$c])->Dumpxs;)
+TEST q(Data::Dumper->new([$c])->Dump;), "sortkeys sub";
+TEST q(Data::Dumper->new([$c])->Dumpxs;), "sort keys sub (XS)"
if $XS;
}
@@ -949,9 +946,10 @@ TEST q(Data::Dumper->new([$c])->Dumpxs;)
#];
EOT
-TEST q(Data::Dumper->new([[$c, $d]])->Dump;);
-$WANT =~ s/ (\d+)/ '$1'/gs;
-TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;)
+TEST q(Data::Dumper->new([[$c, $d]])->Dump;), "more sortkeys sub";
+# the XS code does number values as strings
+$WANT =~ s/ (\d+)(,?)$/ '$1'$2/gm;
+TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;), "more sortkeys sub (XS)"
if $XS;
}
diff --git a/dist/Data-Dumper/t/quotekeys.t b/dist/Data-Dumper/t/quotekeys.t
index c633d56d64..a85882890b 100644
--- a/dist/Data-Dumper/t/quotekeys.t
+++ b/dist/Data-Dumper/t/quotekeys.t
@@ -15,7 +15,7 @@ BEGIN {
use strict;
use Data::Dumper;
-use Test::More tests => 10;
+use Test::More tests => 18;
use lib qw( ./t/lib );
use Testing qw( _dumptostr );
@@ -90,5 +90,46 @@ sub run_tests_for_quotekeys {
isnt($dumps{'ddqkzero'}, $dumps{'objqkundef'},
"\$Data::Dumper::Quotekeys = undef and = 0 are equivalent");
%dumps = ();
+
+ local $Data::Dumper::Quotekeys = 1;
+ local $Data::Dumper::Sortkeys = 1;
+ local $Data::Dumper::Indent = 0;
+ local $Data::Dumper::Useqq = 0;
+
+ my %qkdata =
+ (
+ 0 => 1,
+ '012345' => 1,
+ 12 => 1,
+ 123456789 => 1,
+ 1234567890 => 1,
+ '::de::fg' => 1,
+ ab => 1,
+ 'hi::12' => 1,
+ "1\x{660}" => 1,
+ );
+
+ is(Dumper(\%qkdata),
+ q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};),
+ "always quote when quotekeys true");
+
+ {
+ local $Data::Dumper::Useqq = 1;
+ is(Dumper(\%qkdata),
+ q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};),
+ "always quote when quotekeys true (useqq)");
+ }
+
+ local $Data::Dumper::Quotekeys = 0;
+
+ is(Dumper(\%qkdata),
+ q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};),
+ "avoid quotes when quotekeys false");
+ {
+ local $Data::Dumper::Useqq = 1;
+ is(Dumper(\%qkdata),
+ q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};),
+ "avoid quotes when quotekeys false (useqq)");
+ }
}