summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2002-05-04 13:38:58 +0100
committerJarkko Hietaniemi <jhi@iki.fi>2002-05-04 15:47:49 +0000
commitfdce9ba9224e8263957ca62d8449b3b8e13606ea (patch)
treee85a7eb531b0a34db024aa2bb13adee4d32792cd
parentd075f8ed0c85617a33177fa8812167b4177c2522 (diff)
downloadperl-fdce9ba9224e8263957ca62d8449b3b8e13606ea.tar.gz
Re: [PATCH] Re: Data::Dumper tests with -Mutf8
Message-ID: <20020504113857.GC317@Bagpuss.unfortu.net> p4raw-id: //depot/perl@16390
-rw-r--r--ext/Data/Dumper/Dumper.xs75
-rwxr-xr-xext/Data/Dumper/t/dumper.t23
2 files changed, 67 insertions, 31 deletions
diff --git a/ext/Data/Dumper/Dumper.xs b/ext/Data/Dumper/Dumper.xs
index 0e12cbf513..11a97d2465 100644
--- a/ext/Data/Dumper/Dumper.xs
+++ b/ext/Data/Dumper/Dumper.xs
@@ -574,10 +574,12 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
/* foreach (keys %hash) */
for (i = 0; 1; i++) {
- char *nkey = NULL;
+ char *nkey;
+ char *nkey_buffer = NULL;
I32 nticks = 0;
SV* keysv;
STRLEN keylen;
+ I32 nlen;
bool do_utf8 = FALSE;
if ((sortkeys && !(keys && (I32)i <= av_len(keys))) ||
@@ -605,23 +607,39 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
key = SvPV(keysv, keylen);
klen = keylen;
- if (do_utf8) {
- STRLEN ocur;
- I32 nlen;
+ sv_catsv(retval, totpad);
+ sv_catsv(retval, ipad);
+ /* old logic was first to check utf8 flag, and if utf8 always
+ call esc_q_utf8. This caused test to break under -Mutf8,
+ because there even strings like 'c' have utf8 flag on.
+ Hence with quotekeys == 0 the XS code would still '' quote
+ them based on flags, whereas the perl code would not,
+ based on regexps.
+ The perl code is correct.
+ needs_quote() decides that anything that isn't a valid
+ perl identifier needs to be quoted, hence only correctly
+ formed strings with no characters outside [A-Za-z0-9_:]
+ won't need quoting. None of those characters are used in
+ the byte encoding of utf8, so anything with utf8
+ encoded characters in will need quoting. Hence strings
+ with utf8 encoded characters in will end up inside do_utf8
+ just like before, but now strings with utf8 flag set but
+ only ascii characters will end up in the unquoted section.
- sv_catsv(retval, totpad);
- sv_catsv(retval, ipad);
- ocur = SvCUR(retval);
- nlen = esc_q_utf8(aTHX_ retval, key, klen);
-
- sname = newSVsv(iname);
- sv_catpvn(sname, SvPVX(retval) + ocur, nlen);
- sv_catpvn(sname, "}", 1);
- }
- else {
- if (quotekeys || needs_quote(key)) {
+ There should also be less tests for the (probably currently)
+ more common doesn't need quoting case.
+ The code is also smaller (22044 vs 22260) because I've been
+ able to pull the comon logic out to both sides. */
+ if (quotekeys || needs_quote(key)) {
+ if (do_utf8) {
+ STRLEN ocur = SvCUR(retval);
+ nlen = esc_q_utf8(aTHX_ retval, key, klen);
+ nkey = SvPVX(retval) + ocur;
+ }
+ else {
nticks = num_q(key, klen);
- New(0, nkey, klen+nticks+3, char);
+ New(0, nkey_buffer, klen+nticks+3, char);
+ nkey = nkey_buffer;
nkey[0] = '\'';
if (nticks)
klen += esc_q(nkey+1, key, klen);
@@ -629,20 +647,19 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
(void)Copy(key, nkey+1, klen, char);
nkey[++klen] = '\'';
nkey[++klen] = '\0';
+ nlen = klen;
+ sv_catpvn(retval, nkey, klen);
}
- else {
- New(0, nkey, klen, char);
- (void)Copy(key, nkey, klen, char);
- }
-
- sname = newSVsv(iname);
- sv_catpvn(sname, nkey, klen);
- sv_catpvn(sname, "}", 1);
-
- sv_catsv(retval, totpad);
- sv_catsv(retval, ipad);
- sv_catpvn(retval, nkey, klen);
+ }
+ else {
+ nkey = key;
+ nlen = klen;
+ sv_catpvn(retval, nkey, klen);
}
+ sname = newSVsv(iname);
+ sv_catpvn(sname, nkey, nlen);
+ sv_catpvn(sname, "}", 1);
+
sv_catpvn(retval, " => ", 4);
if (indent >= 2) {
char *extra;
@@ -663,7 +680,7 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys);
SvREFCNT_dec(sname);
- Safefree(nkey);
+ Safefree(nkey_buffer);
if (indent >= 2)
SvREFCNT_dec(newapad);
}
diff --git a/ext/Data/Dumper/t/dumper.t b/ext/Data/Dumper/t/dumper.t
index 2955a7f0fa..9c7f0a6870 100755
--- a/ext/Data/Dumper/t/dumper.t
+++ b/ext/Data/Dumper/t/dumper.t
@@ -67,11 +67,11 @@ sub TEST {
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
- $TMAX = 351; $XS = 1;
+ $TMAX = 357; $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
- $TMAX = 177; $XS = 0;
+ $TMAX = 180; $XS = 0;
}
print "1..$TMAX\n";
@@ -1334,3 +1334,22 @@ EOT
TEST q(Data::Dumper->Dumpxs([\\*ping, \\%ping], ['*ping', '*pong'])) if $XS;
}
}
+
+# XS for quotekeys==0 was not being defensive enough against utf8 flagged
+# scalars
+
+{
+ $WANT = <<'EOT';
+#$VAR1 = {
+# perl => 'rocks'
+#};
+EOT
+ local $Data::Dumper::Quotekeys = 0;
+ my $k = 'perl' . chr 256;
+ chop $k;
+ %foo = ($k => 'rocks');
+
+ TEST q(Data::Dumper->Dump([\\%foo])), "quotekeys == 0 for utf8 flagged ASCII";
+ TEST q(Data::Dumper->Dumpxs([\\%foo])),
+ "XS quotekeys == 0 for utf8 flagged ASCII" if $XS;
+}