Data::Dumper: the XS implementation now supports Deparse

This will provide a significant performance enhancement for callers that use deparsing (including Data::Dumper::Concise). There are no longer any configuration settings or (when run on Perl 5.21.10 or later) platforms that force use of the pure-Perl implementation.
author: Aaron Crane <arc@cpan.org> 2016-07-11 15:25:43 +0100
committer: Aaron Crane <arc@cpan.org> 2016-11-12 12:18:47 +0100
commit: b5048e7b9abae986778038f1e36281c6175b17e1 (patch)
tree: a43b6f0e59d933bd6a28051a7ebc78c62fe8c2b8 /dist/Data-Dumper
parent: 942cf643a5471ed6895acda61d22257363316daf (diff)
download: perl-b5048e7b9abae986778038f1e36281c6175b17e1.tar.gz
4 files changed, 107 insertions, 26 deletions
diff --git a/dist/Data-Dumper/Changes b/dist/Data-Dumper/Changes
index f9ea53fd55..9828fe0e65 100644
--- a/dist/Data-Dumper/Changes
+++ b/dist/Data-Dumper/Changes
@@ -6,6 +6,11 @@ Changes - public release history for Data::Dumper
 
 =over 8
 
+=item NEXT
+
+The XS implementation now handles the C<Deparse> option, so using it no
+longer forces use of the pure-Perl version.
+
 =item 2.161 (Jul 11 2016)
 
 Perl 5.12 fix/workaround until fixed PPPort release.
diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index aa62316c7b..8e3e4f1c34 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -227,7 +227,6 @@ sub DESTROY {}
 sub Dump {
     return &Dumpxs
     unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl})
-        || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse})
 
             # Use pure perl version on earlier releases on EBCDIC platforms
         || (! $IS_ASCII && $] lt 5.021_010);
@@ -1212,9 +1211,10 @@ $Data::Dumper::Deparse  I<or>  $I<OBJ>->Deparse(I<[NEWVAL]>)
 
 Can be set to a boolean value to control whether code references are
 turned into perl source code. If set to a true value, C<B::Deparse>
-will be used to get the source of the code reference. Using this option
-will force using the Perl implementation of the dumper, since the fast
-XSUB implementation doesn't support it.
+will be used to get the source of the code reference. In older versions,
+using this option imposed a significant performance penalty when dumping
+parts of a data structure other than code references, but that is no
+longer the case.
 
 Caution : use this option only if you know that your coderefs will be
 properly reconstructed by C<B::Deparse>.
@@ -1435,15 +1435,9 @@ the C<Deparse> flag), an anonymous subroutine that
 contains the string '"DUMMY"' will be inserted in its place, and a warning
 will be printed if C<Purity> is set.  You can C<eval> the result, but bear
 in mind that the anonymous sub that gets created is just a placeholder.
-Someday, perl will have a switch to cache-on-demand the string
-representation of a compiled piece of code, I hope.  If you have prior
-knowledge of all the code refs that your data structures are likely
-to have, you can use the C<Seen> method to pre-seed the internal reference
-table and make the dumped output point to them, instead.  See L</EXAMPLES>
-above.
-
-The C<Deparse> flag makes Dump() run slower, since the XSUB
-implementation does not support it.
+Even using the C<Deparse> flag will in some cases produce results that
+behave differently after being passed to C<eval>; see the documentation
+for L<B::Deparse>.
 
 SCALAR objects have the weirdest looking C<bless> workaround.
 
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 5a658318b1..7960ec00d4 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -63,6 +63,7 @@ typedef struct {
     I32 useqq;
     int use_sparse_seen_hash;
     int trailingcomma;
+    int deparse;
 } Style;
 
 static STRLEN num_q (const char *s, STRLEN slen);
@@ -505,6 +506,51 @@ sv_x(pTHX_ SV *sv, const char *str, STRLEN len, I32 n)
     return sv;
 }
 
+static SV *
+deparsed_output(pTHX_ SV *val)
+{
+    SV *text;
+    int n;
+    dSP;
+
+    /* This is passed to load_module(), which decrements its ref count and
+     * modifies it (so we also can't reuse it below) */
+    SV *pkg = newSVpvs("B::Deparse");
+
+    load_module(PERL_LOADMOD_NOIMPORT, pkg, 0);
+
+    SAVETMPS;
+
+    PUSHMARK(SP);
+    mXPUSHs(newSVpvs("B::Deparse"));
+    PUTBACK;
+
+    n = call_method("new", G_SCALAR);
+    SPAGAIN;
+
+    if (n != 1) {
+        croak("B::Deparse->new returned %d items, but expected exactly 1", n);
+    }
+
+    PUSHMARK(SP - n);
+    XPUSHs(val);
+    PUTBACK;
+
+    n = call_method("coderef2text", G_SCALAR);
+    SPAGAIN;
+
+    if (n != 1) {
+        croak("$b_deparse->coderef2text returned %d items, but expected exactly 1", n);
+    }
+
+    text = POPs;
+    SvREFCNT_inc(text);         /* the caller will mortalise this */
+
+    FREETMPS;
+
+    return text;
+}
+
 /*
  * This ought to be split into smaller functions. (it is one long function since
  * it exactly parallels the perl version, which was one long thing for
@@ -1095,9 +1141,41 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	    SvREFCNT_dec(totpad);
 	}
 	else if (realtype == SVt_PVCV) {
-	    sv_catpvs(retval, "sub { \"DUMMY\" }");
-            if (style->purity)
-		warn("Encountered CODE ref, using dummy placeholder");
+            if (style->deparse) {
+                SV *deparsed = sv_2mortal(deparsed_output(aTHX_ val));
+                SV *fullpad = sv_2mortal(newSVsv(style->sep));
+                const char *p;
+                STRLEN plen;
+                I32 i;
+
+                sv_catsv(fullpad, style->pad);
+                sv_catsv(fullpad, apad);
+                for (i = 0; i < level; i++) {
+                    sv_catsv(fullpad, style->xpad);
+                }
+
+                sv_catpvs(retval, "sub ");
+                p = SvPV(deparsed, plen);
+                while (plen > 0) {
+                    const char *nl = (const char *) memchr(p, '\n', plen);
+                    if (!nl) {
+                        sv_catpvn(retval, p, plen);
+                        break;
+                    }
+                    else {
+                        size_t n = nl - p;
+                        sv_catpvn(retval, p, n);
+                        sv_catsv(retval, fullpad);
+                        p += n + 1;
+                        plen -= n + 1;
+                    }
+                }
+            }
+            else {
+                sv_catpvs(retval, "sub { \"DUMMY\" }");
+                if (style->purity)
+                    warn("Encountered CODE ref, using dummy placeholder");
+            }
 	}
 	else {
 	    warn("cannot handle ref type %d", (int)realtype);
@@ -1452,6 +1530,8 @@ Data_Dumper_Dumpxs(href, ...)
                     style.quotekeys = SvTRUE(*svp);
                 if ((svp = hv_fetchs(hv, "trailingcomma", FALSE)))
                     style.trailingcomma = SvTRUE(*svp);
+                if ((svp = hv_fetchs(hv, "deparse", FALSE)))
+                    style.deparse = SvTRUE(*svp);
 		if ((svp = hv_fetchs(hv, "bless", FALSE)))
                     style.bless = *svp;
 		if ((svp = hv_fetchs(hv, "maxdepth", FALSE)))
diff --git a/dist/Data-Dumper/t/deparse.t b/dist/Data-Dumper/t/deparse.t
index c281fcea02..cddde8cb6e 100644
--- a/dist/Data-Dumper/t/deparse.t
+++ b/dist/Data-Dumper/t/deparse.t
@@ -15,7 +15,7 @@ BEGIN {
 use strict;
 
 use Data::Dumper;
-use Test::More tests =>  8;
+use Test::More tests =>  16;
 use lib qw( ./t/lib );
 use Testing qw( _dumptostr );
 
@@ -24,7 +24,9 @@ use Testing qw( _dumptostr );
 
 note("\$Data::Dumper::Deparse and Deparse()");
 
-{
+for my $useperl (0, 1) {
+    local $Data::Dumper::Useperl = $useperl;
+
     my ($obj, %dumps, $deparse, $starting);
     use strict;
     my $struct = { foo => "bar\nbaz", quux => sub { "fleem" } };
@@ -46,11 +48,11 @@ note("\$Data::Dumper::Deparse and Deparse()");
     $dumps{'objzero'} = _dumptostr($obj);
 
     is($dumps{'noprev'}, $dumps{'dddzero'},
-        "No previous setting and \$Data::Dumper::Deparse = 0 are equivalent");
+        "No previous setting and \$Data::Dumper::Deparse = 0 are equivalent (useperl=$useperl)");
     is($dumps{'noprev'}, $dumps{'objempty'},
-        "No previous setting and Deparse() are equivalent");
+        "No previous setting and Deparse() are equivalent (useperl=$useperl)");
     is($dumps{'noprev'}, $dumps{'objzero'},
-        "No previous setting and Deparse(0) are equivalent");
+        "No previous setting and Deparse(0) are equivalent (useperl=$useperl)");
 
     local $Data::Dumper::Deparse = 1;
     $obj = Data::Dumper->new( [ $struct ] );
@@ -62,19 +64,19 @@ note("\$Data::Dumper::Deparse and Deparse()");
     $dumps{'objone'} = _dumptostr($obj);
 
     is($dumps{'dddtrue'}, $dumps{'objone'},
-        "\$Data::Dumper::Deparse = 1 and Deparse(1) are equivalent");
+        "\$Data::Dumper::Deparse = 1 and Deparse(1) are equivalent (useperl=$useperl)");
 
     isnt($dumps{'dddzero'}, $dumps{'dddtrue'},
-        "\$Data::Dumper::Deparse = 0 differs from \$Data::Dumper::Deparse = 1");
+        "\$Data::Dumper::Deparse = 0 differs from \$Data::Dumper::Deparse = 1 (useperl=$useperl)");
 
     like($dumps{'dddzero'},
         qr/quux.*?sub.*?DUMMY/s,
-        "\$Data::Dumper::Deparse = 0 reports DUMMY instead of deparsing coderef");
+        "\$Data::Dumper::Deparse = 0 reports DUMMY instead of deparsing coderef (useperl=$useperl)");
     unlike($dumps{'dddtrue'},
         qr/quux.*?sub.*?DUMMY/s,
-        "\$Data::Dumper::Deparse = 1 does not report DUMMY");
+        "\$Data::Dumper::Deparse = 1 does not report DUMMY (useperl=$useperl)");
     like($dumps{'dddtrue'},
         qr/quux.*?sub.*?use\sstrict.*?fleem/s,
-        "\$Data::Dumper::Deparse = 1 deparses coderef");
+        "\$Data::Dumper::Deparse = 1 deparses coderef (useperl=$useperl)");
 }
author	Aaron Crane <arc@cpan.org>	2016-07-11 15:25:43 +0100
committer	Aaron Crane <arc@cpan.org>	2016-11-12 12:18:47 +0100
commit	b5048e7b9abae986778038f1e36281c6175b17e1 (patch)
tree	a43b6f0e59d933bd6a28051a7ebc78c62fe8c2b8 /dist/Data-Dumper
parent	942cf643a5471ed6895acda61d22257363316daf (diff)
download	perl-b5048e7b9abae986778038f1e36281c6175b17e1.tar.gz