summaryrefslogtreecommitdiff
path: root/ext/PerlIO-encoding
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-10-04 21:56:00 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-10-05 06:13:09 -0700
commit667763bdbf37a30596512ca0a08a720d86c7e2a8 (patch)
tree75605631198968fa4d4aa1518a67b9e044214ccb /ext/PerlIO-encoding
parent0c38a575805726e13941c02d1cdf5b6b5c4ded11 (diff)
downloadperl-667763bdbf37a30596512ca0a08a720d86c7e2a8.tar.gz
Make PerlIO::encoding more resilient to buffer changes
I was trying to figure out why Encode’s perlio.t was sometimes failing under PERL_OLD_COPY_ON_WRITE (depending on the number of comments in the source code, or metereological conditions). I noticed that PerlIO::encoding assumes that the buffer passed to the encode method will come back SvPOKp. (It accesses SvCUR without checking any flags.) That means it can come back as a typeglob, reference, or undefined, and PerlIO::encoding won’t care. This can result in crashes. Assign- ing $_[1] = *foo inside an encode method is not a smart thing to do, but it shouldn’t crash. PerlIO::encoding was also assuming that SvPVX would not change between calls to encode. It is very easy to reallocate it. This means the internal buffer used by the encoding layer (which is owned by the SV buffer passed to the encode method) can be freed and still subse- quently written too, which is not good. This commit makes PerlIO::encoding force stringification of the value returned. If it does not match its internal buffer pointers, it resets them based on the buffer SV. This probably makes Encode pass its tests under PERL_OLD_COPY_ON_WRITE, but I have yet to confirm it. Encoding mod- ules are expected to write to the buffer ($_[1] = '') in certain cases. If COW is enabled, that would cause the buffer’s SvPVX to point to the same string as the rhs, which would explain why the lack of accounting for SvPVX changes caused test failures under PERL_OLD_COPY_ON_WRITE.
Diffstat (limited to 'ext/PerlIO-encoding')
-rw-r--r--ext/PerlIO-encoding/encoding.xs5
-rw-r--r--ext/PerlIO-encoding/t/encoding.t66
2 files changed, 70 insertions, 1 deletions
diff --git a/ext/PerlIO-encoding/encoding.xs b/ext/PerlIO-encoding/encoding.xs
index 98d89e9ef4..3f27dec740 100644
--- a/ext/PerlIO-encoding/encoding.xs
+++ b/ext/PerlIO-encoding/encoding.xs
@@ -365,6 +365,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
/* Adjust ptr/cnt not taking anything which
did not translate - not clear this is a win */
/* compute amount we took */
+ if (!SvPOKp(e->dataSV)) (void)SvPV_force_nolen(e->dataSV);
use -= SvCUR(e->dataSV);
PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
/* and as we did not take it it isn't pending */
@@ -440,6 +441,10 @@ PerlIOEncode_flush(pTHX_ PerlIO * f)
if (PerlIO_flush(PerlIONext(f)) != 0) {
code = -1;
}
+ if (!SvPOKp(e->bufsv) || SvTHINKFIRST(e->bufsv))
+ (void)SvPV_force_nolen(e->bufsv);
+ if ((STDCHAR *)SvPVX(e->bufsv) != e->base.buf)
+ e->base.buf = (STDCHAR *)SvPVX(e->bufsv);
if (SvCUR(e->bufsv)) {
/* Did not all translate */
e->base.ptr = e->base.buf+SvCUR(e->bufsv);
diff --git a/ext/PerlIO-encoding/t/encoding.t b/ext/PerlIO-encoding/t/encoding.t
index 4642bd8e8c..71ba493f67 100644
--- a/ext/PerlIO-encoding/t/encoding.t
+++ b/ext/PerlIO-encoding/t/encoding.t
@@ -11,7 +11,7 @@ BEGIN {
}
}
-use Test::More tests => 18;
+use Test::More tests => 22;
my $grk = "grk$$";
my $utf = "utf$$";
@@ -124,6 +124,70 @@ if (ord('A') == 193) { # EBCDIC
is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
}
+# Check that PerlIO::encoding can handle custom encodings that do funny
+# things with the buffer.
+use Encode::Encoding;
+package Extensive {
+ @ISA = Encode::Encoding;
+ __PACKAGE__->Define('extensive');
+ sub encode($$;$) {
+ my ($self,$buf,$chk) = @_;
+ my $leftovers = '';
+ if ($buf =~ /(.*\n)(?!\z)/) {
+ $buf = $1;
+ $leftovers = $';
+ }
+ if ($chk) {
+ my $x = ' ' x 8000; # prevent realloc from simply extending the buffer
+ $_[1] = ' ' x 8000; # make SvPVX point elsewhere
+ $_[1] = $leftovers;
+ }
+ $buf;
+ }
+ no warnings 'once';
+ *decode = *encode;
+}
+open my $fh, ">:encoding(extensive)", \$buf;
+$fh->autoflush;
+print $fh "doughnut\n";
+print $fh "quaffee\n";
+close $fh;
+is $buf, "doughnut\nquaffee\n", 'buffer realloc during encoding';
+$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
+open $fh, "<:encoding(extensive)", \$buf;
+is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
+ 'buffer realloc during decoding';
+
+package Globber {
+ no warnings 'once';
+ @ISA = Encode::Encoding;
+ __PACKAGE__->Define('globber');
+ sub encode($$;$) {
+ my ($self,$buf,$chk) = @_;
+ $_[1] = *foo if $chk;
+ $buf;
+ }
+ *decode = *encode;
+}
+
+# Here we just want to test there is no crash. The actual output is not so
+# important.
+# We need a double eval, as scope unwinding will close the handle,
+# which croaks.
+eval { eval {
+ open my $fh, ">:encoding(globber)", \$buf;
+ print $fh "Agathopous Goodfoot\n";
+ close $fh;
+}; $e = $@};
+like $@||$e, qr/Close with partial character/,
+ 'no crash when assigning glob to buffer in encode';
+$buf = "To hymn him who heard her herd herd\n";
+open $fh, "<:encoding(globber)", \$buf;
+my $x = <$fh>;
+close $fh;
+is $x, "To hymn him who heard her herd herd\n",
+ 'no crash when assigning glob to buffer in decode';
+
END {
1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
}