summaryrefslogtreecommitdiff
path: root/pp_sys.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2004-12-29 19:00:12 +0000
committerNicholas Clark <nick@ccl4.org>2004-12-29 19:00:12 +0000
commit1dd30107ebac37cf465e5225a00d367454a7cb84 (patch)
tree53804068371bd6087eacc4e0b78f1c9a46d82be6 /pp_sys.c
parent69938bbac29d5bcb76b80f6eccb27c5ff84cee37 (diff)
downloadperl-1dd30107ebac37cf465e5225a00d367454a7cb84.tar.gz
read (and presuambly sysread) would expose the UTF8 internals when
reading from a byte orientated file handle into a UTF8 scalar. p4raw-id: //depot/perl@23703
Diffstat (limited to 'pp_sys.c')
-rw-r--r--pp_sys.c33
1 files changed, 30 insertions, 3 deletions
diff --git a/pp_sys.c b/pp_sys.c
index 3071f1bfea..edb69c29af 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -1555,6 +1555,8 @@ PP(pp_sysread)
STRLEN blen;
MAGIC *mg;
int fp_utf8;
+ int buffer_utf8;
+ SV *read_target;
Size_t got = 0;
Size_t wanted;
bool charstart = FALSE;
@@ -1605,6 +1607,7 @@ PP(pp_sysread)
}
else {
buffer = SvPV_force(bufsv, blen);
+ buffer_utf8 = !IN_BYTES && SvUTF8(bufsv);
}
if (length < 0)
DIE(aTHX_ "Negative length");
@@ -1672,11 +1675,30 @@ PP(pp_sysread)
}
more_bytes:
bufsize = SvCUR(bufsv);
+ /* Allocating length + offset + 1 isn't perfect in the case of reading
+ bytes from a byte file handle into a UTF8 buffer, but it won't harm us
+ unduly.
+ (should be 2 * length + offset + 1, or possibly something longer if
+ PL_encoding is true) */
buffer = SvGROW(bufsv, (STRLEN)(length+offset+1));
if (offset > bufsize) { /* Zero any newly allocated space */
Zero(buffer+bufsize, offset-bufsize, char);
}
buffer = buffer + offset;
+ if (!buffer_utf8) {
+ read_target = bufsv;
+ } else {
+ /* Best to read the bytes into a new SV, upgrade that to UTF8, then
+ concatenate it to the current buffer. */
+
+ /* Truncate the existing buffer to the start of where we will be
+ reading to: */
+ SvCUR_set(bufsv, offset);
+
+ read_target = sv_newmortal();
+ SvUPGRADE(read_target, SVt_PV);
+ buffer = SvGROW(read_target, length + 1);
+ }
if (PL_op->op_type == OP_SYSREAD) {
#ifdef PERL_SOCK_SYSREAD_IS_RECV
@@ -1716,9 +1738,9 @@ PP(pp_sysread)
report_evil_fh(gv, io, OP_phoney_OUTPUT_ONLY);
goto say_undef;
}
- SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv)));
- *SvEND(bufsv) = '\0';
- (void)SvPOK_only(bufsv);
+ SvCUR_set(read_target, count+(buffer - SvPVX(read_target)));
+ *SvEND(read_target) = '\0';
+ (void)SvPOK_only(read_target);
if (fp_utf8 && !IN_BYTES) {
/* Look at utf8 we got back and count the characters */
char *bend = buffer + count;
@@ -1754,6 +1776,11 @@ PP(pp_sysread)
count = got;
SvUTF8_on(bufsv);
}
+ else if (buffer_utf8) {
+ /* Let svcatsv upgrade the bytes we read in to utf8.
+ The buffer is a mortal so will be freed soon. */
+ sv_catsv_nomg(bufsv, read_target);
+ }
SvSETMAGIC(bufsv);
/* This should not be marked tainted if the fp is marked clean */
if (!(IoFLAGS(io) & IOf_UNTAINT))