diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-10 02:52:46 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-10 02:52:46 +0000 |
commit | 33654bcb08b10f3c01b7921732dd84d16906e0e6 (patch) | |
tree | c38c0418d5b093d0a6053a05af6b8aa2683fd20b | |
parent | c184d1a61a4639abab164d4f2ad4e87051c3aa77 (diff) | |
parent | eb5c063ad00efba84ec136b0b86cfe12eb4ab14c (diff) | |
download | perl-33654bcb08b10f3c01b7921732dd84d16906e0e6.tar.gz |
Integrate perlio:
[ 8059]
read/sysread/recv should now be utf8 aware.
Basic test for utf8 read.
p4raw-link: @8059 on //depot/perlio: eb5c063ad00efba84ec136b0b86cfe12eb4ab14c
p4raw-id: //depot/perl@8060
-rw-r--r-- | pp_sys.c | 84 | ||||
-rwxr-xr-x | t/io/utf8.t | 9 |
2 files changed, 72 insertions, 21 deletions
@@ -1474,10 +1474,14 @@ PP(pp_sysread) IO *io; char *buffer; SSize_t length; + SSize_t count; Sock_size_t bufsize; SV *bufsv; STRLEN blen; MAGIC *mg; + int fp_utf8; + Size_t got = 0; + Size_t wanted; gv = (GV*)*++MARK; if ((PL_op->op_type == OP_READ || PL_op->op_type == OP_SYSREAD) && @@ -1511,14 +1515,17 @@ PP(pp_sysread) io = GvIO(gv); if (!io || !IoIFP(io)) goto say_undef; - if (PerlIO_isutf8(IoIFP(io))) { + if ((fp_utf8 = PerlIO_isutf8(IoIFP(io))) && !IN_BYTE) { buffer = SvPVutf8_force(bufsv, blen); + /* UTF8 may not have been set if they are all low bytes */ + SvUTF8_on(bufsv); } else { buffer = SvPV_force(bufsv, blen); } if (length < 0) DIE(aTHX_ "Negative length"); + wanted = length; #ifdef HAS_SOCKET if (PL_op->op_type == OP_RECV) { @@ -1534,13 +1541,15 @@ PP(pp_sysread) #endif buffer = SvGROW(bufsv, length+1); /* 'offset' means 'flags' here */ - length = PerlSock_recvfrom(PerlIO_fileno(IoIFP(io)), buffer, length, offset, + count = PerlSock_recvfrom(PerlIO_fileno(IoIFP(io)), buffer, length, offset, (struct sockaddr *)namebuf, &bufsize); - if (length < 0) + if (count < 0) RETPUSHUNDEF; - SvCUR_set(bufsv, length); + SvCUR_set(bufsv, count); *SvEND(bufsv) = '\0'; (void)SvPOK_only(bufsv); + if (fp_utf8) + SvUTF8_on(bufsv); SvSETMAGIC(bufsv); /* This should not be marked tainted if the fp is marked clean */ if (!(IoFLAGS(io) & IOf_UNTAINT)) @@ -1554,31 +1563,38 @@ PP(pp_sysread) if (PL_op->op_type == OP_RECV) DIE(aTHX_ PL_no_sock_func, "recv"); #endif - if (SvUTF8(bufsv) && offset) { - /* FIXME ! */ - Perl_croak(aTHX_ "Non zero offset not supported yet for utf8"); + if (DO_UTF8(bufsv)) { + /* offset adjust in characters not bytes */ + blen = sv_len_utf8(bufsv); } if (offset < 0) { if (-offset > blen) DIE(aTHX_ "Offset outside string"); offset += blen; } + if (DO_UTF8(bufsv)) { + /* convert offset-as-chars to offset-as-bytes */ + offset = utf8_hop((U8 *)buffer,offset) - (U8 *) buffer; + } + more_bytes: bufsize = SvCUR(bufsv); - buffer = SvGROW(bufsv, length+offset+1); + buffer = SvGROW(bufsv, length+offset+1); if (offset > bufsize) { /* Zero any newly allocated space */ Zero(buffer+bufsize, offset-bufsize, char); } + buffer = buffer + offset; + if (PL_op->op_type == OP_SYSREAD) { #ifdef PERL_SOCK_SYSREAD_IS_RECV if (IoTYPE(io) == IoTYPE_SOCKET) { - length = PerlSock_recv(PerlIO_fileno(IoIFP(io)), - buffer+offset, length, 0); + count = PerlSock_recv(PerlIO_fileno(IoIFP(io)), + buffer, length, 0); } else #endif { - length = PerlLIO_read(PerlIO_fileno(IoIFP(io)), - buffer+offset, length); + count = PerlLIO_read(PerlIO_fileno(IoIFP(io)), + buffer, length); } } else @@ -1590,18 +1606,18 @@ PP(pp_sysread) #else bufsize = sizeof namebuf; #endif - length = PerlSock_recvfrom(PerlIO_fileno(IoIFP(io)), buffer+offset, length, 0, + count = PerlSock_recvfrom(PerlIO_fileno(IoIFP(io)), buffer, length, 0, (struct sockaddr *)namebuf, &bufsize); } else #endif { - length = PerlIO_read(IoIFP(io), buffer+offset, length); - /* fread() returns 0 on both error and EOF */ - if (length == 0 && PerlIO_error(IoIFP(io))) - length = -1; + count = PerlIO_read(IoIFP(io), buffer, length); + /* PerlIO_read() - like fread() returns 0 on both error and EOF */ + if (count == 0 && PerlIO_error(IoIFP(io))) + count = -1; } - if (length < 0) { + if (count < 0) { if ((IoTYPE(io) == IoTYPE_WRONLY || IoIFP(io) == PerlIO_stdout() || IoIFP(io) == PerlIO_stderr()) && ckWARN(WARN_IO)) { @@ -1621,15 +1637,43 @@ PP(pp_sysread) } goto say_undef; } - SvCUR_set(bufsv, length+offset); + SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv))); *SvEND(bufsv) = '\0'; (void)SvPOK_only(bufsv); + if (fp_utf8 && !IN_BYTE) { + /* Look at utf8 we got back and count the characters */ + char *bend = buffer + count; + while (buffer < bend) { + STRLEN skip = UTF8SKIP(buffer); + if (buffer+skip > bend) { + /* partial character - try for rest of it */ + length = skip - (bend-buffer); + offset = bend - SvPVX(bufsv); + goto more_bytes; + } + else { + got++; + buffer += skip; + } + } + /* If we have not 'got' the number of _characters_ we 'wanted' get some more + provided amount read (count) was what was requested (length) + */ + if (got < wanted && count == length) { + length = (wanted-got); + offset = bend - SvPVX(bufsv); + goto more_bytes; + } + /* return value is character count */ + count = got; + SvUTF8_on(bufsv); + } SvSETMAGIC(bufsv); /* This should not be marked tainted if the fp is marked clean */ if (!(IoFLAGS(io) & IOf_UNTAINT)) SvTAINTED_on(bufsv); SP = ORIGMARK; - PUSHi(length); + PUSHi(count); RETURN; say_undef: diff --git a/t/io/utf8.t b/t/io/utf8.t index 1e47c33d3f..f4be69d3a0 100755 --- a/t/io/utf8.t +++ b/t/io/utf8.t @@ -11,7 +11,7 @@ BEGIN { } $| = 1; -print "1..11\n"; +print "1..13\n"; open(F,"+>:utf8",'a'); print F chr(0x100).'£'; @@ -45,6 +45,13 @@ seek(F,0,0); binmode(F,":utf8"); print "not " unless scalar(<F>) eq "\x{100}£\n"; print "ok 11\n"; +seek(F,0,0); +$buf = chr(0x200); +$count = read(F,$buf,2,1); +print "not " unless $count == 2; +print "ok 12\n"; +print "not " unless $buf eq "\x{200}\x{100}£"; +print "ok 13\n"; close(F); # unlink('a'); |