diff options
-rw-r--r-- | sv.c | 21 | ||||
-rw-r--r-- | t/io/utf8.t | 8 |
2 files changed, 24 insertions, 5 deletions
@@ -7720,13 +7720,26 @@ S_sv_gets_read_record(pTHX_ SV *const sv, PerlIO *const fp, I32 append) } if (charcount < recsize) { - /* read the rest of the current character, and maybe the - beginning of the next, if we need it */ - STRLEN readsize = (charstart ? 0 : skip - (bend - bufp)) - + (charcount + 1 < recsize); + STRLEN readsize; STRLEN bufp_offset = bufp - buffer; SSize_t morebytesread; + /* originally I read enough to fill any incomplete + character and the first byte of the next + character if needed, but if there's many + multi-byte encoded characters we're going to be + making a read call for every character beyond + the original read size. + + So instead, read the rest of the character if + any, and enough bytes to match at least the + start bytes for each character we're going to + read. + */ + if (charstart) + readsize = recsize - charcount; + else + readsize = skip - (bend - bufp) + recsize - charcount - 1; buffer = SvGROW(sv, append + bytesread + readsize + 1) + append; bend = buffer + bytesread; morebytesread = PerlIO_read(fp, bend, readsize); diff --git a/t/io/utf8.t b/t/io/utf8.t index 919b7342c2..29beba2020 100644 --- a/t/io/utf8.t +++ b/t/io/utf8.t @@ -10,7 +10,7 @@ BEGIN { no utf8; # needed for use utf8 not griping about the raw octets -plan(tests => 59); +plan(tests => 61); $| = 1; @@ -354,6 +354,8 @@ is($failed, undef); open F, ">:utf8", $a_file; print F "foo\xE4"; print F "bar\xFE"; + print F "\xC0\xC8\xCC\xD2"; + print F "a\xE4ab"; print F "a\xE4a"; close F; open F, "<:utf8", $a_file; @@ -363,6 +365,10 @@ is($failed, undef); $line .= <F>; is($line, "foo\xE4bar\xFE", "rcatline with \$/ = \\4"); $line = <F>; + is($line, "\xC0\xC8\xCC\xD2", "readline with several encoded characters"); + $line = <F>; + is($line, "a\xE4ab", "readline with another boundary condition"); + $line = <F>; is($line, "a\xE4a", "readline with boundary condition"); close F; |