summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sv.c21
-rw-r--r--t/io/utf8.t8
2 files changed, 24 insertions, 5 deletions
diff --git a/sv.c b/sv.c
index e2bed05753..34dc534503 100644
--- a/sv.c
+++ b/sv.c
@@ -7720,13 +7720,26 @@ S_sv_gets_read_record(pTHX_ SV *const sv, PerlIO *const fp, I32 append)
}
if (charcount < recsize) {
- /* read the rest of the current character, and maybe the
- beginning of the next, if we need it */
- STRLEN readsize = (charstart ? 0 : skip - (bend - bufp))
- + (charcount + 1 < recsize);
+ STRLEN readsize;
STRLEN bufp_offset = bufp - buffer;
SSize_t morebytesread;
+ /* originally I read enough to fill any incomplete
+ character and the first byte of the next
+ character if needed, but if there's many
+ multi-byte encoded characters we're going to be
+ making a read call for every character beyond
+ the original read size.
+
+ So instead, read the rest of the character if
+ any, and enough bytes to match at least the
+ start bytes for each character we're going to
+ read.
+ */
+ if (charstart)
+ readsize = recsize - charcount;
+ else
+ readsize = skip - (bend - bufp) + recsize - charcount - 1;
buffer = SvGROW(sv, append + bytesread + readsize + 1) + append;
bend = buffer + bytesread;
morebytesread = PerlIO_read(fp, bend, readsize);
diff --git a/t/io/utf8.t b/t/io/utf8.t
index 919b7342c2..29beba2020 100644
--- a/t/io/utf8.t
+++ b/t/io/utf8.t
@@ -10,7 +10,7 @@ BEGIN {
no utf8; # needed for use utf8 not griping about the raw octets
-plan(tests => 59);
+plan(tests => 61);
$| = 1;
@@ -354,6 +354,8 @@ is($failed, undef);
open F, ">:utf8", $a_file;
print F "foo\xE4";
print F "bar\xFE";
+ print F "\xC0\xC8\xCC\xD2";
+ print F "a\xE4ab";
print F "a\xE4a";
close F;
open F, "<:utf8", $a_file;
@@ -363,6 +365,10 @@ is($failed, undef);
$line .= <F>;
is($line, "foo\xE4bar\xFE", "rcatline with \$/ = \\4");
$line = <F>;
+ is($line, "\xC0\xC8\xCC\xD2", "readline with several encoded characters");
+ $line = <F>;
+ is($line, "a\xE4ab", "readline with another boundary condition");
+ $line = <F>;
is($line, "a\xE4a", "readline with boundary condition");
close F;