From f9a6324217cffea75ff769ddd313748c0613a128 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sun, 28 Jan 2001 19:28:40 +0000 Subject: Patch from Inaba Hiroto: - canonical UTF-8 hash keys: if a key string for a hash is UTF8-on, try to downgrade the string and use the downgraded form if unicode::distinct is not in effect. For this task, I added a function bytes_from_utf8() to utf8.c. It might resemble utf8_to_bytes(), but that function is not convenient for this task. Made a test for it and added it to t/op/each.t - Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to a mortal copy of the argument SV. Also changed t/io/utf8.t test 18, which expected print() to upgrade its argument. - re-implemented sv_eq with bytes_from_utf8() - some bug fixes - tr/// did not handle a UTF8 range (\x{}-\x{}) - \ before a raw UTF8 character produced a "Malformed UTF-8 character" warning. - "\x{100}\N{CENT SIGN}" was reported as Malformed. Added tests for these 3. - and one silly bug (by me) with the qu operator. p4raw-id: //depot/perl@8583 --- doio.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'doio.c') diff --git a/doio.c b/doio.c index 6056ea704c..a1d0e466ea 100644 --- a/doio.c +++ b/doio.c @@ -1169,13 +1169,12 @@ Perl_do_print(pTHX_ register SV *sv, PerlIO *fp) /* FALL THROUGH */ default: if (PerlIO_isutf8(fp)) { - tmps = SvPVutf8(sv, len); - } - else { - if (DO_UTF8(sv)) - sv_utf8_downgrade(sv, FALSE); - tmps = SvPV(sv, len); + if (!SvUTF8(sv)) + sv_utf8_upgrade(sv = sv_mortalcopy(sv)); } + else if (DO_UTF8(sv)) + sv_utf8_downgrade((sv = sv_mortalcopy(sv)), FALSE); + tmps = SvPV(sv, len); break; } /* To detect whether the process is about to overstep its -- cgit v1.2.1