diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-28 19:28:40 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-28 19:28:40 +0000 |
commit | f9a6324217cffea75ff769ddd313748c0613a128 (patch) | |
tree | 9fb5b4ade5877ba969d093cfe37ec605de62d8dc /sv.c | |
parent | 9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff) | |
download | perl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz |
Patch from Inaba Hiroto:
- canonical UTF-8 hash keys: if a key string for a hash is
UTF8-on, try to downgrade the string, and use the downgraded
form if unicode::distinct is not in effect.
For this task, I added a function bytes_from_utf8() to utf8.c.
It resembles utf8_to_bytes(), but utf8_to_bytes() is not
convenient for this task.
Made a test for it and added it to t/op/each.t.
- Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to
a mortal copy of the argument SV,
and changed t/io/utf8.t test 18, which expected print() to
upgrade its argument.
- re-implement sv_eq with bytes_from_utf8()
- some bug fixes
- tr/// does not handle UTF8 range (\x{}-\x{})
- \ before raw UTF8 character produced
"Malformed UTF-8 character" warning.
- "\x{100}\N{CENT SIGN}" is Malformed.
Added tests for these 3.
- and one silly bug (by me) with the qu operator.
p4raw-id: //depot/perl@8583
Diffstat (limited to 'sv.c')
-rw-r--r-- | sv.c | 32 |
1 files changed, 14 insertions, 18 deletions
@@ -4690,30 +4690,24 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) /* do not utf8ize the comparands as a side-effect */ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + bool is_utf8 = TRUE; + if (PL_hints & HINT_UTF8_DISTINCT) return FALSE; if (SvUTF8(sv1)) { - (void)utf8_to_bytes((U8*)(pv1 = savepvn(pv1, cur1)), &cur1); - { - IV scur1 = cur1; - if (scur1 < 0) { - Safefree(pv1); - return 0; - } - } - pv1tmp = TRUE; + char *pv = bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); + if (is_utf8) + return 0; + pv1tmp = (pv != pv1); + pv1 = pv; } else { - (void)utf8_to_bytes((U8*)(pv2 = savepvn(pv2, cur2)), &cur2); - { - IV scur2 = cur2; - if (scur2 < 0) { - Safefree(pv2); - return 0; - } - } - pv2tmp = TRUE; + char *pv = bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); + if (is_utf8) + return 0; + pv2tmp = (pv != pv2); + pv2 = pv; } } @@ -5601,6 +5595,8 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash) len = -len; is_utf8 = TRUE; } + if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT)) + src = (char*)bytes_from_utf8((U8*)src, (STRLEN*)&len, &is_utf8); if (!hash) PERL_HASH(hash, src, len); new_SV(sv); |