summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-28 19:28:40 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-28 19:28:40 +0000
commit: f9a6324217cffea75ff769ddd313748c0613a128 (patch)
tree: 9fb5b4ade5877ba969d093cfe37ec605de62d8dc /sv.c
parent: 9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff)
download: perl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz
Patch from Inaba Hiroto:
- canonical UTF-8 hash keys: if a key string for a hash is UTF8-on, try to downgrade the string and use it if unicode::distinct is not in effect. For the task, I added a function bytes_from_utf8() to utf8.c. It might resemble utf8_to_bytes(), but that function is not convenient for the task. Made a test for it and added it to t/op/each.t
- Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to the mortal copy of the argument SV. And changed t/io/utf8.t test 18, which expects print() to upgrade its argument.
- re-implement sv_eq with bytes_from_utf8()
- some bug fixes
  - tr/// does not handle UTF8 range (\x{}-\x{})
  - \ before raw UTF8 character produced "Malformed UTF-8 character" warning.
  - "\x{100}\N{CENT SIGN}" is Malformed.
  Added tests for these 3.
- and one silly bug (by me) with the qu operator.

p4raw-id: //depot/perl@8583
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c32
1 files changed, 14 insertions, 18 deletions
diff --git a/sv.c b/sv.c
index ed7ebdcbe1..c53486ac8c 100644
--- a/sv.c
+++ b/sv.c
@@ -4690,30 +4690,24 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
/* do not utf8ize the comparands as a side-effect */
if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) {
+ bool is_utf8 = TRUE;
+
if (PL_hints & HINT_UTF8_DISTINCT)
return FALSE;
if (SvUTF8(sv1)) {
- (void)utf8_to_bytes((U8*)(pv1 = savepvn(pv1, cur1)), &cur1);
- {
- IV scur1 = cur1;
- if (scur1 < 0) {
- Safefree(pv1);
- return 0;
- }
- }
- pv1tmp = TRUE;
+ char *pv = bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
+ if (is_utf8)
+ return 0;
+ pv1tmp = (pv != pv1);
+ pv1 = pv;
}
else {
- (void)utf8_to_bytes((U8*)(pv2 = savepvn(pv2, cur2)), &cur2);
- {
- IV scur2 = cur2;
- if (scur2 < 0) {
- Safefree(pv2);
- return 0;
- }
- }
- pv2tmp = TRUE;
+ char *pv = bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
+ if (is_utf8)
+ return 0;
+ pv2tmp = (pv != pv2);
+ pv2 = pv;
}
}
@@ -5601,6 +5595,8 @@ Perl_newSVpvn_share(pTHX_ const char *src, I32 len, U32 hash)
len = -len;
is_utf8 = TRUE;
}
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ src = (char*)bytes_from_utf8((U8*)src, (STRLEN*)&len, &is_utf8);
if (!hash)
PERL_HASH(hash, src, len);
new_SV(sv);