summaryrefslogtreecommitdiff
path: root/hv.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-01-28 19:28:40 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-01-28 19:28:40 +0000
commitf9a6324217cffea75ff769ddd313748c0613a128 (patch)
tree9fb5b4ade5877ba969d093cfe37ec605de62d8dc /hv.c
parent9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff)
downloadperl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz
Patch from Inaba Hiroto:
- canonical UTF-8 hash keys: if a key string for a hash is UTF8-on, try downgrade the string and use it if unicode::distinct is not in effect. For the task, I added a function bytes_from_utf8() to utf8.c. It might resemble utf8_to_bytes() but it is not convenient to the task. Made a test for it and added to t/op/each.t - Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to the mortal copy of the argument SV. And changed t/io/utf8.t test 18 which expects print() to upgrade its argument. - re-implement sv_eq with bytes_from_utf8() - some bug fixes - tr/// does not handle UTF8 range (\x{}-\x{}) - \ before raw UTF8 character produced "Malformed UTF-8 character" warning. - "\x{100}\N{CENT SIGN}" is Malformed. Added tests for these 3. - and one silly bug (by me) with qu operator. p4raw-id: //depot/perl@8583
Diffstat (limited to 'hv.c')
-rw-r--r--hv.c95
1 files changed, 86 insertions, 9 deletions
diff --git a/hv.c b/hv.c
index 0e50523bea..c999488bf3 100644
--- a/hv.c
+++ b/hv.c
@@ -152,6 +152,7 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
register HE *entry;
SV *sv;
bool is_utf8 = FALSE;
+ const char *keysave = key;
if (!hv)
return 0;
@@ -196,6 +197,9 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
return 0;
}
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
PERL_HASH(hash, key, klen);
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -208,6 +212,8 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
return &HeVAL(entry);
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
@@ -217,14 +223,24 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
if (env) {
sv = newSVpvn(env,len);
SvTAINTED_on(sv);
+ if (key != keysave)
+ Safefree(key);
return hv_store(hv,key,klen,sv,hash);
}
}
#endif
if (lval) { /* gonna assign to this, so it better be there */
sv = NEWSV(61,0);
- return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
+ if (key != keysave) { /* must be is_utf8 == 0 */
+ SV **ret = hv_store(hv,key,klen,sv,hash);
+ Safefree(key);
+ return ret;
+ }
+ else
+ return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
}
+ if (key != keysave)
+ Safefree(key);
return 0;
}
@@ -256,6 +272,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
register HE *entry;
SV *sv;
bool is_utf8;
+ char *keysave;
if (!hv)
return 0;
@@ -304,9 +321,12 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
return 0;
}
- key = SvPV(keysv, klen);
+ keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv)!=0);
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
if (!hash)
PERL_HASH(hash, key, klen);
@@ -320,6 +340,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
return entry;
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
@@ -333,6 +355,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
}
}
#endif
+ if (key != keysave)
+ Safefree(key);
if (lval) { /* gonna assign to this, so it better be there */
sv = NEWSV(61,0);
return hv_store_ent(hv,keysv,sv,hash);
@@ -385,6 +409,7 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
register HE *entry;
register HE **oentry;
bool is_utf8 = FALSE;
+ const char *keysave = key;
if (!hv)
return 0;
@@ -412,6 +437,9 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
#endif
}
}
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
if (!hash)
PERL_HASH(hash, key, klen);
@@ -433,6 +461,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
+ if (key != keysave)
+ Safefree(key);
return &HeVAL(entry);
}
@@ -441,6 +471,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 has
HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+ if (key != keysave)
+ Safefree(key);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
@@ -484,6 +516,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
register HE *entry;
register HE **oentry;
bool is_utf8;
+ char *keysave;
if (!hv)
return 0;
@@ -513,9 +546,12 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
}
}
- key = SvPV(keysv, klen);
+ keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv) != 0);
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
if (!hash)
PERL_HASH(hash, key, klen);
@@ -537,6 +573,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
+ if (key != keysave)
+ Safefree(key);
return entry;
}
@@ -545,6 +583,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
+ if (key != keysave)
+ Safefree(key);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
@@ -581,6 +621,7 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
SV **svp;
SV *sv;
bool is_utf8 = FALSE;
+ const char *keysave = key;
if (!hv)
return Nullsv;
@@ -615,6 +656,9 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
if (!xhv->xhv_array)
return Nullsv;
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
PERL_HASH(hash, key, klen);
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -629,6 +673,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
@@ -645,6 +691,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
--xhv->xhv_keys;
return sv;
}
+ if (key != keysave)
+ Safefree(key);
return Nullsv;
}
@@ -670,6 +718,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
register HE **oentry;
SV *sv;
bool is_utf8;
+ char *keysave;
if (!hv)
return Nullsv;
@@ -702,9 +751,12 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
if (!xhv->xhv_array)
return Nullsv;
- key = SvPV(keysv, klen);
+ keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv) != 0);
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
+
if (!hash)
PERL_HASH(hash, key, klen);
@@ -720,6 +772,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
@@ -736,6 +790,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
--xhv->xhv_keys;
return sv;
}
+ if (key != keysave)
+ Safefree(key);
return Nullsv;
}
@@ -756,6 +812,7 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
register HE *entry;
SV *sv;
bool is_utf8 = FALSE;
+ const char *keysave = key;
if (!hv)
return 0;
@@ -786,6 +843,9 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
return 0;
#endif
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, (STRLEN*)&klen, &is_utf8);
+
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
@@ -802,6 +862,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
return TRUE;
}
#ifdef DYNAMIC_ENV_FETCH /* is it out there? */
@@ -816,6 +878,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
}
}
#endif
+ if (key != keysave)
+ Safefree(key);
return FALSE;
}
@@ -839,6 +903,7 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
register HE *entry;
SV *sv;
bool is_utf8;
+ char *keysave;
if (!hv)
return 0;
@@ -867,8 +932,10 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
return 0;
#endif
- key = SvPV(keysv, klen);
+ keysave = key = SvPV(keysv, klen);
is_utf8 = (SvUTF8(keysv) != 0);
+ if (is_utf8 && !(PL_hints & HINT_UTF8_DISTINCT))
+ key = (char*)bytes_from_utf8((U8*)key, &klen, &is_utf8);
if (!hash)
PERL_HASH(hash, key, klen);
@@ -886,6 +953,8 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
continue;
if (HeKUTF8(entry) != (char)is_utf8)
continue;
+ if (key != keysave)
+ Safefree(key);
return TRUE;
}
#ifdef DYNAMIC_ENV_FETCH /* is it out there? */
@@ -900,6 +969,8 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
}
}
#endif
+ if (key != keysave)
+ Safefree(key);
return FALSE;
}
@@ -1471,10 +1542,13 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
register I32 i = 1;
I32 found = 0;
bool is_utf8 = FALSE;
+ const char *save = str;
if (len < 0) {
len = -len;
is_utf8 = TRUE;
+ if (!(PL_hints & HINT_UTF8_DISTINCT))
+ str = (char*)bytes_from_utf8((U8*)str, (STRLEN*)&len, &is_utf8);
}
/* what follows is the moral equivalent of:
@@ -1507,7 +1581,8 @@ Perl_unsharepvn(pTHX_ const char *str, I32 len, U32 hash)
break;
}
UNLOCK_STRTAB_MUTEX;
-
+ if (str != save)
+ Safefree(str);
if (!found && ckWARN_d(WARN_INTERNAL))
Perl_warner(aTHX_ WARN_INTERNAL, "Attempt to free non-existent shared string '%s'",str);
}
@@ -1525,10 +1600,13 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
register I32 i = 1;
I32 found = 0;
bool is_utf8 = FALSE;
+ const char *save = str;
if (len < 0) {
len = -len;
is_utf8 = TRUE;
+ if (!(PL_hints & HINT_UTF8_DISTINCT))
+ str = (char*)bytes_from_utf8((U8*)str, (STRLEN*)&len, &is_utf8);
}
/* what follows is the moral equivalent of:
@@ -1568,8 +1646,7 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
++HeVAL(entry); /* use value slot as REFCNT */
UNLOCK_STRTAB_MUTEX;
+ if (str != save)
+ Safefree(str);
return HeKEY_hek(entry);
}
-
-
-