summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2000-12-04 19:36:51 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2000-12-04 19:36:51 +0000
commitda58a35d3e499cdd492619302eb044ac1841788f (patch)
tree33aec77245e5242e960797d9a1f9e3be7c6382b9
parent38ff9fd43e4d2321f907bd54ac8e6791531cd9fc (diff)
downloadperl-da58a35d3e499cdd492619302eb044ac1841788f.tar.gz
UTF-8 hash keys, patch from Inaba Hiroto.
p4raw-id: //depot/perl@7980
-rwxr-xr-xembed.pl8
-rw-r--r--hv.c92
-rw-r--r--hv.h3
-rw-r--r--pod/perlapi.pod8
-rw-r--r--proto.h8
5 files changed, 91 insertions, 28 deletions
diff --git a/embed.pl b/embed.pl
index 6412ef6b9a..055c28b963 100755
--- a/embed.pl
+++ b/embed.pl
@@ -1563,11 +1563,11 @@ Ap |HV* |gv_stashpvn |const char* name|U32 namelen|I32 create
Apd |HV* |gv_stashsv |SV* sv|I32 create
Apd |void |hv_clear |HV* tb
Ap |void |hv_delayfree_ent|HV* hv|HE* entry
-Apd |SV* |hv_delete |HV* tb|const char* key|U32 klen|I32 flags
+Apd |SV* |hv_delete |HV* tb|const char* key|I32 klen|I32 flags
Apd |SV* |hv_delete_ent |HV* tb|SV* key|I32 flags|U32 hash
-Apd |bool |hv_exists |HV* tb|const char* key|U32 klen
+Apd |bool |hv_exists |HV* tb|const char* key|I32 klen
Apd |bool |hv_exists_ent |HV* tb|SV* key|U32 hash
-Apd |SV** |hv_fetch |HV* tb|const char* key|U32 klen|I32 lval
+Apd |SV** |hv_fetch |HV* tb|const char* key|I32 klen|I32 lval
Apd |HE* |hv_fetch_ent |HV* tb|SV* key|I32 lval|U32 hash
Ap |void |hv_free_ent |HV* hv|HE* entry
Apd |I32 |hv_iterinit |HV* tb
@@ -1578,7 +1578,7 @@ Apd |SV* |hv_iternextsv |HV* hv|char** key|I32* retlen
Apd |SV* |hv_iterval |HV* tb|HE* entry
Ap |void |hv_ksplit |HV* hv|IV newmax
Apd |void |hv_magic |HV* hv|GV* gv|int how
-Apd |SV** |hv_store |HV* tb|const char* key|U32 klen|SV* val \
+Apd |SV** |hv_store |HV* tb|const char* key|I32 klen|SV* val \
|U32 hash
Apd |HE* |hv_store_ent |HV* tb|SV* key|SV* val|U32 hash
Apd |void |hv_undef |HV* tb
diff --git a/hv.c b/hv.c
index 8a43a19eb5..dd30b4d61c 100644
--- a/hv.c
+++ b/hv.c
@@ -75,13 +75,19 @@ S_save_hek(pTHX_ const char *str, I32 len, U32 hash)
{
char *k;
register HEK *hek;
+ bool is_utf8 = FALSE;
+
+ if (len < 0) {
+ len = -len;
+ is_utf8 = TRUE;
+ }
New(54, k, HEK_BASESIZE + len + 1, char);
hek = (HEK*)k;
Copy(str, HEK_KEY(hek), len, char);
- *(HEK_KEY(hek) + len) = '\0';
HEK_LEN(hek) = len;
HEK_HASH(hek) = hash;
+ HEK_UTF8(hek) = (char)is_utf8;
return hek;
}
@@ -112,9 +118,9 @@ Perl_he_dup(pTHX_ HE *e, bool shared)
if (HeKLEN(e) == HEf_SVKEY)
HeKEY_sv(ret) = SvREFCNT_inc(sv_dup(HeKEY_sv(e)));
else if (shared)
- HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN(e), HeHASH(e));
+ HeKEY_hek(ret) = share_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
else
- HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN(e), HeHASH(e));
+ HeKEY_hek(ret) = save_hek(HeKEY(e), HeKLEN_UTF8(e), HeHASH(e));
HeVAL(ret) = SvREFCNT_inc(sv_dup(HeVAL(e)));
return ret;
}
@@ -138,16 +144,22 @@ information on how to use this function on tied hashes.
*/
SV**
-Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
+Perl_hv_fetch(pTHX_ HV *hv, const char *key, I32 klen, I32 lval)
{
register XPVHV* xhv;
register U32 hash;
register HE *entry;
SV *sv;
+ bool is_utf8 = FALSE;
if (!hv)
return 0;
+ if (klen < 0) {
+ klen = -klen;
+ is_utf8 = TRUE;
+ }
+
if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv,'P')) {
dTHR;
@@ -194,6 +206,8 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
return &HeVAL(entry);
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
@@ -209,7 +223,7 @@ Perl_hv_fetch(pTHX_ HV *hv, const char *key, U32 klen, I32 lval)
#endif
if (lval) { /* gonna assign to this, so it better be there */
sv = NEWSV(61,0);
- return hv_store(hv,key,klen,sv,hash);
+ return hv_store(hv,key,is_utf8?-klen:klen,sv,hash);
}
return 0;
}
@@ -241,6 +255,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
STRLEN klen;
register HE *entry;
SV *sv;
+ bool is_utf8;
if (!hv)
return 0;
@@ -291,6 +306,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
}
key = SvPV(keysv, klen);
+ is_utf8 = (SvUTF8(keysv)!=0);
if (!hash)
PERL_HASH(hash, key, klen);
@@ -303,6 +319,8 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
return entry;
}
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
@@ -361,16 +379,22 @@ information on how to use this function on tied hashes.
*/
SV**
-Perl_hv_store(pTHX_ HV *hv, const char *key, U32 klen, SV *val, register U32 hash)
+Perl_hv_store(pTHX_ HV *hv, const char *key, I32 klen, SV *val, register U32 hash)
{
register XPVHV* xhv;
register I32 i;
register HE *entry;
register HE **oentry;
+ bool is_utf8 = FALSE;
if (!hv)
return 0;
+ if (klen < 0) {
+ klen = -klen;
+ is_utf8 = TRUE;
+ }
+
xhv = (XPVHV*)SvANY(hv);
if (SvMAGICAL(hv)) {
bool needs_copy;
@@ -406,6 +430,8 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, U32 klen, SV *val, register U32 has
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
return &HeVAL(entry);
@@ -413,9 +439,9 @@ Perl_hv_store(pTHX_ HV *hv, const char *key, U32 klen, SV *val, register U32 has
entry = new_HE();
if (HvSHAREKEYS(hv))
- HeKEY_hek(entry) = share_hek(key, klen, hash);
+ HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
- HeKEY_hek(entry) = save_hek(key, klen, hash);
+ HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
@@ -458,6 +484,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
register I32 i;
register HE *entry;
register HE **oentry;
+ bool is_utf8;
if (!hv)
return 0;
@@ -489,6 +516,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
}
key = SvPV(keysv, klen);
+ is_utf8 = (SvUTF8(keysv) != 0);
if (!hash)
PERL_HASH(hash, key, klen);
@@ -507,6 +535,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
SvREFCNT_dec(HeVAL(entry));
HeVAL(entry) = val;
return entry;
@@ -514,9 +544,9 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, register U32 hash)
entry = new_HE();
if (HvSHAREKEYS(hv))
- HeKEY_hek(entry) = share_hek(key, klen, hash);
+ HeKEY_hek(entry) = share_hek(key, is_utf8?-klen:klen, hash);
else /* gotta do the real thing */
- HeKEY_hek(entry) = save_hek(key, klen, hash);
+ HeKEY_hek(entry) = save_hek(key, is_utf8?-klen:klen, hash);
HeVAL(entry) = val;
HeNEXT(entry) = *oentry;
*oentry = entry;
@@ -543,7 +573,7 @@ will be returned.
*/
SV *
-Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
+Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
{
register XPVHV* xhv;
register I32 i;
@@ -552,9 +582,14 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
register HE **oentry;
SV **svp;
SV *sv;
+ bool is_utf8 = FALSE;
if (!hv)
return Nullsv;
+ if (klen < 0) {
+ klen = -klen;
+ is_utf8 = TRUE;
+ }
if (SvRMAGICAL(hv)) {
bool needs_copy;
bool needs_store;
@@ -594,6 +629,8 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, U32 klen, I32 flags)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
@@ -634,6 +671,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
register HE *entry;
register HE **oentry;
SV *sv;
+ bool is_utf8;
if (!hv)
return Nullsv;
@@ -667,6 +705,7 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
return Nullsv;
key = SvPV(keysv, klen);
+ is_utf8 = (SvUTF8(keysv) != 0);
if (!hash)
PERL_HASH(hash, key, klen);
@@ -681,6 +720,8 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
*oentry = HeNEXT(entry);
if (i && !*oentry)
xhv->xhv_fill--;
@@ -710,16 +751,22 @@ C<klen> is the length of the key.
*/
bool
-Perl_hv_exists(pTHX_ HV *hv, const char *key, U32 klen)
+Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
{
register XPVHV* xhv;
register U32 hash;
register HE *entry;
SV *sv;
+ bool is_utf8 = FALSE;
if (!hv)
return 0;
+ if (klen < 0) {
+ klen = -klen;
+ is_utf8 = TRUE;
+ }
+
if (SvRMAGICAL(hv)) {
if (mg_find((SV*)hv,'P')) {
dTHR;
@@ -756,6 +803,8 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, U32 klen)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
return TRUE;
}
#ifdef DYNAMIC_ENV_FETCH /* is it out there? */
@@ -1051,7 +1100,7 @@ Perl_newHVhv(pTHX_ HV *ohv)
/* Slow way */
hv_iterinit(ohv);
while ((entry = hv_iternext(ohv))) {
- hv_store(hv, HeKEY(entry), HeKLEN(entry),
+ hv_store(hv, HeKEY(entry), HeKLEN_UTF8(entry),
SvREFCNT_inc(HeVAL(entry)), HeHASH(entry));
}
HvRITER(ohv) = hv_riter;
@@ -1343,8 +1392,11 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry)
if (HeKLEN(entry) == HEf_SVKEY)
return sv_mortalcopy(HeKEY_sv(entry));
else {
- return sv_2mortal(newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
- HeKLEN(entry), HeHASH(entry)));
+ SV *sv = newSVpvn_share((HeKLEN(entry) ? HeKEY(entry) : ""),
+ HeKLEN(entry), HeHASH(entry));
+ if (HeKUTF8(entry))
+ SvUTF8_on(sv);
+ return sv_2mortal(sv);
}
}
@@ -1471,6 +1523,12 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
register HE **oentry;
register I32 i = 1;
I32 found = 0;
+ bool is_utf8 = FALSE;
+
+ if (len < 0) {
+ len = -len;
+ is_utf8 = TRUE;
+ }
/* what follows is the moral equivalent of:
@@ -1488,12 +1546,14 @@ Perl_share_hek(pTHX_ const char *str, I32 len, register U32 hash)
continue;
if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */
continue;
+ if (HeKUTF8(entry) != (char)is_utf8)
+ continue;
found = 1;
break;
}
if (!found) {
entry = new_HE();
- HeKEY_hek(entry) = save_hek(str, len, hash);
+ HeKEY_hek(entry) = save_hek(str, is_utf8?-len:len, hash);
HeVAL(entry) = Nullsv;
HeNEXT(entry) = *oentry;
*oentry = entry;
diff --git a/hv.h b/hv.h
index 08f3bed7d5..f8cf2b968f 100644
--- a/hv.h
+++ b/hv.h
@@ -151,6 +151,8 @@ C<SV*>.
#define HeKEY(he) HEK_KEY(HeKEY_hek(he))
#define HeKEY_sv(he) (*(SV**)HeKEY(he))
#define HeKLEN(he) HEK_LEN(HeKEY_hek(he))
+#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he))
+#define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
#define HeVAL(he) (he)->hent_val
#define HeHASH(he) HEK_HASH(HeKEY_hek(he))
#define HePV(he,lp) ((HeKLEN(he) == HEf_SVKEY) ? \
@@ -175,6 +177,7 @@ C<SV*>.
#define HEK_HASH(hek) (hek)->hek_hash
#define HEK_LEN(hek) (hek)->hek_len
#define HEK_KEY(hek) (hek)->hek_key
+#define HEK_UTF8(hek) (*(HEK_KEY(hek)+HEK_LEN(hek)))
/* calculate HV array allocation */
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 7296c81d0e..e3e7479a23 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -761,7 +761,7 @@ hash and returned to the caller. The C<klen> is the length of the key.
The C<flags> value will normally be zero; if set to G_DISCARD then NULL
will be returned.
- SV* hv_delete(HV* tb, const char* key, U32 klen, I32 flags)
+ SV* hv_delete(HV* tb, const char* key, I32 klen, I32 flags)
=for hackers
Found in file hv.c
@@ -783,7 +783,7 @@ Found in file hv.c
Returns a boolean indicating whether the specified hash key exists. The
C<klen> is the length of the key.
- bool hv_exists(HV* tb, const char* key, U32 klen)
+ bool hv_exists(HV* tb, const char* key, I32 klen)
=for hackers
Found in file hv.c
@@ -809,7 +809,7 @@ dereferencing it to a C<SV*>.
See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
information on how to use this function on tied hashes.
- SV** hv_fetch(HV* tb, const char* key, U32 klen, I32 lval)
+ SV** hv_fetch(HV* tb, const char* key, I32 klen, I32 lval)
=for hackers
Found in file hv.c
@@ -920,7 +920,7 @@ the call, and decrementing it if the function returned NULL.
See L<perlguts/"Understanding the Magic of Tied Hashes and Arrays"> for more
information on how to use this function on tied hashes.
- SV** hv_store(HV* tb, const char* key, U32 klen, SV* val, U32 hash)
+ SV** hv_store(HV* tb, const char* key, I32 klen, SV* val, U32 hash)
=for hackers
Found in file hv.c
diff --git a/proto.h b/proto.h
index 1e34c81cec..e561d1a024 100644
--- a/proto.h
+++ b/proto.h
@@ -303,11 +303,11 @@ PERL_CALLCONV HV* Perl_gv_stashpvn(pTHX_ const char* name, U32 namelen, I32 crea
PERL_CALLCONV HV* Perl_gv_stashsv(pTHX_ SV* sv, I32 create);
PERL_CALLCONV void Perl_hv_clear(pTHX_ HV* tb);
PERL_CALLCONV void Perl_hv_delayfree_ent(pTHX_ HV* hv, HE* entry);
-PERL_CALLCONV SV* Perl_hv_delete(pTHX_ HV* tb, const char* key, U32 klen, I32 flags);
+PERL_CALLCONV SV* Perl_hv_delete(pTHX_ HV* tb, const char* key, I32 klen, I32 flags);
PERL_CALLCONV SV* Perl_hv_delete_ent(pTHX_ HV* tb, SV* key, I32 flags, U32 hash);
-PERL_CALLCONV bool Perl_hv_exists(pTHX_ HV* tb, const char* key, U32 klen);
+PERL_CALLCONV bool Perl_hv_exists(pTHX_ HV* tb, const char* key, I32 klen);
PERL_CALLCONV bool Perl_hv_exists_ent(pTHX_ HV* tb, SV* key, U32 hash);
-PERL_CALLCONV SV** Perl_hv_fetch(pTHX_ HV* tb, const char* key, U32 klen, I32 lval);
+PERL_CALLCONV SV** Perl_hv_fetch(pTHX_ HV* tb, const char* key, I32 klen, I32 lval);
PERL_CALLCONV HE* Perl_hv_fetch_ent(pTHX_ HV* tb, SV* key, I32 lval, U32 hash);
PERL_CALLCONV void Perl_hv_free_ent(pTHX_ HV* hv, HE* entry);
PERL_CALLCONV I32 Perl_hv_iterinit(pTHX_ HV* tb);
@@ -318,7 +318,7 @@ PERL_CALLCONV SV* Perl_hv_iternextsv(pTHX_ HV* hv, char** key, I32* retlen);
PERL_CALLCONV SV* Perl_hv_iterval(pTHX_ HV* tb, HE* entry);
PERL_CALLCONV void Perl_hv_ksplit(pTHX_ HV* hv, IV newmax);
PERL_CALLCONV void Perl_hv_magic(pTHX_ HV* hv, GV* gv, int how);
-PERL_CALLCONV SV** Perl_hv_store(pTHX_ HV* tb, const char* key, U32 klen, SV* val, U32 hash);
+PERL_CALLCONV SV** Perl_hv_store(pTHX_ HV* tb, const char* key, I32 klen, SV* val, U32 hash);
PERL_CALLCONV HE* Perl_hv_store_ent(pTHX_ HV* tb, SV* key, SV* val, U32 hash);
PERL_CALLCONV void Perl_hv_undef(pTHX_ HV* tb);
PERL_CALLCONV I32 Perl_ibcmp(pTHX_ const char* a, const char* b, I32 len);