summaryrefslogtreecommitdiff
path: root/hv.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2014-07-07 23:17:13 +0100
committerDavid Mitchell <davem@iabyn.com>2014-07-08 15:36:41 +0100
commit34dadc62d399176c3286094e10619c6300ab9243 (patch)
tree02cdeaf6cc51935e6afc42560ae87c25d36e13fb /hv.c
parent86f641010e0569b8d5a3f09a3011af1522b6c14a (diff)
downloadperl-34dadc62d399176c3286094e10619c6300ab9243.tar.gz
faster constant hash key lookups ($hash{const})
On something like $hash{constantstring}, at compile-time the PVX string on the SV attached to the OP_CONST is converted into a HEK (with an appropriate offset shift). At run-time on hash keying, this HEK is used to speed up the bucket search; however it turns out that this can be improved. Currently, the main bucket loop does: for (; entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) continue; if (HeKLEN(entry) != (I32)klen) continue; if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; The 'HeKEY(entry) != key' test is the bit that allows us to skip the memNE() when 'key' is actually part of a HEK. However, this means that in the const HEK scenario, for a match, we do pointless hash, klen and HVhek_UTF8 tests, when HeKEY(entry) == key is sufficient for a match. Conversely, in the non-const-HEK scenario, the 'HeKEY(entry) != key' will always fail, and so it's just dead weight in the loop. To work around this, this commit splits the code into two separate bucket search loops; one for const-HEKs that just compare HEK pointers, and a general loop that now doesn't have do the 'HeKEY(entry) != key' test. Analysing this code with cachegrind shows that with this commit, lookups of constant keys that exist (e.g. the typical perl object scenario, $self->{somefield}) takes 15% less instruction reads in hv_common(), 14% less data reads and 27% less writes. A lookup with a non-existing constant key ($hash{not_exist}) is about the same as before (0.7% improvement). Non-constant existing lookup ($hash{$existing_key}) is about 5% less instructions, while $hash{$non_existing_key} is about 0.7%.
Diffstat (limited to 'hv.c')
-rw-r--r--hv.c107
1 files changed, 89 insertions, 18 deletions
diff --git a/hv.c b/hv.c
index a01cb76d7d..5bab2d7640 100644
--- a/hv.c
+++ b/hv.c
@@ -345,6 +345,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
bool is_utf8;
int masked_flags;
const int return_svp = action & HV_FETCH_JUST_SV;
+ HEK *keysv_hek = NULL;
if (!hv)
return NULL;
@@ -614,12 +615,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
}
}
- if (!hash) {
- if (keysv && (SvIsCOW_shared_hash(keysv)))
- hash = SvSHARED_HASH(keysv);
- else
- PERL_HASH(hash, key, klen);
+ if (keysv && (SvIsCOW_shared_hash(keysv))) {
+ if (HvSHAREKEYS(hv))
+ keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
+ hash = SvSHARED_HASH(keysv);
}
+ else if (!hash)
+ PERL_HASH(hash, key, klen);
masked_flags = (flags & HVhek_MASK);
@@ -630,16 +632,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
{
entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)];
}
+
+ if (!entry)
+ goto not_found;
+
+ if (keysv_hek) {
+ /* keysv is actually a HEK in disguise, so we can match just by
+ * comparing the HEK pointers in the HE chain. There is a slight
+ * caveat: on something like "\x80", which has both plain and utf8
+ * representations, perl's hashes do encoding-insensitive lookups,
+ * but preserve the encoding of the stored key. Thus a particular
+ * key could map to two different HEKs in PL_strtab. We only
+ * conclude 'not found' if all the flags are the same; otherwise
+ * we fall back to a full search (this should only happen in rare
+ * cases).
+ */
+ int keysv_flags = HEK_FLAGS(keysv_hek);
+ HE *orig_entry = entry;
+
+ for (; entry; entry = HeNEXT(entry)) {
+ HEK *hek = HeKEY_hek(entry);
+ if (hek == keysv_hek)
+ goto found;
+ if (HEK_FLAGS(hek) != keysv_flags)
+ break; /* need to do full match */
+ }
+ if (!entry)
+ goto not_found;
+ /* failed on shortcut - do full search loop */
+ entry = orig_entry;
+ }
+
for (; entry; entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
- if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
+ if (memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
continue;
+ found:
if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) {
if (HeKFLAGS(entry) != masked_flags) {
/* We match if HVhek_UTF8 bit in our flags and hash key's
@@ -708,6 +742,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
}
return entry;
}
+
+ not_found:
#ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */
if (!(action & HV_FETCH_ISSTORE)
&& SvRMAGICAL((const SV *)hv)
@@ -955,9 +991,14 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
XPVHV* xhv;
HE *entry;
HE **oentry;
- HE *const *first_entry;
+ HE **first_entry;
bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE;
int masked_flags;
+ HEK *keysv_hek = NULL;
+ U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
+ SV *sv;
+ GV *gv = NULL;
+ HV *stash = NULL;
if (SvRMAGICAL(hv)) {
bool needs_copy;
@@ -1022,32 +1063,60 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HvHASKFLAGS_on(MUTABLE_SV(hv));
}
- if (!hash) {
- if (keysv && (SvIsCOW_shared_hash(keysv)))
- hash = SvSHARED_HASH(keysv);
- else
- PERL_HASH(hash, key, klen);
+ if (keysv && (SvIsCOW_shared_hash(keysv))) {
+ if (HvSHAREKEYS(hv))
+ keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
+ hash = SvSHARED_HASH(keysv);
}
+ else if (!hash)
+ PERL_HASH(hash, key, klen);
masked_flags = (k_flags & HVhek_MASK);
first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
entry = *oentry;
- for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
- SV *sv;
- U8 mro_changes = 0; /* 1 = isa; 2 = package moved */
- GV *gv = NULL;
- HV *stash = NULL;
+ if (!entry)
+ goto not_found;
+
+ if (keysv_hek) {
+ /* keysv is actually a HEK in disguise, so we can match just by
+ * comparing the HEK pointers in the HE chain. There is a slight
+ * caveat: on something like "\x80", which has both plain and utf8
+ * representations, perl's hashes do encoding-insensitive lookups,
+ * but preserve the encoding of the stored key. Thus a particular
+ * key could map to two different HEKs in PL_strtab. We only
+ * conclude 'not found' if all the flags are the same; otherwise
+ * we fall back to a full search (this should only happen in rare
+ * cases).
+ */
+ int keysv_flags = HEK_FLAGS(keysv_hek);
+
+ for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
+ HEK *hek = HeKEY_hek(entry);
+ if (hek == keysv_hek)
+ goto found;
+ if (HEK_FLAGS(hek) != keysv_flags)
+ break; /* need to do full match */
+ }
+ if (!entry)
+ goto not_found;
+ /* failed on shortcut - do full search loop */
+ oentry = first_entry;
+ entry = *oentry;
+ }
+
+ for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
- if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */
+ if (memNE(HeKEY(entry),key,klen)) /* is this it? */
continue;
if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
continue;
+ found:
if (hv == PL_strtab) {
if (k_flags & HVhek_FREEKEY)
Safefree(key);
@@ -1148,6 +1217,8 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
return sv;
}
+
+ not_found:
if (SvREADONLY(hv)) {
hv_notallowed(k_flags, key, klen,
"Attempt to delete disallowed key '%"SVf"' from"