diff options
author | David Mitchell <davem@iabyn.com> | 2014-07-07 23:17:13 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2014-07-08 15:36:41 +0100 |
commit | 34dadc62d399176c3286094e10619c6300ab9243 (patch) | |
tree | 02cdeaf6cc51935e6afc42560ae87c25d36e13fb /hv.c | |
parent | 86f641010e0569b8d5a3f09a3011af1522b6c14a (diff) | |
download | perl-34dadc62d399176c3286094e10619c6300ab9243.tar.gz |
faster constant hash key lookups ($hash{const})
On something like $hash{constantstring}, at compile-time the
PVX string on the SV attached to the OP_CONST is converted into a
HEK (with an appropriate offset shift).
At run-time on hash keying, this HEK is used to speed up the bucket
search; however it turns out that this can be improved. Currently,
the main bucket loop does:
for (; entry; entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash)
continue;
if (HeKLEN(entry) != (I32)klen)
continue;
if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen))
continue;
if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8)
continue;
The 'HeKEY(entry) != key' test is the bit that allows us to skip the
memNE() when 'key' is actually part of a HEK. However, this means that in
the const HEK scenario, for a match, we do pointless hash, klen and
HVhek_UTF8 tests, when HeKEY(entry) == key is sufficient for a
match. Conversely, in the non-const-HEK scenario, the 'HeKEY(entry) !=
key' will always fail, and so it's just dead weight in the loop.
To work around this, this commit splits the code into two separate bucket
search loops; one for const-HEKs that just compare HEK pointers, and a
general loop that now doesn't have do the 'HeKEY(entry) != key' test.
Analysing this code with cachegrind shows that with this commit, lookups
of constant keys that exist (e.g. the typical perl object scenario,
$self->{somefield}) takes 15% less instruction reads in hv_common(), 14%
less data reads and 27% less writes.
A lookup with a non-existing constant key ($hash{not_exist}) is about the
same as before (0.7% improvement).
Non-constant existing lookup ($hash{$existing_key}) is about 5% less
instructions, while $hash{$non_existing_key} is about 0.7%.
Diffstat (limited to 'hv.c')
-rw-r--r-- | hv.c | 107 |
1 files changed, 89 insertions, 18 deletions
@@ -345,6 +345,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, bool is_utf8; int masked_flags; const int return_svp = action & HV_FETCH_JUST_SV; + HEK *keysv_hek = NULL; if (!hv) return NULL; @@ -614,12 +615,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } } - if (!hash) { - if (keysv && (SvIsCOW_shared_hash(keysv))) - hash = SvSHARED_HASH(keysv); - else - PERL_HASH(hash, key, klen); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); } + else if (!hash) + PERL_HASH(hash, key, klen); masked_flags = (flags & HVhek_MASK); @@ -630,16 +632,48 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, { entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; } + + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + HE *orig_entry = entry; + + for (; entry; entry = HeNEXT(entry)) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + entry = orig_entry; + } + for (; entry; entry = HeNEXT(entry)) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (action & (HV_FETCH_LVALUE|HV_FETCH_ISSTORE)) { if (HeKFLAGS(entry) != masked_flags) { /* We match if HVhek_UTF8 bit in our flags and hash key's @@ -708,6 +742,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } return entry; } + + not_found: #ifdef DYNAMIC_ENV_FETCH /* %ENV lookup? If so, try to fetch the value now */ if (!(action & HV_FETCH_ISSTORE) && SvRMAGICAL((const SV *)hv) @@ -955,9 +991,14 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, XPVHV* xhv; HE *entry; HE **oentry; - HE *const *first_entry; + HE **first_entry; bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; int masked_flags; + HEK *keysv_hek = NULL; + U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ + SV *sv; + GV *gv = NULL; + HV *stash = NULL; if (SvRMAGICAL(hv)) { bool needs_copy; @@ -1022,32 +1063,60 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvHASKFLAGS_on(MUTABLE_SV(hv)); } - if (!hash) { - if (keysv && (SvIsCOW_shared_hash(keysv))) - hash = SvSHARED_HASH(keysv); - else - PERL_HASH(hash, key, klen); + if (keysv && (SvIsCOW_shared_hash(keysv))) { + if (HvSHAREKEYS(hv)) + keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + hash = SvSHARED_HASH(keysv); } + else if (!hash) + PERL_HASH(hash, key, klen); masked_flags = (k_flags & HVhek_MASK); first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; entry = *oentry; - for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { - SV *sv; - U8 mro_changes = 0; /* 1 = isa; 2 = package moved */ - GV *gv = NULL; - HV *stash = NULL; + if (!entry) + goto not_found; + + if (keysv_hek) { + /* keysv is actually a HEK in disguise, so we can match just by + * comparing the HEK pointers in the HE chain. There is a slight + * caveat: on something like "\x80", which has both plain and utf8 + * representations, perl's hashes do encoding-insensitive lookups, + * but preserve the encoding of the stored key. Thus a particular + * key could map to two different HEKs in PL_strtab. We only + * conclude 'not found' if all the flags are the same; otherwise + * we fall back to a full search (this should only happen in rare + * cases). + */ + int keysv_flags = HEK_FLAGS(keysv_hek); + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { + HEK *hek = HeKEY_hek(entry); + if (hek == keysv_hek) + goto found; + if (HEK_FLAGS(hek) != keysv_flags) + break; /* need to do full match */ + } + if (!entry) + goto not_found; + /* failed on shortcut - do full search loop */ + oentry = first_entry; + entry = *oentry; + } + + for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { if (HeHASH(entry) != hash) /* strings can't be equal */ continue; if (HeKLEN(entry) != (I32)klen) continue; - if (HeKEY(entry) != key && memNE(HeKEY(entry),key,klen)) /* is this it? */ + if (memNE(HeKEY(entry),key,klen)) /* is this it? */ continue; if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) continue; + found: if (hv == PL_strtab) { if (k_flags & HVhek_FREEKEY) Safefree(key); @@ -1148,6 +1217,8 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, return sv; } + + not_found: if (SvREADONLY(hv)) { hv_notallowed(k_flags, key, klen, "Attempt to delete disallowed key '%"SVf"' from" |