summaryrefslogtreecommitdiff
path: root/erts/emulator/beam/utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/utils.c')
-rw-r--r--erts/emulator/beam/utils.c112
1 files changed, 87 insertions, 25 deletions
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 9cd4a57b85..9881eb4a78 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -1616,15 +1616,12 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
* We therefore calculate context independent hashes for all .
* key-value pairs and then xor them together.
*/
- ESTACK_PUSH(s, hash_xor_pairs);
- ESTACK_PUSH(s, hash);
- ESTACK_PUSH(s, HASH_MAP_TAIL);
+ ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL);
hash = 0;
hash_xor_pairs = 0;
for (ctx.i = ctx.size - 1; ctx.i >= 0; ctx.i--) {
- ESTACK_PUSH(s, HASH_MAP_PAIR);
- ESTACK_PUSH(s, ctx.vs[ctx.i]);
- ESTACK_PUSH(s, ctx.ks[ctx.i]);
+ ESTACK_PUSH3(s, HASH_MAP_PAIR,
+ ctx.vs[ctx.i], ctx.ks[ctx.i]);
TRAP_LOCATION(hamt_subtag_head_flatmap);
}
goto hash2_common;
@@ -1636,9 +1633,7 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
UINT32_HASH(size, HCONST_16);
if (size == 0)
goto hash2_common;
- ESTACK_PUSH(s, hash_xor_pairs);
- ESTACK_PUSH(s, hash);
- ESTACK_PUSH(s, HASH_MAP_TAIL);
+ ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL);
hash = 0;
hash_xor_pairs = 0;
}
@@ -1656,13 +1651,22 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_
while (ctx.i) {
if (is_list(*ctx.ptr)) {
Eterm* cons = list_val(*ctx.ptr);
- ESTACK_PUSH(s, HASH_MAP_PAIR);
- ESTACK_PUSH(s, CDR(cons));
- ESTACK_PUSH(s, CAR(cons));
+ ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons));
}
else {
ASSERT(is_boxed(*ctx.ptr));
- ESTACK_PUSH(s, *ctx.ptr);
+ if (is_tuple(*ctx.ptr)) { /* collision node */
+ Eterm *coll_ptr = tuple_val(*ctx.ptr);
+ Uint n = arityval(*coll_ptr);
+ ASSERT(n >= 2);
+ coll_ptr++;
+ for (; n; n--, coll_ptr++) {
+ Eterm* cons = list_val(*coll_ptr);
+ ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons));
+ }
+ }
+ else
+ ESTACK_PUSH(s, *ctx.ptr);
}
ctx.i--; ctx.ptr++;
TRAP_LOCATION(map_subtag);
@@ -2047,6 +2051,45 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
return make_hash2_helper(term, 1, state_mref_write_back, p);
}
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key)
+{
+/*{
+ static Uint32 hashvec[7] = {
+ 0x02345678,
+ 0x12345678,
+ 0xe2345678,
+ 0xf2345678,
+ 0x12abcdef,
+ 0x13abcdef,
+ 0xcafebabe
+ };
+ hash = hashvec[hash % (sizeof(hashvec) / sizeof(hashvec[0]))];
+ }*/
+ const Uint32 bad_hash = (hash & 0x12482481) * 1442968193;
+ const Uint32 bad_bits = hash % 67;
+ if (bad_bits < 32) {
+ /* Mix in a number of high good bits to get "randomly" close
+ to the collision nodes */
+ const Uint32 bad_mask = (1 << bad_bits) - 1;
+ return (hash & ~bad_mask) | (bad_hash & bad_mask);
+ }
+ return bad_hash;
+}
+#endif
+
+/* Term hash function for maps, with a separate depth parameter */
+Uint32 make_map_hash(Eterm key) {
+ Uint32 hash = 0;
+
+ hash = make_internal_hash(key, hash);
+
+#ifdef DBG_HASHMAP_COLLISION_BONANZA
+ hash = erts_dbg_hashmap_collision_bonanza(hash, key);
+#endif
+ return hash;
+}
+
/* Term hash function for internal use.
*
* Limitation #1: Is not "portable" in any way between different VM instances.
@@ -2057,12 +2100,10 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
* with a new ErlNode struct, externals from that node will hash different than
* before.
*
- * One IMPORTANT property must hold (for hamt).
- * EVERY BIT of the term that is significant for equality (see EQ)
- * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
- * chance of hashing different when salted.
- *
- * This is why we cannot use cached hash values for atoms for example.
+ * The property "EVERY BIT of the term that is significant for equality
+ * MUST BE USED AS INPUT FOR THE HASH" is nice but no longer crucial for the
+ * hashmap implementation that now uses collision nodes at the bottom of
+ * the HAMT when all hash bits are exhausted.
*
*/
@@ -2208,6 +2249,8 @@ make_internal_hash(Eterm term, Uint32 salt)
}
else {
ASSERT(is_boxed(*ptr));
+ /* no special treatment of collision nodes needed,
+ hash them as the tuples they are */
ESTACK_PUSH(s, *ptr);
}
i--; ptr++;
@@ -3073,8 +3116,8 @@ tailrecur_ne:
sz = hashmap_bitcount(MAP_HEADER_VAL(hdr));
ASSERT(sz > 0 && sz < 17);
break;
- default:
- erts_exit(ERTS_ERROR_EXIT, "Unknown hashmap subsubtag\n");
+ default:
+ erts_exit(ERTS_ERROR_EXIT, "bad header");
}
goto term_array;
}
@@ -3437,6 +3480,11 @@ tailrecur_ne:
A minimal key can only be candidate as tie-breaker if we
have passed that hash value in the other tree (which means
the key did not exist in the other tree).
+
+ Collision node amendment:
+ The leafs in collision nodes are sorted in map-key order.
+ If keys are different but hashes are equal we advance the
+ one lagging behind key-wise.
*/
sp = PSTACK_PUSH(hmap_stack);
@@ -3835,12 +3883,19 @@ pop_next:
sp = PSTACK_TOP(hmap_stack);
if (j) {
/* Key diff found, enter phase 2 */
- if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) {
+ int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp);
+ if (hash_cmp == 0) {
+ /* Hash collision. Collision nodes are sorted by map key
+ * order, so we advance the one with the lesser key */
+ hash_cmp = j;
+ }
+ if (hash_cmp < 0) {
sp->min_key = CAR(sp->ap);
sp->cmp_res = -1;
sp->ap = hashmap_iterator_next(&stack);
}
else {
+ ASSERT(hash_cmp > 0);
sp->min_key = CAR(sp->bp);
sp->cmp_res = 1;
sp->bp = hashmap_iterator_next(&b_stack);
@@ -3890,7 +3945,7 @@ pop_next:
sp->bp = hashmap_iterator_next(&b_stack);
if (!sp->ap) {
/* end of maps with identical keys */
- ASSERT(!sp->bp);
+ ASSERT(!sp->bp); /* as we assume indentical map sizes */
j = sp->cmp_res;
exact = sp->was_exact;
(void) PSTACK_POP(hmap_stack);
@@ -3925,14 +3980,21 @@ pop_next:
/* fall through */
case_HASHMAP_PHASE2_NEXT_STEP:
if (sp->ap || sp->bp) {
- if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) {
+ int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp);
+ if (hash_cmp == 0) {
+ /* Hash collision. Collision nodes are sorted by map key
+ * order, so we advance the one with the lesser key */
+ hash_cmp = j;
+ }
+ if (hash_cmp < 0) {
ASSERT(sp->ap);
a = CAR(sp->ap);
b = sp->min_key;
ASSERT(exact);
WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_IS_MIN_KEY_A));
}
- else { /* hash_cmp > 0 */
+ else {
+ ASSERT(hash_cmp > 0);
ASSERT(sp->bp);
a = CAR(sp->bp);
b = sp->min_key;