diff options
-rw-r--r-- | erts/emulator/beam/dist.c | 8 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 7 | ||||
-rw-r--r-- | erts/emulator/beam/erl_db_util.c | 107 | ||||
-rw-r--r-- | erts/emulator/beam/erl_map.c | 677 | ||||
-rw-r--r-- | erts/emulator/beam/erl_map.h | 32 | ||||
-rw-r--r-- | erts/emulator/beam/erl_term.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_utils.h | 8 | ||||
-rw-r--r-- | erts/emulator/beam/external.c | 49 | ||||
-rw-r--r-- | erts/emulator/beam/jit/arm/instr_map.cpp | 86 | ||||
-rw-r--r-- | erts/emulator/beam/jit/x86/instr_map.cpp | 86 | ||||
-rw-r--r-- | erts/emulator/beam/utils.c | 111 | ||||
-rw-r--r-- | erts/emulator/test/map_SUITE.erl | 70 |
12 files changed, 793 insertions, 450 deletions
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 9a760fd97f..550d4301a2 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -6246,7 +6246,7 @@ nodes(Process *c_p, Eterm node_types, Eterm options) } } else { - Eterm ks[2], *hp, keys_tuple = THE_NON_VALUE; + Eterm ks[2], *hp; Uint map_size = 0, el_xtra, xtra; ErtsHeapFactory hfact; @@ -6281,8 +6281,7 @@ nodes(Process *c_p, Eterm node_types, Eterm options) vs[map_size++] = eni->type; } - info_map = erts_map_from_sorted_ks_and_vs(&hfact, ks, vs, - map_size, &keys_tuple); + info_map = erts_map_from_sorted_ks_and_vs(&hfact, ks, vs, map_size); hp = erts_produce_heap(&hfact, 3+2, xtra); @@ -6868,8 +6867,7 @@ send_nodes_mon_msgs(Process *c_p, Eterm what, Eterm node, map_size++; } - info = erts_map_from_sorted_ks_and_vs(&hfact, ks, vs, - map_size, NULL); + info = erts_map_from_sorted_ks_and_vs(&hfact, ks, vs, map_size); ASSERT(is_value(info)); } else { /* Info list */ diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index f340015661..a187646857 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -4292,6 +4292,13 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1) BIF_RET(uword_to_big(size, hp)); } } + else if (ERTS_IS_ATOM_STR("hashmap_collision_bonanza", BIF_ARG_1)) { +#ifdef DBG_HASHMAP_COLLISION_BONANZA + return am_true; +#else + return am_false; +#endif + } } else if (is_tuple(BIF_ARG_1)) { Eterm* tp = tuple_val(BIF_ARG_1); diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index 8243ac1a73..a236a09791 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -898,8 +898,8 @@ static Eterm dmc_lookup_bif_reversed(void *f); static int cmp_uint(void *a, void *b); static int cmp_guard_bif(void *a, void *b); static int match_compact(ErlHeapFragment *expr, DMCErrInfo *err_info); -static Uint my_size_object(Eterm t); -static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap); +static Uint my_size_object(Eterm t, int is_hashmap_node); +static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap, int); /* Guard subroutines */ static void @@ -3879,11 +3879,11 @@ static void do_emit_constant(DMCContext *context, DMC_STACK_TYPE(UWord) *text, if (is_immed(t)) { tmp = t; } else { - sz = my_size_object(t); + sz = my_size_object(t, 0); if (sz) { emb = new_message_buffer(sz); hp = emb->mem; - tmp = my_copy_struct(t,&hp,&(emb->off_heap)); + tmp = my_copy_struct(t,&hp,&(emb->off_heap), 0); emb->next = context->save; context->save = emb; } @@ -5368,33 +5368,39 @@ static int match_compact(ErlHeapFragment *expr, DMCErrInfo *err_info) /* ** Simple size object that takes care of function calls and constant tuples */ -static Uint my_size_object(Eterm t) +static Uint my_size_object(Eterm t, int is_hashmap_node) { Uint sum = 0; - Eterm tmp; Eterm *p; switch (t & _TAG_PRIMARY_MASK) { case TAG_PRIMARY_LIST: - sum += 2 + my_size_object(CAR(list_val(t))) + - my_size_object(CDR(list_val(t))); + sum += 2 + my_size_object(CAR(list_val(t)), 0) + + my_size_object(CDR(list_val(t)), 0); break; case TAG_PRIMARY_BOXED: if (is_tuple(t)) { - if (tuple_val(t)[0] == make_arityval(1) && is_tuple(tmp = tuple_val(t)[1])) { - Uint i,n; - p = tuple_val(tmp); - n = arityval(p[0]); - sum += 1 + n; - for (i = 1; i <= n; ++i) - sum += my_size_object(p[i]); - } else if (tuple_val(t)[0] == make_arityval(2) && - is_atom(tmp = tuple_val(t)[1]) && - tmp == am_const) { + Eterm* tpl = tuple_val(t); + Uint i,n; + + if (is_hashmap_node) { + /* hashmap collision node, no matchspec syntax here */ + } + else if (tpl[0] == make_arityval(1) && is_tuple(tpl[1])) { + tpl = tuple_val(tpl[1]); + } + else if (tpl[0] == make_arityval(2) && tpl[1] == am_const) { sum += size_object(tuple_val(t)[2]); - } else { + break; + } + else { erts_exit(ERTS_ERROR_EXIT,"Internal error, sizing unrecognized object in " "(d)ets:match compilation."); } + + n = arityval(tpl[0]); + sum += 1 + n; + for (i = 1; i <= n; ++i) + sum += my_size_object(tpl[i], 0); break; } else if (is_map(t)) { if (is_flatmap(t)) { @@ -5407,7 +5413,7 @@ static Uint my_size_object(Eterm t) n = arityval(p[0]); sum += 1 + n; for (int i = 1; i <= n; ++i) - sum += my_size_object(p[i]); + sum += my_size_object(p[i], 0); /* Calculate size of values */ p = (Eterm *)mp; @@ -5415,18 +5421,19 @@ static Uint my_size_object(Eterm t) sum += n + 3; p += 3; /* hdr + size + keys words */ while (n--) { - sum += my_size_object(*p++); + sum += my_size_object(*p++, 0); } } else { Eterm *head = (Eterm *)hashmap_val(t); Eterm hdr = *head; Uint sz; + sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); sum += 1 + sz + header_arity(hdr); head += 1 + header_arity(hdr); while(sz-- > 0) { - sum += my_size_object(head[sz]); + sum += my_size_object(head[sz], 1); } } break; @@ -5439,46 +5446,54 @@ static Uint my_size_object(Eterm t) return sum; } -static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap) +static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap, + int is_hashmap_node) { Eterm ret = NIL, a, b; Eterm *p; Uint sz; switch (t & _TAG_PRIMARY_MASK) { case TAG_PRIMARY_LIST: - a = my_copy_struct(CAR(list_val(t)), hp, off_heap); - b = my_copy_struct(CDR(list_val(t)), hp, off_heap); + a = my_copy_struct(CAR(list_val(t)), hp, off_heap, 0); + b = my_copy_struct(CDR(list_val(t)), hp, off_heap, 0); ret = CONS(*hp, a, b); *hp += 2; break; case TAG_PRIMARY_BOXED: if (is_tuple(t)) { - if (tuple_val(t)[0] == make_arityval(1) && - is_tuple(a = tuple_val(t)[1])) { - Uint i,n; - p = tuple_val(a); - n = arityval(p[0]); - if (n == 0) { - ret = ERTS_GLOBAL_LIT_EMPTY_TUPLE; - } else { - Eterm *savep = *hp; - ret = make_tuple(savep); - *hp += n + 1; - *savep++ = make_arityval(n); - for(i = 1; i <= n; ++i) - *savep++ = my_copy_struct(p[i], hp, off_heap); - } + Eterm* tpl = tuple_val(t); + Uint i,n; + Eterm *savep; + + if (is_hashmap_node) { + /* hashmap collision node, no matchspec syntax here */ + } + else if (tpl[0] == make_arityval(1) && is_tuple(tpl[1])) { + /* A {{...}} expression */ + tpl = tuple_val(tpl[1]); } - else if (tuple_val(t)[0] == make_arityval(2) && - tuple_val(t)[1] == am_const) { + else if (tpl[0] == make_arityval(2) && tpl[1] == am_const) { /* A {const, XXX} expression */ - b = tuple_val(t)[2]; + b = tpl[2]; sz = size_object(b); ret = copy_struct(b,sz,hp,off_heap); + break; } else { erts_exit(ERTS_ERROR_EXIT, "Trying to constant-copy non constant expression " "0x%bex in (d)ets:match compilation.", t); } + n = arityval(tpl[0]); + if (n == 0) { + ret = ERTS_GLOBAL_LIT_EMPTY_TUPLE; + } else { + savep = *hp; + ret = make_tuple(savep); + *hp += n + 1; + *savep++ = tpl[0]; + for(i = 1; i <= n; ++i) + *savep++ = my_copy_struct(tpl[i], hp, off_heap, 0); + } + } else if (is_map(t)) { if (is_flatmap(t)) { Uint i,n; @@ -5499,7 +5514,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap) *hp += n + 1; *savep++ = make_arityval(n); for(i = 1; i <= n; ++i) - *savep++ = my_copy_struct(p[i], hp, off_heap); + *savep++ = my_copy_struct(p[i], hp, off_heap, 0); } savep = *hp; ret = make_flatmap(savep); @@ -5511,7 +5526,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap) *savep++ = keys; p += 3; /* hdr + size + keys words */ for (i = 0; i < n; i++) - *savep++ = my_copy_struct(p[i], hp, off_heap); + *savep++ = my_copy_struct(p[i], hp, off_heap, 0); erts_usort_flatmap((flatmap_t*)flatmap_val(ret)); } else { Eterm *head = hashmap_val(t); @@ -5528,7 +5543,7 @@ static Eterm my_copy_struct(Eterm t, Eterm **hp, ErlOffHeap* off_heap) *savep++ = *head++; /* map size */ for (int i = 0; i < sz; i++) { - *savep++ = my_copy_struct(head[i],hp,off_heap); + *savep++ = my_copy_struct(head[i],hp,off_heap, 1); } } } else { diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c index e0b881db0a..119bd5ebea 100644 --- a/erts/emulator/beam/erl_map.c +++ b/erts/emulator/beam/erl_map.c @@ -95,8 +95,8 @@ static Uint hashmap_subtree_size(Eterm node); static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, Eterm node, Eterm *value); static Eterm flatmap_from_validated_list(Process *p, Eterm list, Eterm fill_value, Uint size); static Eterm hashmap_from_unsorted_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int reject_dupkeys, ErtsAlcType_t temp_memory_allocator); -static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, int is_root, ErtsAlcType_t temp_memory_allocator); -static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, Uint size, int is_root, ErtsAlcType_t temp_memory_allocator); +static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, ErtsAlcType_t temp_memory_allocator); +static Eterm hashmap_from_chunked_array(ErtsHeapFactory*, hxnode_t *hxns, Uint n, Uint size, ErtsAlcType_t temp_memory_allocator); static Eterm hashmap_info(Process *p, Eterm node); static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]); static int hxnodecmp(const void* a, const void* b); @@ -112,8 +112,7 @@ static int hxnodecmpkey(const void* a, const void* b); #define maskval(V,L) (((V) >> ((7 - (L))*4)) & 0xf) #define DBG_PRINT(X) /*erts_printf X*/ -#define HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY 200 -#define HALLOC_EXTRA HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY +#define HALLOC_EXTRA 200 /* ******************************* * ** Yielding C Fun (YCF) Note ** * ******************************* @@ -135,7 +134,6 @@ static int hxnodecmpkey(const void* a, const void* b); #include "erl_map.ycf.h" #endif #define NOT_YCF_YIELDING_VERSION 1 -#undef HALLOC_EXTRA #define YCF_CONSUME_REDS(X) while(0){} void erts_init_map(void) { @@ -278,6 +276,7 @@ BIF_RETTYPE map_get_2(BIF_ALIST_2) { * means that the code has to follow some restrictions. See note about * YCF near the top of the file for more information. */ + #ifdef INCLUDE_YCF_TRANSFORMED_ONLY_FUNCTIONS static BIF_RETTYPE maps_from_keys_2_helper(Process* p, Eterm* bif_args) { Eterm list = bif_args[0]; @@ -671,33 +670,20 @@ Eterm erts_hashmap_from_array(ErtsHeapFactory* factory, Eterm *leafs, Uint n, static ERTS_INLINE Eterm from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, - Uint n, Eterm *key_tuple, flatmap_t **fmpp) + Uint n, flatmap_t **fmpp) { if (n <= MAP_SMALL_MAP_LIMIT) { Eterm *hp; flatmap_t *fmp; Eterm keys; - if (key_tuple && is_value(*key_tuple)) { - keys = *key_tuple; - hp = erts_produce_heap(factory, MAP_HEADER_FLATMAP_SZ + n, 0); - ASSERT(is_tuple_arity(keys, n)); - ASSERT(n == 0 || sys_memcmp((void *) (tuple_val(keys) + 1), - (void *) ks, - n * sizeof(Eterm)) == 0); - } - else if (n == 0) { + if (n == 0) { keys = ERTS_GLOBAL_LIT_EMPTY_TUPLE; - if (key_tuple) - *key_tuple = keys; hp = erts_produce_heap(factory, MAP_HEADER_FLATMAP_SZ + n, 0); } else { hp = erts_produce_heap(factory, 1 + MAP_HEADER_FLATMAP_SZ + 2*n, 0); keys = make_tuple(hp); - if (key_tuple) { - *key_tuple = keys; - } *hp++ = make_arityval(n); sys_memcpy((void *) hp, (void *) ks, @@ -732,7 +718,7 @@ Eterm erts_map_from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Ui Eterm res; flatmap_t *fmp; - res = from_ks_and_vs(factory, ks, vs, n, NULL, &fmp); + res = from_ks_and_vs(factory, ks, vs, n, &fmp); if (fmp) { if (erts_validate_and_sort_flatmap(fmp)) { res = make_flatmap(fmp); @@ -745,7 +731,7 @@ Eterm erts_map_from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Ui } Eterm erts_map_from_sorted_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, - Uint n, Eterm *key_tuple) + Uint n) { #ifdef DEBUG Uint i; @@ -755,7 +741,7 @@ Eterm erts_map_from_sorted_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm } #endif - return from_ks_and_vs(factory, ks, vs, n, key_tuple, NULL); + return from_ks_and_vs(factory, ks, vs, n, NULL); } @@ -832,7 +818,7 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory, if (hxns[ix].hx == hxns[ix+1].hx) { /* find region of equal hash values */ - jx = ix + 1; + jx = ix + 2; while(jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; } /* find all correct keys from region * (last in list but now hash sorted so we check highest id instead) */ @@ -875,7 +861,8 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory, if (cx > 1) { /* recursive decompose array */ - res = hashmap_from_sorted_unique_array(factory, hxns, cx, 0, temp_memory_allocator); + res = hashmap_from_sorted_unique_array(factory, hxns, cx, + temp_memory_allocator); } else { Eterm *hp; @@ -902,51 +889,49 @@ static Eterm hashmap_from_unsorted_array(ErtsHeapFactory* factory, * YCF near the top of the file for more information. */ static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory, - hxnode_t *hxns, Uint n, int lvl, + hxnode_t *hxns, Uint n, ErtsAlcType_t temp_memory_allocator) { Eterm res = NIL; - Uint i; Uint ix; - Uint jx; Uint elems; - Uint32 sw; - Uint32 hx; - Eterm val; hxnode_t *tmp = NULL; - ASSERT(lvl < 32); + ix = 0; elems = 1; while (ix < n - 1) { if (hxns[ix].hx == hxns[ix+1].hx) { - jx = ix + 1; - while (jx < n && hxns[ix].hx == hxns[jx].hx) { jx++; } - tmp = (hxnode_t *)erts_alloc(temp_memory_allocator, ((jx - ix)) * sizeof(hxnode_t)); - - for(i = 0; i < jx - ix; i++) { - val = hxns[i + ix].val; - hx = hashmap_restore_hash(lvl + 8, CAR(list_val(val))); - swizzle32(sw,hx); - tmp[i].hx = sw; - tmp[i].val = val; - tmp[i].i = i; - tmp[i].skip = 1; - } + Uint n_colliders; + Eterm* hp; + Eterm collision_node; + Uint jx = ix + 2; + Uint i; + + while (jx < n && hxns[ix].hx == hxns[jx].hx) + jx++; + + n_colliders = jx - ix; + hp = erts_produce_heap(factory, HAMT_COLLISION_NODE_SZ(n_colliders), + HALLOC_EXTRA); + collision_node = make_tuple(hp); + + *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n_colliders); + for (i = 0; i < n_colliders; i++) { + *hp++ = hxns[ix + i].val; + ASSERT(i == 0 + || CMP_TERM(CAR(list_val(hxns[ix+i-1].val)), + CAR(list_val(hxns[ix+i].val))) < 0); + } - erts_qsort(tmp, jx - ix, sizeof(hxnode_t), hxnodecmp); + hxns[ix].val = collision_node; + hxns[ix].skip = n_colliders; + ix = jx; - hxns[ix].skip = jx - ix; - hxns[ix].val = - hashmap_from_sorted_unique_array(factory, tmp, jx - ix, lvl + 8, temp_memory_allocator); - erts_free(temp_memory_allocator, (void *) tmp); - /* Memory management depend on the statement below */ - tmp = NULL; - ix = jx; - if (ix < n) { elems++; } - continue; + if (ix < n) { elems++; } + continue; } - hxns[ix].skip = 1; - elems++; - ix++; + hxns[ix].skip = 1; + elems++; + ix++; } YCF_SPECIAL_CODE_START(ON_DESTROY_STATE); { @@ -957,14 +942,13 @@ static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory, } } YCF_SPECIAL_CODE_END(); - res = hashmap_from_chunked_array(factory, hxns, elems, n, !lvl, temp_memory_allocator); + res = hashmap_from_chunked_array(factory, hxns, elems, n, temp_memory_allocator); ERTS_FACTORY_HOLE_CHECK(factory); return res; } -#define HALLOC_EXTRA HALLOC_EXTRA_HASHMAP_FROM_CHUNKED_ARRAY /* **Important Note** * * A yielding version of this function is generated with YCF. This @@ -972,7 +956,7 @@ static Eterm hashmap_from_sorted_unique_array(ErtsHeapFactory* factory, * YCF near the top of the file for more information. */ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns, Uint n, - Uint size, int is_root, + Uint size, ErtsAlcType_t temp_memory_allocator) { Uint ix; Uint d; @@ -1024,16 +1008,11 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns } slot = maskval(v,0); - hp = erts_produce_heap(factory, (is_root ? 3 : 2), 0); + hp = erts_produce_heap(factory, 3, 0); - if (is_root) { - hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot); - hp[1] = size; - hp[2] = res; - } else { - hp[0] = MAP_HEADER_HAMT_NODE_BITMAP(1 << slot); - hp[1] = res; - } + hp[0] = MAP_HEADER_HAMT_HEAD_BITMAP(1 << slot); + hp[1] = size; + hp[2] = res; return make_hashmap(hp); } @@ -1195,15 +1174,11 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns bp = 1 << slot; hdr |= bp; sz = hashmap_bitcount(hdr); - hp = erts_produce_heap(factory, sz + /* hdr + item */ (is_root ? 2 : 1), 0); + hp = erts_produce_heap(factory, sz + /* hdr + item */ 2, 0); nhp = hp; - if (is_root) { - *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr); - *hp++ = size; - } else { - *hp++ = MAP_HEADER_HAMT_NODE_BITMAP(hdr); - } + *hp++ = (hdr == 0xffff) ? MAP_HEADER_HAMT_HEAD_ARRAY : MAP_HEADER_HAMT_HEAD_BITMAP(hdr); + *hp++ = size; *hp++ = res; sz--; while (sz--) { *hp++ = ESTACK_POP(stack); } @@ -1215,7 +1190,6 @@ static Eterm hashmap_from_chunked_array(ErtsHeapFactory *factory, hxnode_t *hxns ERTS_FACTORY_HOLE_CHECK(factory); return res; } -#undef HALLOC_EXTRA static int hxnodecmpkey(const void *va, const void *vb) { const hxnode_t *a = (const hxnode_t*) va; @@ -1544,9 +1518,62 @@ BIF_RETTYPE maps_merge_trap_1(BIF_ALIST_1) { (HashmapMergeContext*) ERTS_MAGIC_BIN_DATA(ctx_bin)); } -#define HALLOC_EXTRA 200 #define MAP_MERGE_LOOP_FACTOR 8 +static Eterm merge_maybe_collision_node(Process* p, + Eterm* srcA, Uint szA, + Eterm* srcB, Uint szB, + Uint* map_sizep) +{ + Eterm *hp; + Eterm *hdr_ptr; + Eterm *hp_end; + Uint arity; + + ERTS_ASSERT(szA >= 1 && szB >= 1); + arity = szA + szB; + hp = HAlloc(p, HAMT_COLLISION_NODE_SZ(arity)); + hp_end = hp + HAMT_COLLISION_NODE_SZ(arity); + hdr_ptr = hp++; + + while (szA && szB) { + Eterm keyA = CAR(list_val(*srcA)); + Eterm keyB = CAR(list_val(*srcB)); + const Sint key_cmp = CMP_TERM(keyA, keyB); + + if (key_cmp < 0) { + *hp++ = *srcA++; + szA--; + } + else { + *hp++ = *srcB++; + szB--; + if (key_cmp == 0) { + srcA++; + szA--; + arity--; + (*map_sizep)--; + } + } + } + if (arity == 1) { /* no collision node needed */ + Eterm ret_cons = hdr_ptr[1]; + ASSERT(!szA && !szB); + HRelease(p, hp_end, hdr_ptr); + return ret_cons; + } + + for ( ; szA; szA--) + *hp++ = *srcA++; + for ( ; szB; szB--) + *hp++ = *srcB++; + + HRelease(p, hp_end, hp); + *hdr_ptr = make_arityval(arity); + return make_tuple(hdr_ptr); +} + + static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, int swap_args, HashmapMergeContext* ctx) { #define PSTACK_TYPE struct HashmapMergePStackType @@ -1560,6 +1587,7 @@ static BIF_RETTYPE hashmap_merge(Process *p, Eterm map_A, Eterm map_B, Eterm trap_ret; Sint initial_reds = (Sint) (ERTS_BIF_REDS_LEFT(p) * MAP_MERGE_LOOP_FACTOR); Sint reds = initial_reds; + Uint coll_szA = 0, coll_szB = 0; /* * Strategy: Do depth-first traversal of both trees (at the same time) @@ -1619,8 +1647,13 @@ recurse: goto merge_nodes; } } - hx = hashmap_restore_hash(ctx->lvl, keyA); - sp->abm = 1 << hashmap_index(hx); + if (ctx->lvl < HAMT_MAX_LEVEL) { + hx = hashmap_restore_hash(ctx->lvl, keyA); + sp->abm = 1 << hashmap_index(hx); + } + else { + coll_szA = 1; + } /* keep srcA pointing at the leaf */ } else { /* A is NODE */ @@ -1629,25 +1662,35 @@ recurse: ASSERT(is_header(hdrA)); switch (hdrA & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: { + ASSERT(ctx->lvl < HAMT_MAX_LEVEL); sp->srcA++; sp->abm = 0xffff; break; } case HAMT_SUBTAG_HEAD_BITMAP: sp->srcA++; case HAMT_SUBTAG_NODE_BITMAP: { + ASSERT(ctx->lvl < HAMT_MAX_LEVEL); sp->abm = MAP_HEADER_VAL(hdrA); break; } - default: - erts_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrA); + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdrA)); + ASSERT(ctx->lvl == HAMT_MAX_LEVEL); + coll_szA = arityval(hdrA); + ASSERT(coll_szA >= 2); } } if (is_list(sp->nodeB)) { /* B is LEAF */ Eterm keyB = CAR(list_val(sp->nodeB)); - hx = hashmap_restore_hash(ctx->lvl, keyB); - sp->bbm = 1 << hashmap_index(hx); + if (ctx->lvl < HAMT_MAX_LEVEL) { + hx = hashmap_restore_hash(ctx->lvl, keyB); + sp->bbm = 1 << hashmap_index(hx); + } + else { + coll_szB = 1; + } /* keep srcB pointing at the leaf */ } else { /* B is NODE */ @@ -1656,17 +1699,22 @@ recurse: ASSERT(is_header(hdrB)); switch (hdrB & _HEADER_MAP_SUBTAG_MASK) { case HAMT_SUBTAG_HEAD_ARRAY: { + ASSERT(ctx->lvl < HAMT_MAX_LEVEL); sp->srcB++; sp->bbm = 0xffff; break; } case HAMT_SUBTAG_HEAD_BITMAP: sp->srcB++; case HAMT_SUBTAG_NODE_BITMAP: { + ASSERT(ctx->lvl < HAMT_MAX_LEVEL); sp->bbm = MAP_HEADER_VAL(hdrB); break; } - default: - erts_exit(ERTS_ABORT_EXIT, "bad header %ld\r\n", hdrB); + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdrB)); + ASSERT(ctx->lvl == HAMT_MAX_LEVEL); + coll_szB = arityval(hdrB); + ASSERT(coll_szB >= 2); } } } @@ -1690,13 +1738,22 @@ merge_nodes: sp->srcA++; sp->srcB++; } - } else { /* Start build a node */ + } + else if (ctx->lvl < HAMT_MAX_LEVEL) { /* Start build a node */ sp->ix = 0; sp->rbm = sp->abm | sp->bbm; ASSERT(!(sp->rbm == 0 && ctx->lvl > 0)); } + else { + res = merge_maybe_collision_node(p, sp->srcA, coll_szA, + sp->srcB, coll_szB, &ctx->size); + sp->mix = 3; + coll_szA = coll_szB = 0; + continue; + } if (--reds <= 0) { + ASSERT(!coll_szA && !coll_szB); goto trap; } resume_from_trap: @@ -1840,21 +1897,14 @@ static int hash_cmp(Uint32 ha, Uint32 hb) int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp) { - unsigned int lvl = 0; - if (ap && bp) { + Uint32 ha, hb; ASSERT(CMP_TERM(CAR(ap), CAR(bp)) != 0); - for (;;) { - Uint32 ha = hashmap_restore_hash(lvl, CAR(ap)); - Uint32 hb = hashmap_restore_hash(lvl, CAR(bp)); - int cmp = hash_cmp(ha, hb); - if (cmp) { - return cmp; - } - lvl += 8; - } + ha = hashmap_make_hash(CAR(ap)); + hb = hashmap_make_hash(CAR(bp)); + return hash_cmp(ha, hb); } - + ASSERT(ap || bp); return ap ? -1 : 1; } @@ -2289,7 +2339,9 @@ Uint hashmap_node_size(Eterm hdr, Eterm **nodep) ASSERT(sz < 17); break; default: - erts_exit(ERTS_ABORT_EXIT, "bad header"); + ERTS_ASSERT(is_arity_value(hdr)); + sz = arityval(hdr); + break; } return sz; } @@ -2383,7 +2435,7 @@ Eterm* hashmap_iterator_prev(ErtsWStack* s) { const Eterm * erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) { - Eterm *ptr, hdr, *res; + Eterm *ptr, hdr; Uint ix, lvl = 0; Uint32 hval,bp; @@ -2394,15 +2446,16 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) ASSERT(is_hashmap_header_head(hdr)); ptr++; - for (;;) { + do { + ASSERT(lvl == 0 || is_hashmap_header_node(hdr)); + hval = MAP_HEADER_VAL(hdr); ix = hashmap_index(hx); if (hval != 0xffff) { bp = 1 << ix; if (!(bp & hval)) { /* not occupied */ - res = NULL; - break; + return NULL; } ix = hashmap_bitcount(hval & (bp - 1)); } @@ -2410,8 +2463,7 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) if (is_list(node)) { /* LEAF NODE [K|V] */ ptr = list_val(node); - res = EQ(CAR(ptr), key) ? &(CDR(ptr)) : NULL; - break; + return EQ(CAR(ptr), key) ? &(CDR(ptr)) : NULL; } hx = hashmap_shift_hash(hx,lvl,key); @@ -2420,10 +2472,17 @@ erts_hashmap_get(Uint32 hx, Eterm key, Eterm node) ptr = boxed_val(node); hdr = *ptr; ASSERT(is_header(hdr)); - ASSERT(!is_hashmap_header_head(hdr)); - } + } while (!is_arity_value(hdr)); - return res; + /* collision node */ + ix = arityval(hdr); + ASSERT(ix > 1); + do { + Eterm* kv = list_val(*(++ptr)); + if (EQ(CAR(kv), key)) + return &(CDR(kv)); + } while (--ix > 0); + return NULL; } Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, @@ -2437,6 +2496,8 @@ Eterm erts_hashmap_insert(Process *p, Uint32 hx, Eterm key, Eterm value, /* We are putting a new value (under a new or existing key) */ hp = HAlloc(p, size); res = erts_hashmap_insert_up(hp, key, value, upsz, &stack); + ASSERT(hashmap_val(res) + 2 + hashmap_bitcount(MAP_HEADER_VAL(*hashmap_val(res))) + == hp + size); } else { /* We are putting the same key-value */ @@ -2544,9 +2605,34 @@ int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint } size += HAMT_HEAD_BITMAP_SZ(n+1); goto unroll; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); - break; + default: + ERTS_ASSERT(is_arity_value(hdr)); + n = arityval(hdr); + ASSERT(n >= 2); + for (slot = 0; slot < n; slot++) { + Eterm* kv = list_val(ptr[1+slot]); + Sint c; + ckey = CAR(kv); + c = CMP_TERM(key, ckey); + if (c == 0) { + if (CDR(ptr) == value) { + *sz = 0; + return 1; + } + *update_size = 0; + size += HAMT_COLLISION_NODE_SZ(n); + ESTACK_PUSH3(*sp, slot, 0, node); + goto unroll; + } + if (c < 0) + break; + } + if (is_update) { + return 0; + } + size += HAMT_COLLISION_NODE_SZ(n+1); + ESTACK_PUSH3(*sp, slot, 1, node); + goto unroll; } break; default: @@ -2555,21 +2641,25 @@ int erts_hashmap_insert_down(Uint32 hx, Eterm key, Eterm value, Eterm node, Uint } } insert_subnodes: - clvl = lvl; - chx = hashmap_restore_hash(clvl,ckey); - size += HAMT_NODE_BITMAP_SZ(2); - ix = hashmap_index(hx); - cix = hashmap_index(chx); - - while (cix == ix) { - ESTACK_PUSH4(*sp, 0, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0)); - size += HAMT_NODE_BITMAP_SZ(1); - hx = hashmap_shift_hash(hx,lvl,key); - chx = hashmap_shift_hash(chx,clvl,ckey); - ix = hashmap_index(hx); - cix = hashmap_index(chx); - } - ESTACK_PUSH3(*sp, cix, ix, node); + if (lvl < HAMT_MAX_LEVEL) { + clvl = lvl; + chx = hashmap_restore_hash(clvl,ckey); + do { + ix = hashmap_index(hx); + cix = hashmap_index(chx); + if (cix != ix) { + size += HAMT_NODE_BITMAP_SZ(2); + ESTACK_PUSH4(*sp, cix, ix, 0, node); + goto unroll; + } + ESTACK_PUSH4(*sp, 0, 1 << ix, 0, MAP_HEADER_HAMT_NODE_BITMAP(0)); + size += HAMT_NODE_BITMAP_SZ(1); + hx = hashmap_shift_hash(hx,lvl,key); + chx = hashmap_shift_hash(chx,clvl,ckey); + } while (lvl < HAMT_MAX_LEVEL); + } + size += HAMT_COLLISION_NODE_SZ(2); + ESTACK_PUSH2(*sp, 1, node); unroll: *sz = size + /* res cons */ 2; @@ -2583,17 +2673,29 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, Eterm *nhp = NULL; Uint32 ix, cix, bp, hval; Uint slot, n; - /* Needed for halfword */ - DeclareTmpHeapNoproc(fake,1); - UseTmpHeapNoproc(1); + Eterm fake; res = CONS(hp, key, value); hp += 2; do { node = ESTACK_POP(*sp); switch(primary_tag(node)) { - case TAG_PRIMARY_LIST: - ix = (Uint32) ESTACK_POP(*sp); + case TAG_PRIMARY_LIST: { + const int is_collision_node = (int) ESTACK_POP(*sp); + if (is_collision_node) { + nhp = hp; + *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(2); + if (CMP_TERM(key, CAR(list_val(node))) < 0){ + *hp++ = res; + *hp++ = node; + } else { + *hp++ = node; + *hp++ = res; + } + res = make_hashmap(nhp); + break; + } + ix = (Uint32)ESTACK_POP(*sp); cix = (Uint32) ESTACK_POP(*sp); nhp = hp; @@ -2607,10 +2709,11 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, } res = make_hashmap(nhp); break; + } case TAG_PRIMARY_HEADER: /* subnodes, fake it */ - *fake = node; - node = make_boxed(fake); + fake = node; + node = make_boxed(&fake); case TAG_PRIMARY_BOXED: ptr = boxed_val(node); hdr = *ptr; @@ -2663,9 +2766,27 @@ Eterm erts_hashmap_insert_up(Eterm *hp, Eterm key, Eterm value, } res = make_hashmap(nhp); break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); - break; + default: { + int is_insert; + ERTS_ASSERT(is_arity_value(hdr)); + n = arityval(hdr); + ASSERT(n >= 2); + is_insert = (int) ESTACK_POP(*sp); + slot = (Uint) ESTACK_POP(*sp); + nhp = hp; + n += is_insert; + *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n); ptr++; + ix = 0; + while (ix++ < slot) + *hp++ = *ptr++; + *hp++ = res; + if (!is_insert) + ptr++; + while (ix++ < n) + *hp++ = *ptr++; + res = make_hashmap(nhp); + break; + } } break; default: @@ -2844,9 +2965,22 @@ static Eterm hashmap_delete(Process *p, Uint32 hx, Eterm key, /* not occupied */ res = THE_NON_VALUE; goto not_found; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header tag %ld\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); - break; + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdr)); + n = arityval(hdr); + ASSERT(n >= 2); + for (slot = 0; slot < n; slot++) { + Eterm* kv = list_val(ptr[1+slot]); + if (EQ(key, CAR(kv))) { + if (value) + *value = CDR(kv); + ESTACK_PUSH2(stack, slot, node); + size += HAMT_COLLISION_NODE_SZ(n); + goto unroll; + } + } + res = THE_NON_VALUE; + goto not_found; } break; default: @@ -3028,8 +3162,25 @@ unroll: } res = make_hashmap(nhp); break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header tag %x\r\n", hdr & _HEADER_MAP_SUBTAG_MASK); + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdr)); + n = arityval(hdr); + ASSERT(n >= 2); + slot = (Uint) ESTACK_POP(stack); + ASSERT(slot < n); + if (n > 2) { /* Shrink collision node */ + nhp = hp; + *hp++ = MAP_HEADER_HAMT_COLLISION_NODE(n-1); ptr++; + n -= slot + 1; + while (slot--) { *hp++ = *ptr++; } + ptr++; + while(n--) { *hp++ = *ptr++; } + res = make_hashmap(nhp); + } + else { /* Collapse collision node */ + ASSERT(res == THE_NON_VALUE); + res = ptr[1 + (1-slot)]; + } break; } } while(!ESTACK_ISEMPTY(stack)); @@ -3304,8 +3455,10 @@ BIF_RETTYPE erts_internal_map_hashmap_children_1(BIF_ALIST_1) { sz = 16; ptr += 2; break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdr)); + sz = arityval(hdr); + ASSERT(sz >= 2); break; } ASSERT(sz < 17); @@ -3327,15 +3480,17 @@ static Eterm hashmap_info(Process *p, Eterm node) { DECL_AM(leafs); DECL_AM(bitmaps); DECL_AM(arrays); - Uint nleaf=0, nbitmap=0, narray=0; - Uint bitmap_usage[16], leaf_usage[16]; - Uint lvl = 0, clvl; + DECL_AM(collisions); + Uint nleaf=0, nbitmap=0, narray=0, ncollision = 0; + Uint bitmap_usage[16]; + Uint collision_usage[16]; + Uint leaf_usage[HAMT_MAX_LEVEL + 2]; + Uint max_depth = 0, clvl; DECLARE_ESTACK(stack); - for (sz = 0; sz < 16; sz++) { - bitmap_usage[sz] = 0; - leaf_usage[sz] = 0; - } + sys_memzero(bitmap_usage, sizeof(bitmap_usage)); + sys_memzero(collision_usage, sizeof(collision_usage)); + sys_memzero(leaf_usage, sizeof(leaf_usage)); ptr = boxed_val(node); ESTACK_PUSH(stack, 0); @@ -3343,8 +3498,6 @@ static Eterm hashmap_info(Process *p, Eterm node) { do { node = ESTACK_POP(stack); clvl = ESTACK_POP(stack); - if (lvl < clvl) - lvl = clvl; switch(primary_tag(node)) { case TAG_PRIMARY_LIST: nleaf++; @@ -3360,45 +3513,49 @@ static Eterm hashmap_info(Process *p, Eterm node) { sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); ASSERT(sz < 17); bitmap_usage[sz-1] += 1; - while(sz--) { - ESTACK_PUSH(stack, clvl + 1); - ESTACK_PUSH(stack, ptr[sz+1]); - } break; case HAMT_SUBTAG_HEAD_BITMAP: nbitmap++; sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); bitmap_usage[sz-1] += 1; - while(sz--) { - ESTACK_PUSH(stack, clvl + 1); - ESTACK_PUSH(stack, ptr[sz+2]); - } + ptr++; break; case HAMT_SUBTAG_HEAD_ARRAY: narray++; sz = 16; - while(sz--) { - ESTACK_PUSH(stack, clvl + 1); - ESTACK_PUSH(stack, ptr[sz+2]); - } - break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); + ptr++; break; + default: /* collision node */ + ERTS_ASSERT(is_arity_value(hdr)); + ncollision++; + sz = arityval(hdr); + ASSERT(sz >= 2); + collision_usage[(sz > 16 ? 16 : sz) - 1] += 1; + break; } + ASSERT(sz >= 1); + clvl++; + ASSERT(clvl <= HAMT_MAX_LEVEL+1); + if (max_depth < clvl) + max_depth = clvl; + while(sz--) { + ESTACK_PUSH(stack, clvl); + ESTACK_PUSH(stack, ptr[sz+1]); + } } } while(!ESTACK_ISEMPTY(stack)); /* size */ sz = 0; - hashmap_bld_tuple_uint(NULL,&sz,16,leaf_usage); - hashmap_bld_tuple_uint(NULL,&sz,16,bitmap_usage); + hashmap_bld_tuple_uint(NULL, &sz, HAMT_MAX_LEVEL+2, leaf_usage); + hashmap_bld_tuple_uint(NULL, &sz, 16, bitmap_usage); + hashmap_bld_tuple_uint(NULL, &sz, 16, collision_usage); /* alloc */ - hp = HAlloc(p, 2+3 + 3*(2+4) + sz); + hp = HAlloc(p, 2+3 + 4*(2+4) + sz); - info = hashmap_bld_tuple_uint(&hp,NULL,16,leaf_usage); + info = hashmap_bld_tuple_uint(&hp, NULL, HAMT_MAX_LEVEL+2, leaf_usage); tup = TUPLE3(hp, AM_leafs, make_small(nleaf),info); hp += 4; res = CONS(hp, tup, res); hp += 2; @@ -3406,10 +3563,14 @@ static Eterm hashmap_info(Process *p, Eterm node) { tup = TUPLE3(hp, AM_bitmaps, make_small(nbitmap), info); hp += 4; res = CONS(hp, tup, res); hp += 2; + info = hashmap_bld_tuple_uint(&hp, NULL, 16, collision_usage); + tup = TUPLE3(hp, AM_collisions, make_small(ncollision), info); hp += 4; + res = CONS(hp, tup, res); hp += 2; + tup = TUPLE3(hp, AM_arrays, make_small(narray),NIL); hp += 4; res = CONS(hp, tup, res); hp += 2; - tup = TUPLE2(hp, AM_depth, make_small(lvl)); hp += 3; + tup = TUPLE2(hp, AM_depth, make_small(max_depth)); hp += 3; res = CONS(hp, tup, res); hp += 2; DESTROY_ESTACK(stack); @@ -3437,12 +3598,16 @@ static Eterm hashmap_bld_tuple_uint(Uint **hpp, Uint *szp, Uint n, Uint nums[]) * Since each hashmap node can only be up to 16 elements * large we use 4 bits per level in the path. * - * So a Path with value 0x110 will first get the 0:th + * So a Path with value 0x210 will first get the 0:th * slot in the head node, and then the 1:st slot in the - * resulting node and then finally the 1:st slot in the + * resulting node and then finally the 2:st slot in the * node beneath. If that slot is not a leaf, then the path * continues down the 0:th slot until it finds a leaf. * + * Collision nodes may (theoretically and in debug) have more + * than 16 elements. To not complicate the 4-bit path format + * we avoid yielding in collision nodes. + * * Once the leaf has been found, the return value is created * by traversing the tree using the stack that was built * when searching for the first leaf to return. @@ -3531,7 +3696,12 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { Uint path_length = 0; Uint *path_rest = NULL; int i, elems, orig_elems; - Eterm node = map, res, *patch_ptr = NULL, *hp; + Eterm node = map, res, *patch_ptr = NULL; + Eterm *hp = NULL; + Eterm *hp_end; + Eterm *ptr; + Uint sz, words_per_elem; + Uint idx; /* A stack WSTACK is used when traversing the hashmap. * It contains: node, idx, sz, ptr @@ -3589,61 +3759,29 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { BIF_ERROR(BIF_P, BADARG); } - if (type == iterator) { - /* - * Iterator uses the format {K1, V1, {K2, V2, {K3, V3, [Path | Map]}}}, - * so each element is 4 words large. - * To make iteration order independent of input reductions - * the KV-pairs are here built in DESTRUCTIVE non-reverse order. - */ - hp = HAlloc(BIF_P, 4 * elems); - } else { - /* - * List used the format [Path, Map, {K3,V3}, {K2,V2}, {K1,V1} | BIF_ARG_3], - * so each element is 2+3 words large. - * To make list order independent of input reductions - * the KV-pairs are here built in FUNCTIONAL reverse order - * as this is how the list as a whole is constructed. - */ - hp = HAlloc(BIF_P, (2 + 3) * elems); - } - - orig_elems = elems; - /* First we look for the leaf to start at using the path given. While doing so, we push each map node and the index onto the stack to use later. */ for (i = 1; ; i++) { - Eterm *ptr = hashmap_val(node), - hdr = *ptr++; - Uint sz; + Eterm hdr; + + ptr = hashmap_val(node); + hdr = *ptr++; sz = hashmap_node_size(hdr, &ptr); - if (PATH_ELEM(curr_path) >= sz) + idx = PATH_ELEM(curr_path); + if (idx >= sz) goto badarg; - WSTACK_PUSH4(stack, node, PATH_ELEM(curr_path)+1, sz, (UWord)ptr); - - /* We have found a leaf, return it and the next X elements */ - if (is_list(ptr[PATH_ELEM(curr_path)])) { - Eterm *lst = list_val(ptr[PATH_ELEM(curr_path)]); - if (type == iterator) { - res = make_tuple(hp); - hp[0] = make_arityval(3); - hp[1] = CAR(lst); - hp[2] = CDR(lst); - patch_ptr = &hp[3]; - hp += 4; - } else { - Eterm tup = TUPLE2(hp, CAR(lst), CDR(lst)); hp += 3; - res = CONS(hp, tup, BIF_ARG_3); hp += 2; - } - elems--; + if (is_list(ptr[idx])) { + /* We have found a leaf, return it and the next X elements */ break; } - node = ptr[PATH_ELEM(curr_path)]; + WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr); + + node = ptr[idx]; curr_path >>= PATH_ELEM_SIZE; @@ -3661,12 +3799,50 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { } } + if (type == iterator) { + /* + * Iterator uses the format {K1, V1, {K2, V2, {K3, V3, [Path | Map]}}}, + * so each element is 4 words large. + * To make iteration order independent of input reductions + * the KV-pairs are here built in DESTRUCTIVE non-reverse order. + */ + words_per_elem = 4; + patch_ptr = &res; + } else { + /* + * List used the format [Path, Map, {K3,V3}, {K2,V2}, {K1,V1} | BIF_ARG_3], + * so each element is 2+3 words large. + * To make list order independent of input reductions + * the KV-pairs are here built in FUNCTIONAL reverse order + * as this is how the list as a whole is constructed. + */ + words_per_elem = 2 + 3; + res = BIF_ARG_3; + } + hp = HAlloc(BIF_P, words_per_elem * elems); + hp_end = hp + words_per_elem * elems; + + orig_elems = elems; + /* We traverse the hashmap and return at most `elems` elements */ while(1) { - Eterm *ptr = (Eterm*)WSTACK_POP(stack); - Uint sz = (Uint)WSTACK_POP(stack); - Uint idx = (Uint)WSTACK_POP(stack); - Eterm node = (Eterm)WSTACK_POP(stack); + + if (idx == 0) { + if (elems < sz && is_arity_value(*hashmap_val(node))) { + /* + * This is a collision node! + * Make sure 'elems' is large enough not to yield in the + * middle of it. Collision nodes may be larger than 16 + * and that would complicate the 4-bit path format. + */ + elems = sz; + HRelease(BIF_P, hp_end, hp); + hp = HAlloc(BIF_P, words_per_elem * elems); + hp_end = hp + words_per_elem * elems; + } + } + else + ASSERT(!is_arity_value(*hashmap_val(node))); while (idx < sz && elems != 0 && is_list(ptr[idx])) { Eterm *lst = list_val(ptr[idx]); @@ -3685,6 +3861,8 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { idx++; } + ASSERT(idx == sz || !is_arity_value(*hashmap_val(node))); + if (elems == 0) { if (idx < sz) { /* There are more elements in this node to explore */ @@ -3703,26 +3881,29 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { } } break; - } else { - if (idx < sz) { - Eterm hdr; - /* Push next idx in current node */ - WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr); - - /* Push first idx in child node */ - node = ptr[idx]; - ptr = hashmap_val(ptr[idx]); - hdr = *ptr++; - sz = hashmap_node_size(hdr, &ptr); - WSTACK_PUSH4(stack, node, 0, sz, (UWord)ptr); - } } - - /* There are no more element in the hashmap */ - if (WSTACK_ISEMPTY(stack)) { + else if (idx < sz) { + Eterm hdr; + /* Push next idx in current node */ + WSTACK_PUSH4(stack, node, idx+1, sz, (UWord)ptr); + + /* Continue with first idx in child node */ + node = ptr[idx]; + ptr = hashmap_val(ptr[idx]); + hdr = *ptr++; + sz = hashmap_node_size(hdr, &ptr); + idx = 0; + } + else if (!WSTACK_ISEMPTY(stack)) { + ptr = (Eterm*)WSTACK_POP(stack); + sz = (Uint)WSTACK_POP(stack); + idx = (Uint)WSTACK_POP(stack); + node = (Eterm)WSTACK_POP(stack); + } + else { + /* There are no more element in the hashmap */ break; } - } if (!WSTACK_ISEMPTY(stack)) { @@ -3781,24 +3962,16 @@ BIF_RETTYPE erts_internal_map_next_3(BIF_ALIST_3) { res = CONS(hp, path, res); hp += 2; } } else { - if (type == iterator) { + if (type == iterator) *patch_ptr = am_none; - HRelease(BIF_P, hp + 4 * elems, hp); - } else { - HRelease(BIF_P, hp + (2+3) * elems, hp); - } + HRelease(BIF_P, hp_end, hp); } BIF_P->fcalls -= 4 * (orig_elems - elems); DESTROY_WSTACK(stack); BIF_RET(res); badarg: - if (type == iterator) { - HRelease(BIF_P, hp + 4 * elems, hp); - } else { - HRelease(BIF_P, hp + (2+3) * elems, hp); - } - BIF_P->fcalls -= 4 * (orig_elems - elems); + ASSERT(hp == NULL); DESTROY_WSTACK(stack); BIF_ERROR(BIF_P, BADARG); } diff --git a/erts/emulator/beam/erl_map.h b/erts/emulator/beam/erl_map.h index 3430ec6d7a..a8f9271e99 100644 --- a/erts/emulator/beam/erl_map.h +++ b/erts/emulator/beam/erl_map.h @@ -56,17 +56,15 @@ typedef struct flatmap_s { /* the head-node is a bitmap or array with an untagged size */ #define hashmap_size(x) (((hashmap_head_t*) hashmap_val(x))->size) -#define hashmap_make_hash(Key) make_map_hash(Key, 0) +#define hashmap_make_hash(Key) make_map_hash(Key) #define hashmap_restore_hash(Lvl, Key) \ - (((Lvl) < 8) ? \ - hashmap_make_hash(Key) >> (4*(Lvl)) : \ - make_map_hash(Key, ((Lvl) >> 3)) >> (4 * ((Lvl) & 7))) + (ASSERT(Lvl < 8), \ + hashmap_make_hash(Key) >> (4*(Lvl))) #define hashmap_shift_hash(Hx, Lvl, Key) \ - (((++(Lvl)) & 7) ? \ - (Hx) >> 4 : \ - make_map_hash(Key, ((Lvl) >> 3))) + (++(Lvl), ASSERT(Lvl <= HAMT_MAX_LEVEL), /* we allow one level too much */\ + (Hx) >> 4) /* erl_term.h stuff */ #define flatmap_get_values(x) (((Eterm *)(x)) + sizeof(flatmap_t)/sizeof(Eterm)) @@ -107,7 +105,7 @@ Eterm erts_hashmap_from_array(ErtsHeapFactory*, Eterm *leafs, Uint n, int rejec Eterm erts_map_from_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Uint n); Eterm erts_map_from_sorted_ks_and_vs(ErtsHeapFactory *factory, Eterm *ks0, Eterm *vs0, - Uint n, Eterm *key_tuple); + Uint n); Eterm erts_hashmap_from_ks_and_vs_extra(ErtsHeapFactory *factory, Eterm *ks, Eterm *vs, Uint n, Eterm k, Eterm v); @@ -151,7 +149,8 @@ typedef struct hashmap_head_s { /* erl_map.h stuff */ -#define is_hashmap_header_head(x) ((MAP_HEADER_TYPE(x) & (0x2))) +#define is_hashmap_header_head(x) (MAP_HEADER_TYPE(x) & (0x2)) +#define is_hashmap_header_node(x) (MAP_HEADER_TYPE(x) == 1) #define MAKE_MAP_HEADER(Type,Arity,Val) \ (_make_header(((((Uint16)(Val)) << MAP_HEADER_ARITY_SZ) | (Arity)) << MAP_HEADER_TAG_SZ | (Type) , _TAG_HEADER_MAP)) @@ -168,12 +167,21 @@ typedef struct hashmap_head_s { #define MAP_HEADER_HAMT_NODE_BITMAP(Bmp) \ MAKE_MAP_HEADER(MAP_HEADER_TAG_HAMT_NODE_BITMAP,0x0,Bmp) +#define MAP_HEADER_HAMT_COLLISION_NODE(Arity) make_arityval(Arity) + #define MAP_HEADER_FLATMAP_SZ (sizeof(flatmap_t) / sizeof(Eterm)) #define HAMT_NODE_ARRAY_SZ (17) #define HAMT_HEAD_ARRAY_SZ (18) #define HAMT_NODE_BITMAP_SZ(n) (1 + n) #define HAMT_HEAD_BITMAP_SZ(n) (2 + n) +#define HAMT_COLLISION_NODE_SZ(n) (1 + n) +/* + * Collision nodes are used when all hash bits have been exhausted. + * They are normal tuples of arity 2 or larger. The elements of a collision + * node tuple contain key-value cons cells like the other nodes, + * but they are sorted in map-key order. + */ /* 2 bits maps tag + 4 bits subtag + 2 ignore bits */ #define _HEADER_MAP_SUBTAG_MASK (0xfc) @@ -187,11 +195,17 @@ typedef struct hashmap_head_s { #define hashmap_index(hash) (((Uint32)hash) & 0xf) +#define HAMT_MAX_LEVEL 8 + /* hashmap heap size: [one cons cell + one list term in parent node] per key [one header + one boxed term in parent node] per inner node [one header + one size word] for root node Observed average number of nodes per key is about 0.35. + + Amendment: This size estimation does not take collision nodes into account. + It should be good enough though, as collision nodes are rare + and only make the size smaller compared to unlimited HAMT depth. */ #define HASHMAP_WORDS_PER_KEY 3 #define HASHMAP_WORDS_PER_NODE 2 diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 5ec1d885e1..73c155ef27 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -330,7 +330,7 @@ _ET_DECLARE_CHECKED(Uint,header_arity,Eterm) #define is_sane_arity_value(x) ((((x) & _TAG_HEADER_MASK) == _TAG_HEADER_ARITYVAL) && \ (((x) >> _HEADER_ARITY_OFFS) <= MAX_ARITYVAL)) #define is_not_arity_value(x) (!is_arity_value((x))) -#define _unchecked_arityval(x) _unchecked_header_arity((x)) +#define _unchecked_arityval(x) ((x) >> _HEADER_ARITY_OFFS) _ET_DECLARE_CHECKED(Uint,arityval,Eterm) #define arityval(x) _ET_APPLY(arityval,(x)) diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index e83881f52f..1585bbc6ec 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -71,9 +71,15 @@ Sint erts_list_length(Eterm); int erts_is_builtin(Eterm, Eterm, int); Uint32 make_hash2(Eterm); Uint32 trapping_make_hash2(Eterm, Eterm*, struct process*); +#ifdef DEBUG +# define DBG_HASHMAP_COLLISION_BONANZA +#endif +#ifdef DBG_HASHMAP_COLLISION_BONANZA +Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key); +#endif Uint32 make_hash(Eterm); Uint32 make_internal_hash(Eterm, Uint32 salt); -Uint32 make_map_hash(Eterm key, int level); +Uint32 make_map_hash(Eterm key); void erts_save_emu_args(int argc, char **argv); Eterm erts_get_emu_args(struct process *c_p); diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index a95e098c59..d0a2c61834 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -3011,7 +3011,6 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, const byte* ep, #define ENC_BIN_COPY ((Eterm) 3) #define ENC_MAP_PAIR ((Eterm) 4) #define ENC_HASHMAP_NODE ((Eterm) 5) -#define ENC_STORE_MAP_ELEMENT ((Eterm) 6) #define ENC_START_SORTING_MAP ((Eterm) 7) #define ENC_CONTINUE_SORTING_MAP ((Eterm) 8) #define ENC_PUSH_SORTED_MAP ((Eterm) 9) @@ -3180,18 +3179,32 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, case ENC_HASHMAP_NODE: if (is_list(obj)) { /* leaf node [K|V] */ ptr = list_val(obj); + if (dflags & DFLAG_DETERMINISTIC) { + *next_map_element++ = CAR(ptr); + *next_map_element++ = CDR(ptr); + goto outer_loop; + } WSTACK_PUSH2(s, ENC_TERM, CDR(ptr)); obj = CAR(ptr); } - break; - case ENC_STORE_MAP_ELEMENT: /* option `deterministic` */ - if (is_list(obj)) { /* leaf node [K|V] */ - ptr = list_val(obj); - *next_map_element++ = CAR(ptr); - *next_map_element++ = CDR(ptr); + else if (is_tuple(obj)) { /* collision node */ + Uint tpl_sz; + ptr = tuple_val(obj); + tpl_sz = arityval(*ptr); + ASSERT(tpl_sz >= 2); + ptr++; + WSTACK_RESERVE(s, tpl_sz * 2); + while(tpl_sz--) { + ASSERT(is_list(*ptr)); + WSTACK_FAST_PUSH(s, ENC_HASHMAP_NODE); + WSTACK_FAST_PUSH(s, *ptr++); + } goto outer_loop; - } - break; + } + else + ASSERT((*boxed_val(obj) & _HEADER_MAP_SUBTAG_MASK) + == HAMT_SUBTAG_NODE_BITMAP); + break; case ENC_START_SORTING_MAP: /* option `deterministic` */ { long num_reductions = r; @@ -3474,7 +3487,6 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, } else { Eterm hdr; Uint node_sz; - Eterm node_processor; ptr = boxed_val(obj); hdr = *ptr; ASSERT(is_header(hdr)); @@ -3515,17 +3527,14 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); ASSERT(node_sz < 17); break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); - } - + default: + erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); + } ptr++; - node_processor = (dflags & DFLAG_DETERMINISTIC) ? - ENC_STORE_MAP_ELEMENT : ENC_HASHMAP_NODE; WSTACK_RESERVE(s, node_sz*2); while(node_sz--) { - WSTACK_FAST_PUSH(s, node_processor); - WSTACK_FAST_PUSH(s, *ptr++); + WSTACK_FAST_PUSH(s, ENC_HASHMAP_NODE); + WSTACK_FAST_PUSH(s, *ptr++); } } break; @@ -5143,8 +5152,8 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, node_sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); ASSERT(node_sz < 17); break; - default: - erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); + default: + erts_exit(ERTS_ERROR_EXIT, "bad header\r\n"); } ptr++; diff --git a/erts/emulator/beam/jit/arm/instr_map.cpp b/erts/emulator/beam/jit/arm/instr_map.cpp index 36d0d95433..651461112d 100644 --- a/erts/emulator/beam/jit/arm/instr_map.cpp +++ b/erts/emulator/beam/jit/arm/instr_map.cpp @@ -29,13 +29,10 @@ extern "C" } static const Uint32 INTERNAL_HASH_SALT = 3432918353; -static const Uint32 HCONST_22 = 0x98C475E6UL; static const Uint32 HCONST = 0x9E3779B9; -/* ARG3 = incoming hash - * ARG6 = lower 32 +/* ARG6 = lower 32 * ARG7 = upper 32 - * ARG8 = type constant * * Helper function for calculating the internal hash of keys before looking * them up in a map. @@ -49,6 +46,9 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { a64::Gp hash = ARG3.w(), lower = ARG6.w(), upper = ARG7.w(), constant = ARG8.w(); + mov_imm(hash, INTERNAL_HASH_SALT); + mov_imm(constant, HCONST); + a.add(lower, lower, constant); a.add(upper, upper, constant); @@ -75,6 +75,24 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { } } +#ifdef DBG_HASHMAP_COLLISION_BONANZA + emit_enter_runtime_frame(); + emit_enter_runtime(); + + a.stp(ARG1, ARG2, TMP_MEM1q); + a.str(ARG4, TMP_MEM3q); + + a.mov(ARG1, ARG3); + runtime_call<2>(erts_dbg_hashmap_collision_bonanza); + a.mov(ARG3, ARG1); + + a.ldp(ARG1, ARG2, TMP_MEM1q); + a.ldr(ARG4, TMP_MEM3q); + + emit_leave_runtime(); + emit_leave_runtime_frame(); +#endif + a.ret(a64::x30); } @@ -88,7 +106,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { Label node_loop = a.newLabel(); arm::Gp node = ARG1, key = ARG2, key_hash = ARG3, header_val = ARG4, - depth = TMP4, index = TMP5, one = TMP6; + depth = TMP5, index = TMP6; const int header_shift = (_HEADER_ARITY_OFFS + MAP_HEADER_TAG_SZ + MAP_HEADER_ARITY_SZ); @@ -100,8 +118,9 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { a.bind(node_loop); { - Label fail = a.newLabel(), leaf_node = a.newLabel(), - skip_index_adjustment = a.newLabel(), update_hash = a.newLabel(); + Label done = a.newLabel(), leaf_node = a.newLabel(), + skip_index_adjustment = a.newLabel(), + collision_node = a.newLabel(); /* Find out which child we should follow, and shift the hash for the * next round. */ @@ -121,7 +140,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { * Note that we jump directly to the return sequence as ZF is clear * at this point. */ a.lsr(TMP1, header_val, index); - a.tbz(TMP1, imm(0), fail); + a.tbz(TMP1, imm(0), done); /* The actual offset of our entry is the number of bits set (in * essence "entries present") before our index in the bitmap. Clear @@ -145,11 +164,11 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { * word. */ a.ldr(header_val, arm::Mem(node).post(sizeof(Eterm))); - /* After 8 nodes we've run out of the 32 bits we started with, so we - * need to update the hash to keep going. */ - a.tst(depth, imm(0x7)); - a.b_eq(update_hash); - a.b(node_loop); + /* After 8 nodes we've run out of the 32 bits we started with + * and we end up in a collision node. */ + a.cmp(depth, imm(HAMT_MAX_LEVEL)); + a.b_ne(node_loop); + a.b(collision_node); a.bind(leaf_node); { @@ -159,36 +178,33 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { a.cmp(TMP1, key); /* See comment at the jump. */ - a.bind(fail); + a.bind(done); a.ret(a64::x30); } - /* After 8 nodes we've run out of the 32 bits we started with, so we - * must calculate a new hash to continue. - * - * This is a manual expansion `make_map_hash` from utils.c, and all - * changes to that function must be mirrored here. */ - a.bind(update_hash); + /* A collision node is a tuple of leafs where we do linear search.*/ + a.bind(collision_node); { - emit_enter_runtime_frame(); + Label linear_loop = a.newLabel(); + + a.lsr(TMP1, header_val, imm(_HEADER_ARITY_OFFS - 3)); - /* NOTE: ARG3 (key_hash) is always 0 at this point. */ - a.lsr(ARG6, depth, imm(3)); - mov_imm(ARG7, 1); - mov_imm(ARG8, HCONST_22); - a.bl(labels[internal_hash_helper]); + a.bind(linear_loop); + { + a.sub(TMP1, TMP1, imm(8)); - mov_imm(TMP1, INTERNAL_HASH_SALT); - a.eor(ARG3, ARG3, TMP1); + a.ldr(TMP2, arm::Mem(node, TMP1)); - a.mov(ARG6.w(), key.w()); - a.lsr(ARG7, key, imm(32)); - mov_imm(ARG8, HCONST); - a.bl(labels[internal_hash_helper]); + emit_untag_ptr(TMP2, TMP2); + a.ldp(TMP3, TMP4, arm::Mem(TMP2)); + a.cmp(key, TMP3); + a.csel(ARG1, node, TMP4, imm(arm::CondCode::kNE)); + a.b_eq(done); - emit_leave_runtime_frame(); + a.cbnz(TMP1, linear_loop); + } - a.b(node_loop); + a.ret(a64::x30); } } } @@ -336,10 +352,8 @@ void BeamGlobalAssembler::emit_i_get_map_element_shared() { emit_enter_runtime_frame(); /* Calculate the internal hash of ARG2 before diving into the HAMT. */ - mov_imm(ARG3, INTERNAL_HASH_SALT); a.mov(ARG6.w(), ARG2.w()); a.lsr(ARG7, ARG2, imm(32)); - mov_imm(ARG8, HCONST); a.bl(labels[internal_hash_helper]); emit_leave_runtime_frame(); diff --git a/erts/emulator/beam/jit/x86/instr_map.cpp b/erts/emulator/beam/jit/x86/instr_map.cpp index cf0063e967..4ead792fab 100644 --- a/erts/emulator/beam/jit/x86/instr_map.cpp +++ b/erts/emulator/beam/jit/x86/instr_map.cpp @@ -29,13 +29,11 @@ extern "C" } static const Uint32 INTERNAL_HASH_SALT = 3432918353; -static const Uint32 HCONST_22 = 0x98C475E6UL; static const Uint32 HCONST = 0x9E3779B9; -/* ARG3 = incoming hash +/* * ARG4 = lower 32 * ARG5 = upper 32 - * ARG6 = type constant * * Helper function for calculating the internal hash of keys before looking * them up in a map. @@ -48,8 +46,9 @@ static const Uint32 HCONST = 0x9E3779B9; void BeamGlobalAssembler::emit_internal_hash_helper() { x86::Gp hash = ARG3d, lower = ARG4d, upper = ARG5d; - a.add(lower, ARG6d); - a.add(upper, ARG6d); + a.mov(hash, imm(INTERNAL_HASH_SALT)); + a.add(lower, imm(HCONST)); + a.add(upper, imm(HCONST)); using rounds = std::initializer_list<std::tuple<x86::Gp, x86::Gp, x86::Gp, int>>; @@ -80,6 +79,23 @@ void BeamGlobalAssembler::emit_internal_hash_helper() { a.xor_(r_a, ARG6d); } +#ifdef DBG_HASHMAP_COLLISION_BONANZA + a.mov(TMP_MEM1q, ARG1); + a.mov(TMP_MEM2q, ARG2); + a.mov(TMP_MEM3q, RET); + + a.mov(ARG1, ARG3); + emit_enter_runtime(); + runtime_call<2>(erts_dbg_hashmap_collision_bonanza); + emit_leave_runtime(); + + a.mov(ARG3d, RETd); + + a.mov(ARG1, TMP_MEM1q); + a.mov(ARG2, TMP_MEM2q); + a.mov(RET, TMP_MEM3q); +#endif + a.ret(); } @@ -101,8 +117,9 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { a.bind(node_loop); { - Label fail = a.newLabel(), leaf_node = a.newLabel(), - skip_index_adjustment = a.newLabel(), update_hash = a.newLabel(); + Label done = a.newLabel(), leaf_node = a.newLabel(), + skip_index_adjustment = a.newLabel(), + collision_node = a.newLabel(); /* Find out which child we should follow, and shift the hash for the * next round. */ @@ -123,7 +140,7 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { * Note that we jump directly to a `RET` instruction, as `BT` only * affects CF, and ZF ("not found") is clear at this point. */ a.bt(header_val, index); - a.short_().jnc(fail); + a.short_().jnc(done); /* The actual offset of our entry is the number of bits set (in * essence "entries present") before our index in the bitmap. */ @@ -146,11 +163,11 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { /* Nope, we have to search another node. */ a.mov(header_val, emit_boxed_val(node, 0, sizeof(Uint32))); - /* After 8 nodes we've run out of the 32 bits we started with, so we - * need to update the hash to keep going. */ - a.test(depth, imm(0x7)); - a.short_().jz(update_hash); - a.short_().jmp(node_loop); + /* After 8 nodes we've run out of the 32 bits we started with + * and we end up in a collision node. */ + a.test(depth, imm(HAMT_MAX_LEVEL - 1)); + a.short_().jnz(node_loop); + a.short_().jmp(collision_node); a.bind(leaf_node); { @@ -160,36 +177,33 @@ void BeamGlobalAssembler::emit_hashmap_get_element() { a.mov(RET, getCDRRef(node)); /* See comment at the jump. */ - a.bind(fail); + a.bind(done); a.ret(); } - /* After 8 nodes we've run out of the 32 bits we started with, so we - * must calculate a new hash to continue. - * - * This is a manual expansion `make_map_hash` from utils.c, and all - * changes to that function must be mirrored here. */ - a.bind(update_hash); + /* A collision node is a tuple of leafs where we do linear search.*/ + a.bind(collision_node); { - a.mov(TMP_MEM1d, depth); + Label linear_loop = a.newLabel(); - /* NOTE: ARG3d is always 0 at this point. */ - a.mov(ARG4d, depth); - a.shr(ARG4d, imm(3)); - mov_imm(ARG5d, 1); - a.mov(ARG6d, imm(HCONST_22)); - a.call(labels[internal_hash_helper]); + a.shr(header_val, imm(_HEADER_ARITY_OFFS)); + a.lea(ARG6d, x86::qword_ptr(header_val, -1)); - a.xor_(ARG3d, imm(INTERNAL_HASH_SALT)); - a.mov(ARG4d, key.r32()); - a.mov(ARG5, key); - a.shr(ARG5, imm(32)); - a.mov(ARG6d, imm(HCONST)); - a.call(labels[internal_hash_helper]); + a.bind(linear_loop); + { + a.mov(ARG3, + x86::qword_ptr(node, ARG6, 3, 8 - TAG_PRIMARY_BOXED)); + + emit_ptr_val(ARG3, ARG3); + a.cmp(key, getCARRef(ARG3)); + a.mov(RET, getCDRRef(ARG3)); + a.short_().jz(done); - a.mov(depth, TMP_MEM1d); + a.dec(ARG6d); + a.short_().jns(linear_loop); + } - a.jmp(node_loop); + a.ret(); } } } @@ -330,8 +344,6 @@ void BeamGlobalAssembler::emit_i_get_map_element_shared() { a.shr(ARG5, imm(32)); a.mov(ARG4d, ARG2d); - a.mov(ARG3d, imm(INTERNAL_HASH_SALT)); - a.mov(ARG6d, imm(HCONST)); a.call(labels[internal_hash_helper]); emit_hashmap_get_element(); diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 9feb3aab0e..1baeb44835 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -1640,15 +1640,12 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ * We therefore calculate context independent hashes for all . * key-value pairs and then xor them together. */ - ESTACK_PUSH(s, hash_xor_pairs); - ESTACK_PUSH(s, hash); - ESTACK_PUSH(s, HASH_MAP_TAIL); + ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL); hash = 0; hash_xor_pairs = 0; for (ctx.i = ctx.size - 1; ctx.i >= 0; ctx.i--) { - ESTACK_PUSH(s, HASH_MAP_PAIR); - ESTACK_PUSH(s, ctx.vs[ctx.i]); - ESTACK_PUSH(s, ctx.ks[ctx.i]); + ESTACK_PUSH3(s, HASH_MAP_PAIR, + ctx.vs[ctx.i], ctx.ks[ctx.i]); TRAP_LOCATION(hamt_subtag_head_flatmap); } goto hash2_common; @@ -1660,9 +1657,7 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ UINT32_HASH(size, HCONST_16); if (size == 0) goto hash2_common; - ESTACK_PUSH(s, hash_xor_pairs); - ESTACK_PUSH(s, hash); - ESTACK_PUSH(s, HASH_MAP_TAIL); + ESTACK_PUSH3(s, hash_xor_pairs, hash, HASH_MAP_TAIL); hash = 0; hash_xor_pairs = 0; } @@ -1680,13 +1675,22 @@ make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_ while (ctx.i) { if (is_list(*ctx.ptr)) { Eterm* cons = list_val(*ctx.ptr); - ESTACK_PUSH(s, HASH_MAP_PAIR); - ESTACK_PUSH(s, CDR(cons)); - ESTACK_PUSH(s, CAR(cons)); + ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons)); } else { ASSERT(is_boxed(*ctx.ptr)); - ESTACK_PUSH(s, *ctx.ptr); + if (is_tuple(*ctx.ptr)) { /* collision node */ + Eterm *coll_ptr = tuple_val(*ctx.ptr); + Uint n = arityval(*coll_ptr); + ASSERT(n >= 2); + coll_ptr++; + for (; n; n--, coll_ptr++) { + Eterm* cons = list_val(*coll_ptr); + ESTACK_PUSH3(s, HASH_MAP_PAIR, CDR(cons), CAR(cons)); + } + } + else + ESTACK_PUSH(s, *ctx.ptr); } ctx.i--; ctx.ptr++; TRAP_LOCATION(map_subtag); @@ -2078,15 +2082,43 @@ trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p) return make_hash2_helper(term, 1, state_mref_write_back, p); } +#ifdef DBG_HASHMAP_COLLISION_BONANZA +Uint32 erts_dbg_hashmap_collision_bonanza(Uint32 hash, Eterm key) +{ +/*{ + static Uint32 hashvec[7] = { + 0x02345678, + 0x12345678, + 0xe2345678, + 0xf2345678, + 0x12abcdef, + 0x13abcdef, + 0xcafebabe + }; + hash = hashvec[hash % (sizeof(hashvec) / sizeof(hashvec[0]))]; + }*/ + const Uint32 bad_hash = (hash & 0x12482481) * 1442968193; + const Uint32 bad_bits = hash % 67; + if (bad_bits < 32) { + /* Mix in a number of high good bits to get "randomly" close + to the collision nodes */ + const Uint32 bad_mask = (1 << bad_bits) - 1; + return (hash & ~bad_mask) | (bad_hash & bad_mask); + } + return bad_hash; +} +#endif + /* Term hash function for maps, with a separate depth parameter */ -Uint32 make_map_hash(Eterm key, int depth) { +Uint32 make_map_hash(Eterm key) { Uint32 hash = 0; - if (depth > 0) { - UINT32_HASH_2(depth, 1, HCONST_22); - } + hash = make_internal_hash(key, hash); - return make_internal_hash(key, hash); +#ifdef DBG_HASHMAP_COLLISION_BONANZA + hash = erts_dbg_hashmap_collision_bonanza(hash, key); +#endif + return hash; } /* Term hash function for internal use. @@ -2099,12 +2131,10 @@ Uint32 make_map_hash(Eterm key, int depth) { * with a new ErlNode struct, externals from that node will hash different than * before. * - * One IMPORTANT property must hold (for hamt). - * EVERY BIT of the term that is significant for equality (see EQ) - * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a - * chance of hashing different when salted. - * - * This is why we cannot use cached hash values for atoms for example. + * The property "EVERY BIT of the term that is significant for equality + * MUST BE USED AS INPUT FOR THE HASH" is nice but no longer crucial for the + * hashmap implementation that now uses collision nodes at the bottom of + * the HAMT when all hash bits are exhausted. * */ @@ -2250,6 +2280,8 @@ make_internal_hash(Eterm term, Uint32 salt) } else { ASSERT(is_boxed(*ptr)); + /* no special treatment of collision nodes needed, + hash them as the tuples they are */ ESTACK_PUSH(s, *ptr); } i--; ptr++; @@ -3132,8 +3164,8 @@ tailrecur_ne: sz = hashmap_bitcount(MAP_HEADER_VAL(hdr)); ASSERT(sz > 0 && sz < 17); break; - default: - erts_exit(ERTS_ERROR_EXIT, "Unknown hashmap subsubtag\n"); + default: + erts_exit(ERTS_ERROR_EXIT, "bad header"); } goto term_array; } @@ -3496,6 +3528,11 @@ tailrecur_ne: A minimal key can only be candidate as tie-breaker if we have passed that hash value in the other tree (which means the key did not exist in the other tree). + + Collision node amendment: + The leafs in collision nodes are sorted in map-key order. + If keys are different but hashes are equal we advance the + one lagging behind key-wise. */ sp = PSTACK_PUSH(hmap_stack); @@ -3905,12 +3942,19 @@ pop_next: sp = PSTACK_TOP(hmap_stack); if (j) { /* Key diff found, enter phase 2 */ - if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) { + int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp); + if (hash_cmp == 0) { + /* Hash collision. Collision nodes are sorted by map key + * order, so we advance the one with the lesser key */ + hash_cmp = j; + } + if (hash_cmp < 0) { sp->min_key = CAR(sp->ap); sp->cmp_res = -1; sp->ap = hashmap_iterator_next(&stack); } else { + ASSERT(hash_cmp > 0); sp->min_key = CAR(sp->bp); sp->cmp_res = 1; sp->bp = hashmap_iterator_next(&b_stack); @@ -3960,7 +4004,7 @@ pop_next: sp->bp = hashmap_iterator_next(&b_stack); if (!sp->ap) { /* end of maps with identical keys */ - ASSERT(!sp->bp); + ASSERT(!sp->bp); /* as we assume indentical map sizes */ j = sp->cmp_res; exact = sp->was_exact; (void) PSTACK_POP(hmap_stack); @@ -3995,14 +4039,21 @@ pop_next: /* fall through */ case_HASHMAP_PHASE2_NEXT_STEP: if (sp->ap || sp->bp) { - if (hashmap_key_hash_cmp(sp->ap, sp->bp) < 0) { + int hash_cmp = hashmap_key_hash_cmp(sp->ap, sp->bp); + if (hash_cmp == 0) { + /* Hash collision. Collision nodes are sorted by map key + * order, so we advance the one with the lesser key */ + hash_cmp = j; + } + if (hash_cmp < 0) { ASSERT(sp->ap); a = CAR(sp->ap); b = sp->min_key; ASSERT(exact); WSTACK_PUSH(stack, OP_WORD(HASHMAP_PHASE2_IS_MIN_KEY_A)); } - else { /* hash_cmp > 0 */ + else { + ASSERT(hash_cmp > 0); ASSERT(sp->bp); a = CAR(sp->bp); b = sp->min_key; diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 23c072ce86..75dfb36cf6 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -270,6 +270,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 60 = map_size(M0), 60 = maps:size(M0), + 60 = apply(erlang, id(map_size), [M0]), + 60 = apply(maps, id(size), [M0]), % with repeating M1 = id(#{ 10=>a0,20=>b0,30=>"c0","40"=>"d0",<<"50">>=>"e0",{["00"]}=>"10", @@ -306,6 +308,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 60 = map_size(M1), 60 = maps:size(M1), + 60 = apply(erlang, id(map_size), [M1]), + 60 = apply(maps, id(size), [M1]), % with floats @@ -359,6 +363,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 90 = map_size(M2), 90 = maps:size(M2), + 90 = apply(erlang, id(map_size), [M2]), + 90 = apply(maps, id(size), [M2]), % with bignums M3 = id(#{ 10=>a0,20=>b0,30=>"c0","40"=>"d0",<<"50">>=>"e0",{["00"]}=>"10", @@ -422,6 +428,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 98 = map_size(M3), 98 = maps:size(M3), + 98 = apply(erlang, id(map_size), [M3]), + 98 = apply(maps, id(size), [M3]), %% with maps @@ -542,6 +550,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 95 = map_size(M4), 95 = maps:size(M4), + 95 = apply(erlang, id(map_size), [M4]), + 95 = apply(maps, id(size), [M4]), % call for value @@ -639,6 +649,8 @@ t_build_and_match_literals_large(Config) when is_list(Config) -> 95 = map_size(M5), 95 = maps:size(M5), + 95 = apply(erlang, id(map_size), [M5]), + 95 = apply(maps, id(size), [M5]), %% remember @@ -2077,6 +2089,10 @@ t_bif_map_merge(Config) when is_list(Config) -> {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} = (catch maps:merge(T, #{})), {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} = + (catch maps:merge(M11, T)), + {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} = + (catch maps:merge(T, M11)), + {'EXIT',{{badmap,T},[{maps,merge,_,_}|_]}} = (catch maps:merge(T, T)) end), ok. @@ -2310,6 +2326,16 @@ t_bif_erlang_phash2() -> 70249457 = erlang:phash2(M0), % 118679416 59617982 = erlang:phash2(M1), % 51612236 70249457 = erlang:phash2(M2), % 118679416 + + M1000 = maps:from_list([{K,K} || K <- lists:seq(1,1000)]), + 66609305 = erlang:phash2(M1000), + + Mnested1 = #{flatmap => M0, M0 => flatmap, hashmap => M1000, M1000 => hashmap}, + 113689339 = erlang:phash2(Mnested1), + + Mnested2 = maps:merge(Mnested1, M1000), + 29167443 = erlang:phash2(Mnested2), + ok. t_bif_erlang_phash() -> @@ -2330,6 +2356,16 @@ t_bif_erlang_phash() -> 2620391445 = erlang:phash(M0,Sz), % 3590546636 1670235874 = erlang:phash(M1,Sz), % 4066388227 2620391445 = erlang:phash(M2,Sz), % 3590546636 + + M1000 = maps:from_list([{K,K} || K <- lists:seq(1,1000)]), + 1945662653 = erlang:phash(M1000, Sz), + + Mnested1 = #{flatmap => M0, M0 => flatmap, hashmap => M1000, M1000 => hashmap}, + 113694495 = erlang:phash(Mnested1, Sz), + + Mnested2 = maps:merge(Mnested1, M1000), + 431825783 = erlang:phash(Mnested2, Sz), + ok. t_map_encode_decode(Config) when is_list(Config) -> @@ -2805,25 +2841,31 @@ t_maps_without(_Config) -> %% Verify that the the number of nodes in hashmaps %% of different types and sizes does not deviate too %% much from the theoretical model. +%% For debug with DBG_HASHMAP_COLLISION_BONANZA the test will expect +%% the hashmaps to NOT be well balanced. t_hashmap_balance(_Config) -> + erts_debug:set_internal_state(available_internal_state, true), + ExpectBalance = not erts_debug:get_internal_state(hashmap_collision_bonanza), + hashmap_balance(ExpectBalance), + erts_debug:set_internal_state(available_internal_state, false), + ok. + +hashmap_balance(EB) -> io:format("Integer keys\n", []), - hashmap_balance(fun(I) -> I end), + hashmap_balance(fun(I) -> I end, EB), io:format("Float keys\n", []), - hashmap_balance(fun(I) -> float(I) end), + hashmap_balance(fun(I) -> float(I) end, EB), io:format("String keys\n", []), - hashmap_balance(fun(I) -> integer_to_list(I) end), + hashmap_balance(fun(I) -> integer_to_list(I) end, EB), io:format("Binary (big) keys\n", []), - hashmap_balance(fun(I) -> <<I:16/big>> end), + hashmap_balance(fun(I) -> <<I:16/big>> end, EB), io:format("Binary (little) keys\n", []), - hashmap_balance(fun(I) -> <<I:16/little>> end), + hashmap_balance(fun(I) -> <<I:16/little>> end, EB), io:format("Atom keys\n", []), - erts_debug:set_internal_state(available_internal_state, true), - hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end), - erts_debug:set_internal_state(available_internal_state, false), - + hashmap_balance(fun(I) -> erts_debug:get_internal_state({atom,I}) end, EB), ok. -hashmap_balance(KeyFun) -> +hashmap_balance(KeyFun, ExpectBalance) -> %% For uniformly distributed hash values, the average number of nodes N %% in a hashmap varies between 0.3*K and 0.4*K where K is number of keys. %% The standard deviation of N is about sqrt(K)/3. @@ -2867,9 +2909,10 @@ hashmap_balance(KeyFun) -> erts_debug:flat_size(MaxMap)]) end, - true = (MaxDiff < 6), % The probability of this line failing is about 0.000000001 - % for a uniform hash. I've set the probability this "high" for now - % to detect flaws in our make_internal_hash. + %% The probability of this line failing is about 0.000000001 + %% for a uniform hash. I've set the probability this "high" for now + %% to detect flaws in our make_internal_hash. + ExpectBalance = (MaxDiff < 6), ok. hashmap_nodes(M) -> @@ -2878,6 +2921,7 @@ hashmap_nodes(M) -> case element(1,Tpl) of bitmaps -> Acc + element(2,Tpl); arrays -> Acc + element(2,Tpl); + collisions -> Acc + element(2,Tpl); _ -> Acc end end, |