From a0b96aa489877513c452275d187f0bbf0ac8ca16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= Date: Wed, 11 Jan 2023 17:32:19 +0000 Subject: Do not allocate a new map when the value is the same encore This is a followup to PR #1889 almost 5 years in the making. The optimisation there was implemented for many cases, but somehow `M#{existing_key => same_value}` and `M#{existing_key => new_value}` cases were missed. This is a bit more complicated since there are two cases to handle: * the keys and values didn't change, and we can just return the original map * the keys didn't change, but values did - we return a new map but keeping original keys tuple This seems to be hit farily frequently - while I was working on it, it wasn't even possible to start the VM successfully. --- erts/emulator/beam/beam_common.c | 94 ++++++++++++++++++++++++++++------------ erts/emulator/test/map_SUITE.erl | 49 ++++++++++++++++++++- 2 files changed, 114 insertions(+), 29 deletions(-) diff --git a/erts/emulator/beam/beam_common.c b/erts/emulator/beam/beam_common.c index 6194644869..a6e8905794 100644 --- a/erts/emulator/beam/beam_common.c +++ b/erts/emulator/beam/beam_common.c @@ -2076,6 +2076,8 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, Eterm new_key; Eterm* kp; Eterm map; + int changed_values = 0; + int changed_keys = 0; num_updates = n / 2; map = reg[live]; @@ -2127,35 +2129,46 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, * Build the skeleton for the map, ready to be filled in. * * +-----------------------------------+ - * | (Space for aritvyal for keys) | <-----------+ - * +-----------------------------------+ | - * | (Space for key 1) | | <-- kp - * +-----------------------------------+ | - * . | - * . | - * . | - * +-----------------------------------+ | - * | (Space for last key) | | - * +-----------------------------------+ | - * | MAP_HEADER | | - * +-----------------------------------+ | - * | (Space for number of keys/values) | | - * +-----------------------------------+ | + * | MAP_HEADER_FLATMAP | + * +-----------------------------------+ + * | (Space for number of keys/values) | + * +-----------------------------------+ * | Boxed tuple pointer >----------------+ + * +-----------------------------------+ | + * | (Space for value 1) | | <-- hp + * +-----------------------------------+ | + * . | + * . | + * . | + * +-----------------------------------+ | + * | (Space for last value) | | + * +-----------------------------------+ | + * +-----------------------------------+ | + * | (Space for aritvyal for keys) | <-----------+ + * +-----------------------------------+ + * | (Space for key 1) | <-- kp * +-----------------------------------+ - * | (Space for value 1) | <-- hp + * . + * . + * . + * +-----------------------------------+ + * | (Space for last key) | * +-----------------------------------+ */ + hp = p->htop; E = p->stop; - kp = p->htop + 1; /* Point to first key */ - hp = kp + num_old + num_updates; res = make_flatmap(hp); mp = (flatmap_t *)hp; hp += MAP_HEADER_FLATMAP_SZ; mp->thing_word = MAP_HEADER_FLATMAP; - mp->keys = make_tuple(kp-1); + + kp = hp + num_old + num_updates; /* Point to key tuple. */ + + mp->keys = make_tuple(kp); + + kp = kp + 1; /* Point to first key. */ old_vals = flatmap_get_values(old_mp); old_keys = flatmap_get_keys(old_mp); @@ -2172,7 +2185,6 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, Eterm key; Sint c; - ASSERT(kp < (Eterm *)mp); key = *old_keys; if ((c = (key == new_key) ? 0 : CMP_TERM(key, new_key)) < 0) { /* Copy old key and value */ @@ -2180,13 +2192,18 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, *hp++ = *old_vals; old_keys++, old_vals++, num_old--; } else { /* Replace or insert new */ - GET_TERM(new_p[1], *hp++); + GET_TERM(new_p[1], *hp); if (c > 0) { /* If new key */ *kp++ = new_key; + changed_keys = 1; } else { /* If replacement */ + if (*old_vals != *hp) { + changed_values = 1; + } *kp++ = key; old_keys++, old_vals++, num_old--; } + hp++; n--; if (n == 0) { break; @@ -2218,6 +2235,28 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, GET_TERM(new_p[1], *hp++); new_p += 2; } + } else if (!changed_keys && !changed_values) { + /* + * All updates are now done, no new keys were introduced, and + * all new values were the same as old ones. We can just + * return the old map and skip committing the new allocation, + * effectively releasing it. + */ + ASSERT(n == 0); + return map; + } else if (!changed_keys) { + /* + * All updates are now done, no new keys were introduced, but + * some values were changed. We can retain the old key tuple. + */ + ASSERT(n == 0); + mp->size = old_mp->size; + mp->keys = old_mp->keys; + while (num_old-- > 0) { + *hp++ = *old_vals++; + } + p->htop = hp; + return res; } else { /* * All updates are now done. We may still have old @@ -2225,7 +2264,6 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, */ ASSERT(n == 0); while (num_old-- > 0) { - ASSERT(kp < (Eterm *)mp); *kp++ = *old_keys++; *hp++ = *old_vals++; } @@ -2233,20 +2271,22 @@ erts_gc_update_map_assoc(Process* p, Eterm* reg, Uint live, /* * Calculate how many values that are unused at the end of the - * key tuple and fill it out with a bignum header. + * value array and fill it out with a bignum header. */ - if ((n = (Eterm *)mp - kp) > 0) { - *kp = make_pos_bignum_header(n-1); + if ((n = boxed_val(mp->keys) - hp) > 0) { + ASSERT(n <= num_updates); + *hp = make_pos_bignum_header(n-1); } /* * Fill in the size of the map in both the key tuple and in the map. */ - n = kp - p->htop - 1; /* Actual number of keys/values */ - *p->htop = make_arityval(n); - p->htop = hp; + n = hp - (Eterm *)mp - MAP_HEADER_FLATMAP_SZ; /* Actual number of keys/values */ + ASSERT(n <= old_mp->size + num_updates); mp->size = n; + *(boxed_val(mp->keys)) = make_arityval(n); + p->htop = kp; /* The expensive case, need to build a hashmap */ if (n > MAP_SMALL_MAP_LIMIT) { diff --git a/erts/emulator/test/map_SUITE.erl b/erts/emulator/test/map_SUITE.erl index 7fb2961cad..3dc6e8a46a 100644 --- a/erts/emulator/test/map_SUITE.erl +++ b/erts/emulator/test/map_SUITE.erl @@ -25,7 +25,7 @@ t_update_literals/1, t_update_literals_large/1, t_match_and_update_literals/1, t_match_and_update_literals_large/1, t_update_map_expressions/1, - t_update_assoc/1, t_update_assoc_large/1, + t_update_assoc/1, t_update_assoc_large/1, t_update_assoc_sharing/1, t_update_exact/1, t_update_exact_large/1, t_guard_bifs/1, t_guard_sequence/1, t_guard_sequence_large/1, @@ -120,7 +120,7 @@ groups() -> t_update_literals, t_update_literals_large, t_match_and_update_literals, t_match_and_update_literals_large, t_update_map_expressions, - t_update_assoc, t_update_assoc_large, + t_update_assoc, t_update_assoc_large, t_update_assoc_sharing, t_update_exact, t_update_exact_large, t_guard_bifs, t_guard_sequence, t_guard_sequence_large, @@ -1113,6 +1113,51 @@ t_update_assoc_large(Config) when is_list(Config) -> ok. +t_update_assoc_sharing(Config) when is_list(Config) -> + Complex = id(#{nested=>map}), + + case erlang:system_info(debug_compiled) of + true -> + %% the maximum size of a flatmap in a debug-compiled + %% system is three + M0 = id(#{1=>a,2=>b,complex=>Complex}), + + %% all keys & values are the same + M1 = M0#{1=>a,complex=>Complex}, + true = erts_debug:same(M1, M0), + M1 = M0, + + %% only keys are the same + M2 = M0#{1=>new_value,complex=>Complex#{extra=>key}}, + true = same_keys(M0, M2); + false -> + M0 = id(#{1=>a,2=>b,3.0=>c,4=>d,5=>e,complex=>Complex}), + + %% all keys & values are the same + M1 = M0#{1=>a,complex=>Complex}, + true = erts_debug:same(M1, M0), + M1 = M0, + + %% only keys are the same + M2 = M0#{1=>new_value,complex=>Complex#{extra=>key}}, + true = same_keys(M0, M2), + + M3 = M0#{2=>new_value}, + true = same_keys(M0, M3), + #{2:=new_value} = M3, + + M4 = M0#{1=>1,2=>2,3.0=>3,4=>4,5=>5,complex=>6}, + true = same_keys(M0, M4), + #{1:=1,2:=2,3.0:=3,4:=4,5:=5,complex:=6} = M4 + end, + + ok. + +same_keys(M0, M1) -> + Keys0 = erts_internal:map_to_tuple_keys(M0), + Keys1 = erts_internal:map_to_tuple_keys(M1), + erts_debug:same(Keys0, Keys1). + t_update_exact(Config) when is_list(Config) -> M0 = id(#{1=>a,2=>b,3.0=>c,4=>d,5=>e}), -- cgit v1.2.1