From 5199f2aaf9527c97e6ec371e19748d0c2ac7a70e Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 19 Apr 2023 15:59:25 -0400 Subject: Implement Hash AR tables on VWA --- common.mk | 1 - gc.c | 37 ++++------------- hash.c | 99 ++++++++++---------------------------------- internal/hash.h | 40 +++--------------- ractor.c | 5 --- test/ruby/test_gc_compact.rb | 20 +++++++++ test/ruby/test_shapes.rb | 6 ++- transient_heap.c | 13 ------ transient_heap.h | 2 - 9 files changed, 60 insertions(+), 163 deletions(-) diff --git a/common.mk b/common.mk index 16ff4c5cda..3d78ef8434 100644 --- a/common.mk +++ b/common.mk @@ -16475,7 +16475,6 @@ transient_heap.$(OBJEXT): $(top_srcdir)/internal/array.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/compilers.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/gc.h -transient_heap.$(OBJEXT): $(top_srcdir)/internal/hash.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/imemo.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h transient_heap.$(OBJEXT): $(top_srcdir)/internal/serial.h diff --git a/gc.c b/gc.c index db6fa9c861..d2cf6d07db 100644 --- a/gc.c +++ b/gc.c @@ -3568,19 +3568,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) RB_DEBUG_COUNTER_INC(obj_hash_st); } #endif - if (/* RHASH_AR_TABLE_P(obj) */ !FL_TEST_RAW(obj, RHASH_ST_TABLE_FLAG)) { - struct ar_table_struct *tab = RHASH(obj)->as.ar; - - if (tab) { - if (RHASH_TRANSIENT_P(obj)) { - RB_DEBUG_COUNTER_INC(obj_hash_transient); - } - else { - ruby_xfree(tab); - } - } - } - else { + if (!RHASH_AR_TABLE_P(obj)) { GC_ASSERT(RHASH_ST_TABLE_P(obj)); st_free_table(RHASH(obj)->as.st); } @@ -4922,10 +4910,6 @@ obj_memsize_of(VALUE obj, int use_all_types) break; case T_HASH: if (RHASH_AR_TABLE_P(obj)) { - if (RHASH_AR_TABLE(obj) != NULL) { - size_t rb_hash_ar_table_size(void); - size += rb_hash_ar_table_size(); - } } else { VM_ASSERT(RHASH_ST_TABLE(obj) != NULL); @@ -6608,14 +6592,6 @@ mark_hash(rb_objspace_t *objspace, VALUE hash) rb_hash_stlike_foreach(hash, mark_keyvalue, (st_data_t)objspace); } - if (RHASH_AR_TABLE_P(hash)) { - if (LIKELY(during_gc) && RHASH_TRANSIENT_P(hash)) { - rb_transient_heap_mark(hash, RHASH_AR_TABLE(hash)); - } - } - else { - VM_ASSERT(!RHASH_TRANSIENT_P(hash)); - } gc_mark(objspace, RHASH(hash)->ifnone); } @@ -8425,8 +8401,12 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V obj_size = rb_str_size_as_embedded(src); break; - default: - return src_pool; + case T_HASH: + obj_size = rb_hash_size_as_embedded(src); + break; + + default: + return src_pool; } if (rb_gc_size_allocatable_p(obj_size)){ @@ -13583,9 +13563,8 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU break; } case T_HASH: { - APPEND_F("[%c%c] %"PRIdSIZE, + APPEND_F("[%c] %"PRIdSIZE, RHASH_AR_TABLE_P(obj) ? 'A' : 'S', - RHASH_TRANSIENT_P(obj) ? 'T' : ' ', RHASH_SIZE(obj)); break; } diff --git a/hash.c b/hash.c index 213bba01bf..f4aeee36c6 100644 --- a/hash.c +++ b/hash.c @@ -388,10 +388,15 @@ typedef struct ar_table_struct { ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE]; } ar_table; +#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table)) + size_t -rb_hash_ar_table_size(void) +rb_hash_size_as_embedded(VALUE hash) { - return sizeof(ar_table); + if (RHASH_AR_TABLE_P(hash)) { + return RHASH_EMBED_SIZE; + } + return sizeof(struct RHash); } static inline st_hash_t @@ -534,14 +539,6 @@ hash_verify_(VALUE hash, const char *file, int line) HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0); HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0); } - -#if USE_TRANSIENT_HEAP - if (RHASH_TRANSIENT_P(hash)) { - volatile st_data_t MAYBE_UNUSED(key) = RHASH_AR_TABLE_REF(hash, 0)->key; /* read */ - HASH_ASSERT(RHASH_AR_TABLE(hash) != NULL); - HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH_AR_TABLE(hash))); - } -#endif return hash; } @@ -552,7 +549,7 @@ hash_verify_(VALUE hash, const char *file, int line) static inline int RHASH_TABLE_NULL_P(VALUE hash) { - if (RHASH(hash)->as.ar == NULL) { + if (RHASH_AR_TABLE(hash) == NULL) { HASH_ASSERT(RHASH_AR_TABLE_P(hash)); return TRUE; } @@ -579,8 +576,7 @@ static void hash_ar_table_set(VALUE hash, ar_table *ar) { HASH_ASSERT(RHASH_AR_TABLE_P(hash)); - HASH_ASSERT((RHASH_TRANSIENT_P(hash) && ar == NULL) ? FALSE : TRUE); - RHASH(hash)->as.ar = ar; + *(RHASH_AR_TABLE(hash)) = *ar; hash_verify(hash); } @@ -641,25 +637,20 @@ RHASH_AR_TABLE_CLEAR(VALUE h) RBASIC(h)->flags &= ~RHASH_AR_TABLE_SIZE_MASK; RBASIC(h)->flags &= ~RHASH_AR_TABLE_BOUND_MASK; - hash_ar_table_set(h, NULL); + memset(RHASH_AR_TABLE(h), 0, sizeof(ar_table)); } static ar_table* ar_alloc_table(VALUE hash) { - ar_table *tab = (ar_table*)rb_transient_heap_alloc(hash, sizeof(ar_table)); + ar_table *tab = RHASH_AR_TABLE(hash); + memset(tab, 0, sizeof(ar_table)); - if (tab != NULL) { - RHASH_SET_TRANSIENT_FLAG(hash); - } - else { - RHASH_UNSET_TRANSIENT_FLAG(hash); - tab = (ar_table*)ruby_xmalloc(sizeof(ar_table)); - } + // RHASH_UNSET_ST_FLAG(hash); + // hash_ar_table_set(hash, tab); RHASH_AR_TABLE_SIZE_SET(hash, 0); RHASH_AR_TABLE_BOUND_SET(hash, 0); - hash_ar_table_set(hash, tab); return tab; } @@ -722,23 +713,14 @@ ar_find_entry(VALUE hash, st_hash_t hash_value, st_data_t key) return ar_find_entry_hint(hash, hint, key); } +//old one static inline void ar_free_and_clear_table(VALUE hash) { - ar_table *tab = RHASH_AR_TABLE(hash); + RHASH_AR_TABLE_CLEAR(hash); - if (tab) { - if (RHASH_TRANSIENT_P(hash)) { - RHASH_UNSET_TRANSIENT_FLAG(hash); - } - else { - ruby_xfree(RHASH_AR_TABLE(hash)); - } - RHASH_AR_TABLE_CLEAR(hash); - } HASH_ASSERT(RHASH_AR_TABLE_SIZE(hash) == 0); HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0); - HASH_ASSERT(RHASH_TRANSIENT_P(hash) == 0); } static void @@ -1055,6 +1037,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value) return -1; } + HASH_ASSERT(RHASH_AR_TABLE(hash)); hash_ar_table(hash); /* prepare ltbl */ bin = ar_find_entry(hash, hash_value, key); @@ -1064,6 +1047,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value) } else if (bin >= RHASH_AR_TABLE_MAX_BOUND) { bin = ar_compact_table(hash); + HASH_ASSERT(RHASH_AR_TABLE(hash)); hash_ar_table(hash); } HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND); @@ -1202,17 +1186,10 @@ ar_copy(VALUE hash1, VALUE hash2) ar_table *new_tab = RHASH_AR_TABLE(hash1); if (new_tab == NULL) { - new_tab = (ar_table*) rb_transient_heap_alloc(hash1, sizeof(ar_table)); - if (new_tab != NULL) { - RHASH_SET_TRANSIENT_FLAG(hash1); - } - else { - RHASH_UNSET_TRANSIENT_FLAG(hash1); - new_tab = (ar_table*)ruby_xmalloc(sizeof(ar_table)); - } + new_tab = ar_alloc_table(hash1); } - *new_tab = *old_tab; + memcpy(new_tab, old_tab, sizeof(ar_table)); RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word; RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2)); RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2)); @@ -1236,33 +1213,6 @@ ar_clear(VALUE hash) } } -#if USE_TRANSIENT_HEAP -void -rb_hash_transient_heap_evacuate(VALUE hash, int promote) -{ - if (RHASH_TRANSIENT_P(hash)) { - ar_table *new_tab; - ar_table *old_tab = RHASH_AR_TABLE(hash); - - if (UNLIKELY(old_tab == NULL)) { - return; - } - HASH_ASSERT(old_tab != NULL); - if (! promote) { - new_tab = rb_transient_heap_alloc(hash, sizeof(ar_table)); - if (new_tab == NULL) promote = true; - } - if (promote) { - new_tab = ruby_xmalloc(sizeof(ar_table)); - RHASH_UNSET_TRANSIENT_FLAG(hash); - } - *new_tab = *old_tab; - hash_ar_table_set(hash, new_tab); - } - hash_verify(hash); -} -#endif - typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -1504,7 +1454,7 @@ static VALUE hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone) { const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0); - NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, sizeof(struct RHash), 0); + NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0); RHASH_SET_IFNONE((VALUE)hash, ifnone); @@ -1547,10 +1497,7 @@ rb_hash_new_with_size(st_index_t size) if (size == 0) { /* do nothing */ } - else if (size <= RHASH_AR_TABLE_MAX_SIZE) { - ar_alloc_table(ret); - } - else { + else if (size > RHASH_AR_TABLE_MAX_SIZE) { RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size)); } return ret; @@ -2027,11 +1974,9 @@ rb_hash_rehash(VALUE hash) rb_hash_modify_check(hash); if (RHASH_AR_TABLE_P(hash)) { tmp = hash_alloc(0); - ar_alloc_table(tmp); rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); ar_free_and_clear_table(hash); ar_copy(hash, tmp); - ar_free_and_clear_table(tmp); } else if (RHASH_ST_TABLE_P(hash)) { st_table *old_tab = RHASH_ST_TABLE(hash); diff --git a/internal/hash.h b/internal/hash.h index 275c5167e4..4eea9afbce 100644 --- a/internal/hash.h +++ b/internal/hash.h @@ -42,15 +42,14 @@ enum ruby_rhash_flags { struct RHash { struct RBasic basic; - union { - st_table *st; - struct ar_table_struct *ar; /* possibly 0 */ - } as; const VALUE ifnone; union { ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE]; VALUE word; } ar_hint; + union { + st_table *st; + } as; }; #define RHASH(obj) ((struct RHash *)(obj)) @@ -86,6 +85,8 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg); VALUE rb_ident_hash_new_with_size(st_index_t size); +size_t rb_hash_size_as_embedded(VALUE hash); + static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h); static inline VALUE RHASH_IFNONE(VALUE h); static inline size_t RHASH_SIZE(VALUE h); @@ -96,9 +97,6 @@ static inline struct ar_table_struct *RHASH_AR_TABLE(VALUE h); static inline st_table *RHASH_ST_TABLE(VALUE h); static inline size_t RHASH_ST_SIZE(VALUE h); static inline void RHASH_ST_CLEAR(VALUE h); -static inline bool RHASH_TRANSIENT_P(VALUE h); -static inline void RHASH_SET_TRANSIENT_FLAG(VALUE h); -static inline void RHASH_UNSET_TRANSIENT_FLAG(VALUE h); RUBY_SYMBOL_EXPORT_BEGIN /* hash.c (export) */ @@ -128,7 +126,7 @@ RHASH_AR_TABLE_P(VALUE h) static inline struct ar_table_struct * RHASH_AR_TABLE(VALUE h) { - return RHASH(h)->as.ar; + return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st)); } static inline st_table * @@ -187,30 +185,4 @@ RHASH_AR_TABLE_SIZE_RAW(VALUE h) return (unsigned)ret; } -static inline bool -RHASH_TRANSIENT_P(VALUE h) -{ -#if USE_TRANSIENT_HEAP - return FL_TEST_RAW(h, RHASH_TRANSIENT_FLAG); -#else - return false; -#endif -} - -static inline void -RHASH_SET_TRANSIENT_FLAG(VALUE h) -{ -#if USE_TRANSIENT_HEAP - FL_SET_RAW(h, RHASH_TRANSIENT_FLAG); -#endif -} - -static inline void -RHASH_UNSET_TRANSIENT_FLAG(VALUE h) -{ -#if USE_TRANSIENT_HEAP - FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG); -#endif -} - #endif /* INTERNAL_HASH_H */ diff --git a/ractor.c b/ractor.c index 044d376235..77acf3a97f 100644 --- a/ractor.c +++ b/ractor.c @@ -3112,7 +3112,6 @@ obj_traverse_replace_rec(struct obj_traverse_replace_data *data) #if USE_TRANSIENT_HEAP void rb_ary_transient_heap_evacuate(VALUE ary, int promote); void rb_obj_transient_heap_evacuate(VALUE obj, int promote); -void rb_hash_transient_heap_evacuate(VALUE hash, int promote); void rb_struct_transient_heap_evacuate(VALUE st, int promote); #endif @@ -3249,12 +3248,8 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data) RB_GC_GUARD(obj); } break; - case T_HASH: { -#if USE_TRANSIENT_HEAP - if (data->move) rb_hash_transient_heap_evacuate(obj, TRUE); -#endif struct obj_traverse_replace_callback_data d = { .stop = false, .data = data, diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb index bef2ba9605..b1b964589a 100644 --- a/test/ruby/test_gc_compact.rb +++ b/test/ruby/test_gc_compact.rb @@ -417,6 +417,26 @@ class TestGCCompact < Test::Unit::TestCase end; end + def test_moving_hashes_down_size_pools + omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1 + + assert_separately(%w[-robjspace], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV) + begin; + HASH_COUNT = 500 + + GC.verify_compaction_references(expand_heap: true, toward: :empty) + + base_hash = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8 } + ary = HASH_COUNT.times.map { base_hash.dup } + ary.each { |h| h[:i] = 9 } + + stats = GC.verify_compaction_references(expand_heap: true, toward: :empty) + + assert_operator(stats[:moved_down][:T_HASH], :>=, HASH_COUNT) + assert_include(ObjectSpace.dump(ary[0]), '"slot_size":' + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE].to_s) + end; + end + def test_moving_objects_between_size_pools_keeps_shape_frozen_status # [Bug #19536] assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}") diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb index 1d9159bac0..d9cce4a337 100644 --- a/test/ruby/test_shapes.rb +++ b/test/ruby/test_shapes.rb @@ -372,8 +372,10 @@ class TestShapes < Test::Unit::TestCase assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of([])) end - def test_hash_has_root_shape - assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({})) + def test_hash_has_correct_pool_shape + # All hashes are now allocated their own ar_table, so start in a + # larger pool, and have already transitioned once. + assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({}).parent) end def test_true_has_special_const_shape_id diff --git a/transient_heap.c b/transient_heap.c index 2340df4912..a3bf14fe0b 100644 --- a/transient_heap.c +++ b/transient_heap.c @@ -10,7 +10,6 @@ #include "internal.h" #include "internal/array.h" #include "internal/gc.h" -#include "internal/hash.h" #include "internal/sanitizers.h" #include "internal/static_assert.h" #include "internal/struct.h" @@ -607,15 +606,6 @@ transient_heap_ptr(VALUE obj, int error) ptr = rb_struct_const_heap_ptr(obj); } break; - case T_HASH: - if (RHASH_TRANSIENT_P(obj)) { - TH_ASSERT(RHASH_AR_TABLE_P(obj)); - ptr = (VALUE *)(RHASH(obj)->as.ar); - } - else { - ptr = NULL; - } - break; default: if (error) { rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj)); @@ -736,9 +726,6 @@ transient_heap_block_evacuate(struct transient_heap* theap, struct transient_hea case T_STRUCT: rb_struct_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE); break; - case T_HASH: - rb_hash_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE); - break; default: rb_bug("unsupported: %s\n", rb_obj_info(obj)); } diff --git a/transient_heap.h b/transient_heap.h index 4ac52aad07..6c6141f71a 100644 --- a/transient_heap.h +++ b/transient_heap.h @@ -42,7 +42,6 @@ int rb_transient_heap_managed_ptr_p(const void *ptr); /* evacuate functions for each type */ void rb_ary_transient_heap_evacuate(VALUE ary, int promote); void rb_obj_transient_heap_evacuate(VALUE obj, int promote); -void rb_hash_transient_heap_evacuate(VALUE hash, int promote); void rb_struct_transient_heap_evacuate(VALUE st, int promote); #else /* USE_TRANSIENT_HEAP */ @@ -58,7 +57,6 @@ void rb_struct_transient_heap_evacuate(VALUE st, int promote); #define rb_ary_transient_heap_evacuate(x, y) ((void)0) #define rb_obj_transient_heap_evacuate(x, y) ((void)0) -#define rb_hash_transient_heap_evacuate(x, y) ((void)0) #define rb_struct_transient_heap_evacuate(x, y) ((void)0) #endif /* USE_TRANSIENT_HEAP */ -- cgit v1.2.1