summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-04-19 15:59:25 -0400
committerPeter Zhu <peter@peterzhu.ca>2023-05-17 09:19:40 -0400
commit5199f2aaf9527c97e6ec371e19748d0c2ac7a70e (patch)
tree752910a6360dbd7f00af1e665e17238cbce96c17
parent264ba0f89a52c6d0d6425da0cdfb12bbd420c619 (diff)
downloadruby-5199f2aaf9527c97e6ec371e19748d0c2ac7a70e.tar.gz
Implement Hash AR tables on VWA
-rw-r--r--common.mk1
-rw-r--r--gc.c37
-rw-r--r--hash.c99
-rw-r--r--internal/hash.h40
-rw-r--r--ractor.c5
-rw-r--r--test/ruby/test_gc_compact.rb20
-rw-r--r--test/ruby/test_shapes.rb6
-rw-r--r--transient_heap.c13
-rw-r--r--transient_heap.h2
9 files changed, 60 insertions, 163 deletions
diff --git a/common.mk b/common.mk
index 16ff4c5cda..3d78ef8434 100644
--- a/common.mk
+++ b/common.mk
@@ -16475,7 +16475,6 @@ transient_heap.$(OBJEXT): $(top_srcdir)/internal/array.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/compilers.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/gc.h
-transient_heap.$(OBJEXT): $(top_srcdir)/internal/hash.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/imemo.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
transient_heap.$(OBJEXT): $(top_srcdir)/internal/serial.h
diff --git a/gc.c b/gc.c
index db6fa9c861..d2cf6d07db 100644
--- a/gc.c
+++ b/gc.c
@@ -3568,19 +3568,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
RB_DEBUG_COUNTER_INC(obj_hash_st);
}
#endif
- if (/* RHASH_AR_TABLE_P(obj) */ !FL_TEST_RAW(obj, RHASH_ST_TABLE_FLAG)) {
- struct ar_table_struct *tab = RHASH(obj)->as.ar;
-
- if (tab) {
- if (RHASH_TRANSIENT_P(obj)) {
- RB_DEBUG_COUNTER_INC(obj_hash_transient);
- }
- else {
- ruby_xfree(tab);
- }
- }
- }
- else {
+ if (!RHASH_AR_TABLE_P(obj)) {
GC_ASSERT(RHASH_ST_TABLE_P(obj));
st_free_table(RHASH(obj)->as.st);
}
@@ -4922,10 +4910,6 @@ obj_memsize_of(VALUE obj, int use_all_types)
break;
case T_HASH:
if (RHASH_AR_TABLE_P(obj)) {
- if (RHASH_AR_TABLE(obj) != NULL) {
- size_t rb_hash_ar_table_size(void);
- size += rb_hash_ar_table_size();
- }
}
else {
VM_ASSERT(RHASH_ST_TABLE(obj) != NULL);
@@ -6608,14 +6592,6 @@ mark_hash(rb_objspace_t *objspace, VALUE hash)
rb_hash_stlike_foreach(hash, mark_keyvalue, (st_data_t)objspace);
}
- if (RHASH_AR_TABLE_P(hash)) {
- if (LIKELY(during_gc) && RHASH_TRANSIENT_P(hash)) {
- rb_transient_heap_mark(hash, RHASH_AR_TABLE(hash));
- }
- }
- else {
- VM_ASSERT(!RHASH_TRANSIENT_P(hash));
- }
gc_mark(objspace, RHASH(hash)->ifnone);
}
@@ -8425,8 +8401,12 @@ gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, V
obj_size = rb_str_size_as_embedded(src);
break;
- default:
- return src_pool;
+ case T_HASH:
+ obj_size = rb_hash_size_as_embedded(src);
+ break;
+
+ default:
+ return src_pool;
}
if (rb_gc_size_allocatable_p(obj_size)){
@@ -13583,9 +13563,8 @@ rb_raw_obj_info_buitin_type(char *const buff, const size_t buff_size, const VALU
break;
}
case T_HASH: {
- APPEND_F("[%c%c] %"PRIdSIZE,
+ APPEND_F("[%c] %"PRIdSIZE,
RHASH_AR_TABLE_P(obj) ? 'A' : 'S',
- RHASH_TRANSIENT_P(obj) ? 'T' : ' ',
RHASH_SIZE(obj));
break;
}
diff --git a/hash.c b/hash.c
index 213bba01bf..f4aeee36c6 100644
--- a/hash.c
+++ b/hash.c
@@ -388,10 +388,15 @@ typedef struct ar_table_struct {
ar_table_pair pairs[RHASH_AR_TABLE_MAX_SIZE];
} ar_table;
+#define RHASH_EMBED_SIZE (offsetof(struct RHash, as.st) + sizeof(ar_table))
+
size_t
-rb_hash_ar_table_size(void)
+rb_hash_size_as_embedded(VALUE hash)
{
- return sizeof(ar_table);
+ if (RHASH_AR_TABLE_P(hash)) {
+ return RHASH_EMBED_SIZE;
+ }
+ return sizeof(struct RHash);
}
static inline st_hash_t
@@ -534,14 +539,6 @@ hash_verify_(VALUE hash, const char *file, int line)
HASH_ASSERT(RHASH_AR_TABLE_SIZE_RAW(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND_RAW(hash) == 0);
}
-
-#if USE_TRANSIENT_HEAP
- if (RHASH_TRANSIENT_P(hash)) {
- volatile st_data_t MAYBE_UNUSED(key) = RHASH_AR_TABLE_REF(hash, 0)->key; /* read */
- HASH_ASSERT(RHASH_AR_TABLE(hash) != NULL);
- HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH_AR_TABLE(hash)));
- }
-#endif
return hash;
}
@@ -552,7 +549,7 @@ hash_verify_(VALUE hash, const char *file, int line)
static inline int
RHASH_TABLE_NULL_P(VALUE hash)
{
- if (RHASH(hash)->as.ar == NULL) {
+ if (RHASH_AR_TABLE(hash) == NULL) {
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
return TRUE;
}
@@ -579,8 +576,7 @@ static void
hash_ar_table_set(VALUE hash, ar_table *ar)
{
HASH_ASSERT(RHASH_AR_TABLE_P(hash));
- HASH_ASSERT((RHASH_TRANSIENT_P(hash) && ar == NULL) ? FALSE : TRUE);
- RHASH(hash)->as.ar = ar;
+ *(RHASH_AR_TABLE(hash)) = *ar;
hash_verify(hash);
}
@@ -641,25 +637,20 @@ RHASH_AR_TABLE_CLEAR(VALUE h)
RBASIC(h)->flags &= ~RHASH_AR_TABLE_SIZE_MASK;
RBASIC(h)->flags &= ~RHASH_AR_TABLE_BOUND_MASK;
- hash_ar_table_set(h, NULL);
+ memset(RHASH_AR_TABLE(h), 0, sizeof(ar_table));
}
static ar_table*
ar_alloc_table(VALUE hash)
{
- ar_table *tab = (ar_table*)rb_transient_heap_alloc(hash, sizeof(ar_table));
+ ar_table *tab = RHASH_AR_TABLE(hash);
+ memset(tab, 0, sizeof(ar_table));
- if (tab != NULL) {
- RHASH_SET_TRANSIENT_FLAG(hash);
- }
- else {
- RHASH_UNSET_TRANSIENT_FLAG(hash);
- tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
- }
+ // RHASH_UNSET_ST_FLAG(hash);
+ // hash_ar_table_set(hash, tab);
RHASH_AR_TABLE_SIZE_SET(hash, 0);
RHASH_AR_TABLE_BOUND_SET(hash, 0);
- hash_ar_table_set(hash, tab);
return tab;
}
@@ -722,23 +713,14 @@ ar_find_entry(VALUE hash, st_hash_t hash_value, st_data_t key)
return ar_find_entry_hint(hash, hint, key);
}
+//old one
static inline void
ar_free_and_clear_table(VALUE hash)
{
- ar_table *tab = RHASH_AR_TABLE(hash);
+ RHASH_AR_TABLE_CLEAR(hash);
- if (tab) {
- if (RHASH_TRANSIENT_P(hash)) {
- RHASH_UNSET_TRANSIENT_FLAG(hash);
- }
- else {
- ruby_xfree(RHASH_AR_TABLE(hash));
- }
- RHASH_AR_TABLE_CLEAR(hash);
- }
HASH_ASSERT(RHASH_AR_TABLE_SIZE(hash) == 0);
HASH_ASSERT(RHASH_AR_TABLE_BOUND(hash) == 0);
- HASH_ASSERT(RHASH_TRANSIENT_P(hash) == 0);
}
static void
@@ -1055,6 +1037,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value)
return -1;
}
+ HASH_ASSERT(RHASH_AR_TABLE(hash));
hash_ar_table(hash); /* prepare ltbl */
bin = ar_find_entry(hash, hash_value, key);
@@ -1064,6 +1047,7 @@ ar_insert(VALUE hash, st_data_t key, st_data_t value)
}
else if (bin >= RHASH_AR_TABLE_MAX_BOUND) {
bin = ar_compact_table(hash);
+ HASH_ASSERT(RHASH_AR_TABLE(hash));
hash_ar_table(hash);
}
HASH_ASSERT(bin < RHASH_AR_TABLE_MAX_BOUND);
@@ -1202,17 +1186,10 @@ ar_copy(VALUE hash1, VALUE hash2)
ar_table *new_tab = RHASH_AR_TABLE(hash1);
if (new_tab == NULL) {
- new_tab = (ar_table*) rb_transient_heap_alloc(hash1, sizeof(ar_table));
- if (new_tab != NULL) {
- RHASH_SET_TRANSIENT_FLAG(hash1);
- }
- else {
- RHASH_UNSET_TRANSIENT_FLAG(hash1);
- new_tab = (ar_table*)ruby_xmalloc(sizeof(ar_table));
- }
+ new_tab = ar_alloc_table(hash1);
}
- *new_tab = *old_tab;
+ memcpy(new_tab, old_tab, sizeof(ar_table));
RHASH(hash1)->ar_hint.word = RHASH(hash2)->ar_hint.word;
RHASH_AR_TABLE_BOUND_SET(hash1, RHASH_AR_TABLE_BOUND(hash2));
RHASH_AR_TABLE_SIZE_SET(hash1, RHASH_AR_TABLE_SIZE(hash2));
@@ -1236,33 +1213,6 @@ ar_clear(VALUE hash)
}
}
-#if USE_TRANSIENT_HEAP
-void
-rb_hash_transient_heap_evacuate(VALUE hash, int promote)
-{
- if (RHASH_TRANSIENT_P(hash)) {
- ar_table *new_tab;
- ar_table *old_tab = RHASH_AR_TABLE(hash);
-
- if (UNLIKELY(old_tab == NULL)) {
- return;
- }
- HASH_ASSERT(old_tab != NULL);
- if (! promote) {
- new_tab = rb_transient_heap_alloc(hash, sizeof(ar_table));
- if (new_tab == NULL) promote = true;
- }
- if (promote) {
- new_tab = ruby_xmalloc(sizeof(ar_table));
- RHASH_UNSET_TRANSIENT_FLAG(hash);
- }
- *new_tab = *old_tab;
- hash_ar_table_set(hash, new_tab);
- }
- hash_verify(hash);
-}
-#endif
-
typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);
struct foreach_safe_arg {
@@ -1504,7 +1454,7 @@ static VALUE
hash_alloc_flags(VALUE klass, VALUE flags, VALUE ifnone)
{
const VALUE wb = (RGENGC_WB_PROTECTED_HASH ? FL_WB_PROTECTED : 0);
- NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, sizeof(struct RHash), 0);
+ NEWOBJ_OF(hash, struct RHash, klass, T_HASH | wb | flags, RHASH_EMBED_SIZE, 0);
RHASH_SET_IFNONE((VALUE)hash, ifnone);
@@ -1547,10 +1497,7 @@ rb_hash_new_with_size(st_index_t size)
if (size == 0) {
/* do nothing */
}
- else if (size <= RHASH_AR_TABLE_MAX_SIZE) {
- ar_alloc_table(ret);
- }
- else {
+ else if (size > RHASH_AR_TABLE_MAX_SIZE) {
RHASH_ST_TABLE_SET(ret, st_init_table_with_size(&objhash, size));
}
return ret;
@@ -2027,11 +1974,9 @@ rb_hash_rehash(VALUE hash)
rb_hash_modify_check(hash);
if (RHASH_AR_TABLE_P(hash)) {
tmp = hash_alloc(0);
- ar_alloc_table(tmp);
rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp);
ar_free_and_clear_table(hash);
ar_copy(hash, tmp);
- ar_free_and_clear_table(tmp);
}
else if (RHASH_ST_TABLE_P(hash)) {
st_table *old_tab = RHASH_ST_TABLE(hash);
diff --git a/internal/hash.h b/internal/hash.h
index 275c5167e4..4eea9afbce 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -42,15 +42,14 @@ enum ruby_rhash_flags {
struct RHash {
struct RBasic basic;
- union {
- st_table *st;
- struct ar_table_struct *ar; /* possibly 0 */
- } as;
const VALUE ifnone;
union {
ar_hint_t ary[RHASH_AR_TABLE_MAX_SIZE];
VALUE word;
} ar_hint;
+ union {
+ st_table *st;
+ } as;
};
#define RHASH(obj) ((struct RHash *)(obj))
@@ -86,6 +85,8 @@ int rb_hash_stlike_foreach_with_replace(VALUE hash, st_foreach_check_callback_fu
int rb_hash_stlike_update(VALUE hash, st_data_t key, st_update_callback_func *func, st_data_t arg);
VALUE rb_ident_hash_new_with_size(st_index_t size);
+size_t rb_hash_size_as_embedded(VALUE hash);
+
static inline unsigned RHASH_AR_TABLE_SIZE_RAW(VALUE h);
static inline VALUE RHASH_IFNONE(VALUE h);
static inline size_t RHASH_SIZE(VALUE h);
@@ -96,9 +97,6 @@ static inline struct ar_table_struct *RHASH_AR_TABLE(VALUE h);
static inline st_table *RHASH_ST_TABLE(VALUE h);
static inline size_t RHASH_ST_SIZE(VALUE h);
static inline void RHASH_ST_CLEAR(VALUE h);
-static inline bool RHASH_TRANSIENT_P(VALUE h);
-static inline void RHASH_SET_TRANSIENT_FLAG(VALUE h);
-static inline void RHASH_UNSET_TRANSIENT_FLAG(VALUE h);
RUBY_SYMBOL_EXPORT_BEGIN
/* hash.c (export) */
@@ -128,7 +126,7 @@ RHASH_AR_TABLE_P(VALUE h)
static inline struct ar_table_struct *
RHASH_AR_TABLE(VALUE h)
{
- return RHASH(h)->as.ar;
+ return (struct ar_table_struct *)((uintptr_t)h + offsetof(struct RHash, as.st));
}
static inline st_table *
@@ -187,30 +185,4 @@ RHASH_AR_TABLE_SIZE_RAW(VALUE h)
return (unsigned)ret;
}
-static inline bool
-RHASH_TRANSIENT_P(VALUE h)
-{
-#if USE_TRANSIENT_HEAP
- return FL_TEST_RAW(h, RHASH_TRANSIENT_FLAG);
-#else
- return false;
-#endif
-}
-
-static inline void
-RHASH_SET_TRANSIENT_FLAG(VALUE h)
-{
-#if USE_TRANSIENT_HEAP
- FL_SET_RAW(h, RHASH_TRANSIENT_FLAG);
-#endif
-}
-
-static inline void
-RHASH_UNSET_TRANSIENT_FLAG(VALUE h)
-{
-#if USE_TRANSIENT_HEAP
- FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG);
-#endif
-}
-
#endif /* INTERNAL_HASH_H */
diff --git a/ractor.c b/ractor.c
index 044d376235..77acf3a97f 100644
--- a/ractor.c
+++ b/ractor.c
@@ -3112,7 +3112,6 @@ obj_traverse_replace_rec(struct obj_traverse_replace_data *data)
#if USE_TRANSIENT_HEAP
void rb_ary_transient_heap_evacuate(VALUE ary, int promote);
void rb_obj_transient_heap_evacuate(VALUE obj, int promote);
-void rb_hash_transient_heap_evacuate(VALUE hash, int promote);
void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#endif
@@ -3249,12 +3248,8 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data)
RB_GC_GUARD(obj);
}
break;
-
case T_HASH:
{
-#if USE_TRANSIENT_HEAP
- if (data->move) rb_hash_transient_heap_evacuate(obj, TRUE);
-#endif
struct obj_traverse_replace_callback_data d = {
.stop = false,
.data = data,
diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb
index bef2ba9605..b1b964589a 100644
--- a/test/ruby/test_gc_compact.rb
+++ b/test/ruby/test_gc_compact.rb
@@ -417,6 +417,26 @@ class TestGCCompact < Test::Unit::TestCase
end;
end
+ def test_moving_hashes_down_size_pools
+ omit if GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] == 1
+
+ assert_separately(%w[-robjspace], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
+ begin;
+ HASH_COUNT = 500
+
+ GC.verify_compaction_references(expand_heap: true, toward: :empty)
+
+ base_hash = { a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8 }
+ ary = HASH_COUNT.times.map { base_hash.dup }
+ ary.each { |h| h[:i] = 9 }
+
+ stats = GC.verify_compaction_references(expand_heap: true, toward: :empty)
+
+ assert_operator(stats[:moved_down][:T_HASH], :>=, HASH_COUNT)
+ assert_include(ObjectSpace.dump(ary[0]), '"slot_size":' + GC::INTERNAL_CONSTANTS[:BASE_SLOT_SIZE].to_s)
+ end;
+ end
+
def test_moving_objects_between_size_pools_keeps_shape_frozen_status
# [Bug #19536]
assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}")
diff --git a/test/ruby/test_shapes.rb b/test/ruby/test_shapes.rb
index 1d9159bac0..d9cce4a337 100644
--- a/test/ruby/test_shapes.rb
+++ b/test/ruby/test_shapes.rb
@@ -372,8 +372,10 @@ class TestShapes < Test::Unit::TestCase
assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of([]))
end
- def test_hash_has_root_shape
- assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({}))
+ def test_hash_has_correct_pool_shape
+ # All hashes are now allocated their own ar_table, so start in a
+ # larger pool, and have already transitioned once.
+ assert_shape_equal(RubyVM::Shape.root_shape, RubyVM::Shape.of({}).parent)
end
def test_true_has_special_const_shape_id
diff --git a/transient_heap.c b/transient_heap.c
index 2340df4912..a3bf14fe0b 100644
--- a/transient_heap.c
+++ b/transient_heap.c
@@ -10,7 +10,6 @@
#include "internal.h"
#include "internal/array.h"
#include "internal/gc.h"
-#include "internal/hash.h"
#include "internal/sanitizers.h"
#include "internal/static_assert.h"
#include "internal/struct.h"
@@ -607,15 +606,6 @@ transient_heap_ptr(VALUE obj, int error)
ptr = rb_struct_const_heap_ptr(obj);
}
break;
- case T_HASH:
- if (RHASH_TRANSIENT_P(obj)) {
- TH_ASSERT(RHASH_AR_TABLE_P(obj));
- ptr = (VALUE *)(RHASH(obj)->as.ar);
- }
- else {
- ptr = NULL;
- }
- break;
default:
if (error) {
rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj));
@@ -736,9 +726,6 @@ transient_heap_block_evacuate(struct transient_heap* theap, struct transient_hea
case T_STRUCT:
rb_struct_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE);
break;
- case T_HASH:
- rb_hash_transient_heap_evacuate(obj, !TRANSIENT_HEAP_DEBUG_DONT_PROMOTE);
- break;
default:
rb_bug("unsupported: %s\n", rb_obj_info(obj));
}
diff --git a/transient_heap.h b/transient_heap.h
index 4ac52aad07..6c6141f71a 100644
--- a/transient_heap.h
+++ b/transient_heap.h
@@ -42,7 +42,6 @@ int rb_transient_heap_managed_ptr_p(const void *ptr);
/* evacuate functions for each type */
void rb_ary_transient_heap_evacuate(VALUE ary, int promote);
void rb_obj_transient_heap_evacuate(VALUE obj, int promote);
-void rb_hash_transient_heap_evacuate(VALUE hash, int promote);
void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#else /* USE_TRANSIENT_HEAP */
@@ -58,7 +57,6 @@ void rb_struct_transient_heap_evacuate(VALUE st, int promote);
#define rb_ary_transient_heap_evacuate(x, y) ((void)0)
#define rb_obj_transient_heap_evacuate(x, y) ((void)0)
-#define rb_hash_transient_heap_evacuate(x, y) ((void)0)
#define rb_struct_transient_heap_evacuate(x, y) ((void)0)
#endif /* USE_TRANSIENT_HEAP */