| author | Matt Valentine-House <matt@eightbitraptor.com> | 2022-04-06 09:55:23 +0100 |
|---|---|---|
| committer | Aaron Patterson <aaron.patterson@gmail.com> | 2022-06-13 10:11:27 -0700 |
| commit | 56cc3e99b6b9ec004255280337f6b8353f5e5b06 (patch) | |
| tree | 2e5fd33e789155aa65a6b9329c334dff029b8349 | |
| parent | f8502a26990c652a2c3c1131614230fec446ab25 (diff) | |
| download | ruby-56cc3e99b6b9ec004255280337f6b8353f5e5b06.tar.gz | |
Move String RVALUES between pools
Re-embed any strings that can now fit inside the slot they've been moved to.
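As a rough usage sketch (not part of the patch; it assumes a Ruby built with USE_RVARGC and GC compaction enabled), the per-type counters added below surface through the hash returned by `GC.compact`:

```ruby
# Hypothetical session: after this patch the compaction stats gain
# :moved_up and :moved_down, counting objects that crossed size pools.
stats = GC.compact

p stats[:moved]                       # e.g. {:T_STRING => 1234, ...}
p stats.dig(:moved_up, :T_STRING)     # strings moved to a larger slot, or nil
p stats.dig(:moved_down, :T_STRING)   # strings moved to a smaller slot, or nil
```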
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | gc.c | 102 |
| -rw-r--r-- | include/ruby/internal/core/rstring.h | 3 |
| -rw-r--r-- | internal/string.h | 4 |
| -rw-r--r-- | string.c | 77 |
| -rw-r--r-- | test/ruby/test_gc_compact.rb | 39 |

5 files changed, 194 insertions, 31 deletions
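For orientation (a sketch, not from the patch): the size pools and their slot sizes can be inspected from Ruby, which is also how the new test below derives its base slot size. The exact `GC.stat_heap` layout depends on the Ruby version and build flags.

```ruby
# Print each size pool's slot size; gc_compact_destination_pool (added to gc.c
# below) picks the pool whose slot can hold the string's size-as-embedded.
GC.stat_heap.each do |pool_index, pool_stats|
  puts "size pool #{pool_index}: slot_size=#{pool_stats[:slot_size]}"
end

base_slot_size = GC.stat_heap[0].fetch(:slot_size)  # same lookup the new test uses
```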
diff --git a/gc.c b/gc.c
--- a/gc.c
+++ b/gc.c
@@ -837,6 +837,8 @@ typedef struct rb_objspace {
     struct {
         size_t considered_count_table[T_MASK];
         size_t moved_count_table[T_MASK];
+        size_t moved_up_count_table[T_MASK];
+        size_t moved_down_count_table[T_MASK];
         size_t total_moved;
     } rcompactor;

@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
 }

 static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
-static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
+static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);

 static void
 lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
 static bool
 try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
 {
+    struct heap_page *src_page = GET_HEAP_PAGE(src);
     if (!free_page) {
         return false;
     }
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
         free_page->freelist = RANY(dest)->as.free.next;

         GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
-        GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
+        if (src_page->slot_size > free_page->slot_size) {
+            objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
+        } else if (free_page->slot_size > src_page->slot_size) {
+            objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
+        }

         objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
         objspace->rcompactor.total_moved++;

-        gc_move(objspace, src, dest, free_page->slot_size);
+        gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
         gc_pin(objspace, src);
         free_page->free_slots--;
     }
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_

                     object = rb_gc_location(forwarding_object);

-                    gc_move(objspace, object, forwarding_object, page->slot_size);
+                    gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
                     /* forwarding_object is now our actual object, and "object"
                      * is the free slot for the original page */
                     struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)

     memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
     memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
+    memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));

     /* Set up read barrier for pages containing MOVED objects */
     install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
     return heap->sweeping_page == heap->compact_cursor;
 }

+static rb_size_pool_t *
+gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
+{
+    size_t obj_size;
+
+    switch (BUILTIN_TYPE(src)) {
+      case T_STRING:
+        obj_size = rb_str_size_as_embedded(src);
+        if (rb_gc_size_allocatable_p(obj_size)){
+            return &size_pools[size_pool_idx_for_size(obj_size)];
+        }
+        else {
+            GC_ASSERT(!STR_EMBED_P(src));
+            return &size_pools[0];
+        }
+      default:
+        return src_pool;
+    }
+}
+
 static bool
-gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
+gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
 {
     GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);

-    rb_heap_t *dheap = heap;
+    rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
     if (gc_compact_heap_cursors_met_p(dheap)) {
-        return false;
+        return dheap != heap;
     }
     while (!try_move(objspace, dheap, dheap->free_pages, src)) {
         struct gc_sweep_context ctx = {
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
 }

 static bool
-gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
+gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
     short slot_size = page->slot_size;
     short slot_bits = slot_size / BASE_SLOT_SIZE;
     GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
         if (bitset & 1) {
             objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;

-            if (!gc_compact_move(objspace, heap, vp)) {
+            if (!gc_compact_move(objspace, heap, size_pool, vp)) {
                 //the cursors met. bubble up
                 return false;
             }
@@ -8295,7 +8324,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
     bitset = (mark_bits[0] & ~pin_bits[0]);
     bitset >>= NUM_IN_PAGE(p);
     if (bitset) {
-        if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+        if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
             return false;
     }
     p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
@@ -8303,7 +8332,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
     for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
         bitset = (mark_bits[j] & ~pin_bits[j]);
         if (bitset) {
-            if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+            if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
                 return false;
         }
         p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
             struct heap_page *start_page = heap->compact_cursor;

             if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
-                GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
                 lock_page_body(objspace, GET_PAGE_BODY(start_page->start));

                 continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
 }

 static VALUE
-gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
+gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
 {
     int marked;
     int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
     }

     /* Move the object */
-    memcpy(dest, src, slot_size);
-    memset(src, 0, slot_size);
+    memcpy(dest, src, MIN(src_slot_size, slot_size));
+    memset(src, 0, src_slot_size);

     /* Set bits for object in new location */
     if (marking) {
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
         break;

      case T_STRING:
-        if (STR_SHARED_P(obj)) {
+        {
 #if USE_RVARGC
-            VALUE orig_shared = any->as.string.as.heap.aux.shared;
 #endif
-            UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+
+            if (STR_SHARED_P(obj)) {
 #if USE_RVARGC
-            VALUE shared = any->as.string.as.heap.aux.shared;
-            if (STR_EMBED_P(shared)) {
-                size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
-                GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
-                GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
-                any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
-            }
+                VALUE old_root = any->as.string.as.heap.aux.shared;
 #endif
-        }
-        break;
+                UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+#if USE_RVARGC
+                VALUE new_root = any->as.string.as.heap.aux.shared;
+                rb_str_update_shared_ary(obj, old_root, new_root);
+
+                // if, after the move, the string is not embedded, and can fit in the
+                // slot it's been placed in, then re-embed it
+                if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
+                    if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
+                        rb_str_make_embedded(obj);
+                    }
+                }
+#endif
+            }
+            break;
+        }

      case T_DATA:
        /* Call the compaction callback, if it exists */
        {
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
     VALUE h = rb_hash_new();
     VALUE considered = rb_hash_new();
     VALUE moved = rb_hash_new();
+    VALUE moved_up = rb_hash_new();
+    VALUE moved_down = rb_hash_new();

     for (i=0; i<T_MASK; i++) {
         if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
         if (objspace->rcompactor.moved_count_table[i]) {
             rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
         }
+
+        if (objspace->rcompactor.moved_up_count_table[i]) {
+            rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
+        }
+
+        if (objspace->rcompactor.moved_down_count_table[i]) {
+            rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
+        }
     }

     rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
     rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
+    rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);

     return h;
 }
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index e394ab7dca..7b4953ab8c 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
     return rb_long2int(RSTRING_LEN(str));
 }

+bool
+rb_str_shared_root_p(VALUE str);
+
 /**
  * Convenient macro to obtain the contents and length at once.
  *
diff --git a/internal/string.h b/internal/string.h
index 18b01862f7..8fb9553d03 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
 VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
 VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
 VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
+void rb_str_make_embedded(VALUE);
+size_t rb_str_size_as_embedded(VALUE);
+bool rb_str_reembeddable_p(VALUE);
+void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
 RUBY_SYMBOL_EXPORT_END

 MJIT_SYMBOL_EXPORT_BEGIN
diff --git a/string.c b/string.c
--- a/string.c
+++ b/string.c
@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
 #endif
 }

+bool
+rb_str_reembeddable_p(VALUE str)
+{
+    return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
+}
+
 static inline size_t
-str_embed_size(long capa)
+rb_str_embed_size(long capa)
 {
     return offsetof(struct RString, as.embed.ary) + capa;
 }

+bool
+rb_str_shared_root_p(VALUE str)
+{
+    return FL_TEST_RAW(str, STR_SHARED_ROOT);
+}
+
+size_t
+rb_str_size_as_embedded(VALUE str)
+{
+    size_t real_size;
+#if USE_RVARGC
+    if (STR_EMBED_P(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+    }
+    /* if the string is not currently embedded, but it can be embedded, how
+     * much space would it require */
+    else if (rb_str_reembeddable_p(str)) {
+        real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
+    }
+    else {
+#endif
+        real_size = sizeof(struct RString);
+#if USE_RVARGC
+    }
+#endif
+    return real_size;
+}
+
 static inline bool
 STR_EMBEDDABLE_P(long len, long termlen)
 {
 #if USE_RVARGC
-    return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+    return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
 #else
     return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
 #endif
@@ -265,6 +299,41 @@ rb_str_make_independent(VALUE str)
 }

 void
+rb_str_make_embedded(VALUE str) {
+    RUBY_ASSERT(rb_str_reembeddable_p(str));
+    RUBY_ASSERT(!STR_EMBED_P(str));
+
+    char *buf = RSTRING_PTR(str);
+    long len = RSTRING_LEN(str);
+
+    STR_SET_EMBED(str);
+    STR_SET_EMBED_LEN(str, len);
+
+    memmove(RSTRING_PTR(str), buf, len);
+    ruby_xfree(buf);
+}
+
+void
+rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
+{
+    // if the root location hasn't changed, we don't need to update
+    if (new_root == old_root) {
+        return;
+    }
+
+    // if the root string isn't embedded, we don't need to touch the pointer.
+    // it already points to the same shared buffer
+    if (!STR_EMBED_P(new_root)) {
+        return;
+    }
+
+    size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
+
+    RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
+    RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
+}
+
+void
 rb_debug_rstring_null_ptr(const char *func)
 {
     fprintf(stderr, "%s is returning NULL!! "
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
 static inline VALUE
 str_alloc_embed(VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
 static inline VALUE
 ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
 {
-    size_t size = str_embed_size(capa);
+    size_t size = rb_str_embed_size(capa);
     assert(rb_gc_size_allocatable_p(size));
 #if !USE_RVARGC
     assert(size <= sizeof(struct RString));
diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb
index 70b4b6ea27..c312ac1ea2 100644
--- a/test/ruby/test_gc_compact.rb
+++ b/test/ruby/test_gc_compact.rb
@@ -208,4 +208,43 @@ class TestGCCompact < Test::Unit::TestCase

     assert_equal([:call, :line], results)
   end
+
+  def test_moving_strings_between_size_pools
+    assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
+    begin;
+      moveables = []
+      small_slots = []
+      large_slots = []
+
+      # Ensure fragmentation in the large heap
+      base_slot_size = GC.stat_heap[0].fetch(:slot_size)
+      500.times {
+        String.new(+"a" * base_slot_size).downcase
+        large_slots << String.new(+"a" * base_slot_size).downcase
+      }
+
+      # Ensure fragmentation in the smaller heap
+      500.times {
+        small_slots << Object.new
+        Object.new
+      }
+
+      500.times {
+        # strings are created as shared strings when initialized from literals
+        # use downcase to force the creation of an embedded string (it calls
+        # rb_str_new internally)
+        moveables << String.new(+"a" * base_slot_size).downcase
+
+        moveables << String.new("a").downcase
+      }
+      moveables.map { |s| s << ("bc" * base_slot_size) }
+      moveables.map { |s| s.squeeze! }
+      stats = GC.compact
+
+      moved_strings = (stats.dig(:moved_up, :T_STRING) || 0) +
+                      (stats.dig(:moved_down, :T_STRING) || 0)
+
+      assert_operator(moved_strings, :>, 0)
+    end;
+  end
 end