author    Matt Valentine-House <matt@eightbitraptor.com>  2022-04-06 09:55:23 +0100
committer Aaron Patterson <aaron.patterson@gmail.com>     2022-06-13 10:11:27 -0700
commit    56cc3e99b6b9ec004255280337f6b8353f5e5b06 (patch)
tree      2e5fd33e789155aa65a6b9329c334dff029b8349
parent    f8502a26990c652a2c3c1131614230fec446ab25 (diff)
download  ruby-56cc3e99b6b9ec004255280337f6b8353f5e5b06.tar.gz
Move String RVALUES between pools
And re-embed any strings that can now fit inside the slot they've been moved to
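
In short: the compactor may now move a T_STRING into a different size pool when its
payload fits a smaller slot (or needs a larger one), and a heap-allocated string that
lands in a slot big enough for its bytes is re-embedded in place. The new :moved_up
and :moved_down keys in the hash returned by GC.compact expose how often that
happened. A minimal sketch of reading them (assuming a Ruby built with this patch and
USE_RVARGC; the key names come from gc_compact_stats below):

    # Shrink some heap-allocated strings, compact, then inspect per-type move counts.
    strings = 500.times.map { "a" * 1000 }
    strings.each(&:squeeze!)   # each becomes "a", small enough to re-embed

    stats = GC.compact
    p stats[:moved_down]       # e.g. { :T_STRING => ... }, moves into smaller slots
    p stats[:moved_up]         # moves into larger slots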
-rw-r--r--  gc.c                                   102
-rw-r--r--  include/ruby/internal/core/rstring.h     3
-rw-r--r--  internal/string.h                        4
-rw-r--r--  string.c                                77
-rw-r--r--  test/ruby/test_gc_compact.rb            39
5 files changed, 194 insertions(+), 31 deletions(-)
diff --git a/gc.c b/gc.c
index 42ccae0d11..b3b44e87a1 100644
--- a/gc.c
+++ b/gc.c
@@ -837,6 +837,8 @@ typedef struct rb_objspace {
struct {
size_t considered_count_table[T_MASK];
size_t moved_count_table[T_MASK];
+ size_t moved_up_count_table[T_MASK];
+ size_t moved_down_count_table[T_MASK];
size_t total_moved;
} rcompactor;
@@ -5091,7 +5093,7 @@ gc_setup_mark_bits(struct heap_page *page)
}
static int gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj);
-static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size);
+static VALUE gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size);
static void
lock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
@@ -5130,6 +5132,7 @@ unlock_page_body(rb_objspace_t *objspace, struct heap_page_body *body)
static bool
try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page, VALUE src)
{
+ struct heap_page *src_page = GET_HEAP_PAGE(src);
if (!free_page) {
return false;
}
@@ -5150,12 +5153,16 @@ try_move(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *free_page,
free_page->freelist = RANY(dest)->as.free.next;
GC_ASSERT(RB_BUILTIN_TYPE(dest) == T_NONE);
- GC_ASSERT(free_page->slot_size == GET_HEAP_PAGE(src)->slot_size);
+ if (src_page->slot_size > free_page->slot_size) {
+ objspace->rcompactor.moved_down_count_table[BUILTIN_TYPE(src)]++;
+ } else if (free_page->slot_size > src_page->slot_size) {
+ objspace->rcompactor.moved_up_count_table[BUILTIN_TYPE(src)]++;
+ }
objspace->rcompactor.moved_count_table[BUILTIN_TYPE(src)]++;
objspace->rcompactor.total_moved++;
- gc_move(objspace, src, dest, free_page->slot_size);
+ gc_move(objspace, src, dest, src_page->slot_size, free_page->slot_size);
gc_pin(objspace, src);
free_page->free_slots--;
}
@@ -5907,7 +5914,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
object = rb_gc_location(forwarding_object);
- gc_move(objspace, object, forwarding_object, page->slot_size);
+ gc_move(objspace, object, forwarding_object, GET_HEAP_PAGE(object)->slot_size, page->slot_size);
/* forwarding_object is now our actual object, and "object"
* is the free slot for the original page */
struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -5976,6 +5983,8 @@ gc_compact_start(rb_objspace_t *objspace)
memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t));
memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t));
+ memset(objspace->rcompactor.moved_up_count_table, 0, T_MASK * sizeof(size_t));
+ memset(objspace->rcompactor.moved_down_count_table, 0, T_MASK * sizeof(size_t));
/* Set up read barrier for pages containing MOVED objects */
install_handlers();
@@ -8224,14 +8233,34 @@ gc_compact_heap_cursors_met_p(rb_heap_t *heap)
return heap->sweeping_page == heap->compact_cursor;
}
+static rb_size_pool_t *
+gc_compact_destination_pool(rb_objspace_t *objspace, rb_size_pool_t *src_pool, VALUE src)
+{
+ size_t obj_size;
+
+ switch (BUILTIN_TYPE(src)) {
+ case T_STRING:
+ obj_size = rb_str_size_as_embedded(src);
+ if (rb_gc_size_allocatable_p(obj_size)) {
+ return &size_pools[size_pool_idx_for_size(obj_size)];
+ }
+ else {
+ GC_ASSERT(!STR_EMBED_P(src));
+ return &size_pools[0];
+ }
+ default:
+ return src_pool;
+ }
+}
+
static bool
-gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
+gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, rb_size_pool_t *size_pool, VALUE src)
{
GC_ASSERT(BUILTIN_TYPE(src) != T_MOVED);
- rb_heap_t *dheap = heap;
+ rb_heap_t *dheap = SIZE_POOL_EDEN_HEAP(gc_compact_destination_pool(objspace, size_pool, src));
if (gc_compact_heap_cursors_met_p(dheap)) {
- return false;
+ return dheap != heap;
}
while (!try_move(objspace, dheap, dheap->free_pages, src)) {
struct gc_sweep_context ctx = {
@@ -8254,7 +8283,7 @@ gc_compact_move(rb_objspace_t *objspace, rb_heap_t *heap, VALUE src)
}
static bool
-gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
+gc_compact_plane(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page) {
short slot_size = page->slot_size;
short slot_bits = slot_size / BASE_SLOT_SIZE;
GC_ASSERT(slot_bits > 0);
@@ -8266,7 +8295,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
if (bitset & 1) {
objspace->rcompactor.considered_count_table[BUILTIN_TYPE(vp)]++;
- if (!gc_compact_move(objspace, heap, vp)) {
+ if (!gc_compact_move(objspace, heap, size_pool, vp)) {
//the cursors met. bubble up
return false;
}
@@ -8295,7 +8324,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
bitset = (mark_bits[0] & ~pin_bits[0]);
bitset >>= NUM_IN_PAGE(p);
if (bitset) {
- if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+ if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
return false;
}
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
@@ -8303,7 +8332,7 @@ gc_compact_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *h
for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
bitset = (mark_bits[j] & ~pin_bits[j]);
if (bitset) {
- if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
+ if (!gc_compact_plane(objspace, size_pool, heap, (uintptr_t)p, bitset, page))
return false;
}
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
@@ -8347,7 +8376,6 @@ gc_sweep_compact(rb_objspace_t *objspace)
struct heap_page *start_page = heap->compact_cursor;
if (!gc_compact_page(objspace, size_pool, heap, start_page)) {
- GC_ASSERT(heap->sweeping_page == heap->compact_cursor);
lock_page_body(objspace, GET_PAGE_BODY(start_page->start));
continue;
@@ -9626,7 +9654,7 @@ gc_is_moveable_obj(rb_objspace_t *objspace, VALUE obj)
}
static VALUE
-gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
+gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t src_slot_size, size_t slot_size)
{
int marked;
int wb_unprotected;
@@ -9676,8 +9704,8 @@ gc_move(rb_objspace_t *objspace, VALUE scan, VALUE free, size_t slot_size)
}
/* Move the object */
- memcpy(dest, src, slot_size);
- memset(src, 0, slot_size);
+ memcpy(dest, src, MIN(src_slot_size, slot_size));
+ memset(src, 0, src_slot_size);
/* Set bits for object in new location */
if (marking) {
@@ -10271,23 +10299,31 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
break;
case T_STRING:
- if (STR_SHARED_P(obj)) {
+ {
#if USE_RVARGC
- VALUE orig_shared = any->as.string.as.heap.aux.shared;
#endif
- UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+
+ if (STR_SHARED_P(obj)) {
#if USE_RVARGC
- VALUE shared = any->as.string.as.heap.aux.shared;
- if (STR_EMBED_P(shared)) {
- size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
- GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
- GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
- any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
- }
+ VALUE old_root = any->as.string.as.heap.aux.shared;
#endif
- }
- break;
+ UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+#if USE_RVARGC
+ VALUE new_root = any->as.string.as.heap.aux.shared;
+ rb_str_update_shared_ary(obj, old_root, new_root);
+
+ // if, after the move, the string is not embedded but can fit in the
+ // slot it has been placed in, then re-embed it
+ if ((size_t)GET_HEAP_PAGE(obj)->slot_size >= rb_str_size_as_embedded(obj)) {
+ if (!STR_EMBED_P(obj) && rb_str_reembeddable_p(obj)) {
+ rb_str_make_embedded(obj);
+ }
+ }
+#endif
+ }
+ break;
+ }
case T_DATA:
/* Call the compaction callback, if it exists */
{
@@ -10479,6 +10515,8 @@ gc_compact_stats(VALUE self)
VALUE h = rb_hash_new();
VALUE considered = rb_hash_new();
VALUE moved = rb_hash_new();
+ VALUE moved_up = rb_hash_new();
+ VALUE moved_down = rb_hash_new();
for (i=0; i<T_MASK; i++) {
if (objspace->rcompactor.considered_count_table[i]) {
@@ -10488,10 +10526,20 @@ gc_compact_stats(VALUE self)
if (objspace->rcompactor.moved_count_table[i]) {
rb_hash_aset(moved, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_count_table[i]));
}
+
+ if (objspace->rcompactor.moved_up_count_table[i]) {
+ rb_hash_aset(moved_up, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_up_count_table[i]));
+ }
+
+ if (objspace->rcompactor.moved_down_count_table[i]) {
+ rb_hash_aset(moved_down, type_sym(i), SIZET2NUM(objspace->rcompactor.moved_down_count_table[i]));
+ }
}
rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered);
rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved);
+ rb_hash_aset(h, ID2SYM(rb_intern("moved_up")), moved_up);
+ rb_hash_aset(h, ID2SYM(rb_intern("moved_down")), moved_down);
return h;
}
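
Note: gc_compact_destination_pool picks the pool whose slot size fits
rb_str_size_as_embedded(src), and falls back to pool 0 for strings that cannot be
embedded at any allocatable size (a bare struct RString fits the smallest slot). The
candidate slot sizes are just the per-pool slot_size values, which can be inspected
from Ruby with the same API the new test uses:

    # Print the slot size of each size pool; pool 0 has the smallest slots.
    GC.stat_heap.each do |pool, info|
      puts "size pool #{pool}: slot_size=#{info[:slot_size]}"
    end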
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index e394ab7dca..7b4953ab8c 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -556,6 +556,9 @@ RSTRING_LENINT(VALUE str)
return rb_long2int(RSTRING_LEN(str));
}
+bool
+rb_str_shared_root_p(VALUE str);
+
/**
* Convenient macro to obtain the contents and length at once.
*
diff --git a/internal/string.h b/internal/string.h
index 18b01862f7..8fb9553d03 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -59,6 +59,10 @@ void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
VALUE rb_setup_fake_str(struct RString *fake_str, const char *name, long len, rb_encoding *enc);
VALUE rb_str_upto_each(VALUE, VALUE, int, int (*each)(VALUE, VALUE), VALUE);
VALUE rb_str_upto_endless_each(VALUE, int (*each)(VALUE, VALUE), VALUE);
+void rb_str_make_embedded(VALUE);
+size_t rb_str_size_as_embedded(VALUE);
+bool rb_str_reembeddable_p(VALUE);
+void rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root);
RUBY_SYMBOL_EXPORT_END
MJIT_SYMBOL_EXPORT_BEGIN
diff --git a/string.c b/string.c
index 84af905deb..4dc462f5c4 100644
--- a/string.c
+++ b/string.c
@@ -221,17 +221,51 @@ str_embed_capa(VALUE str)
#endif
}
+bool
+rb_str_reembeddable_p(VALUE str)
+{
+ return !FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
+}
+
static inline size_t
-str_embed_size(long capa)
+rb_str_embed_size(long capa)
{
return offsetof(struct RString, as.embed.ary) + capa;
}
+bool
+rb_str_shared_root_p(VALUE str)
+{
+ return FL_TEST_RAW(str, STR_SHARED_ROOT);
+}
+
+size_t
+rb_str_size_as_embedded(VALUE str)
+{
+ size_t real_size;
+#if USE_RVARGC
+ if (STR_EMBED_P(str)) {
+ real_size = rb_str_embed_size(RSTRING(str)->as.embed.len) + TERM_LEN(str);
+ }
+ /* if the string is not currently embedded but could be, return how
+ * much space it would require */
+ else if (rb_str_reembeddable_p(str)) {
+ real_size = rb_str_embed_size(RSTRING(str)->as.heap.len) + TERM_LEN(str);
+ }
+ else {
+#endif
+ real_size = sizeof(struct RString);
+#if USE_RVARGC
+ }
+#endif
+ return real_size;
+}
+
static inline bool
STR_EMBEDDABLE_P(long len, long termlen)
{
#if USE_RVARGC
- return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+ return rb_gc_size_allocatable_p(rb_str_embed_size(len + termlen));
#else
return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
#endif
@@ -265,6 +299,41 @@ rb_str_make_independent(VALUE str)
}
void
+rb_str_make_embedded(VALUE str) {
+ RUBY_ASSERT(rb_str_reembeddable_p(str));
+ RUBY_ASSERT(!STR_EMBED_P(str));
+
+ char *buf = RSTRING_PTR(str);
+ long len = RSTRING_LEN(str);
+
+ STR_SET_EMBED(str);
+ STR_SET_EMBED_LEN(str, len);
+
+ memmove(RSTRING_PTR(str), buf, len);
+ ruby_xfree(buf);
+}
+
+void
+rb_str_update_shared_ary(VALUE str, VALUE old_root, VALUE new_root)
+{
+ // if the root location hasn't changed, we don't need to update
+ if (new_root == old_root) {
+ return;
+ }
+
+ // if the root string isn't embedded, we don't need to touch the pointer.
+ // it already points to the same shared buffer
+ if (!STR_EMBED_P(new_root)) {
+ return;
+ }
+
+ size_t offset = (size_t)((uintptr_t)RSTRING(str)->as.heap.ptr - (uintptr_t)RSTRING(old_root)->as.embed.ary);
+
+ RUBY_ASSERT(RSTRING(str)->as.heap.ptr >= RSTRING(old_root)->as.embed.ary);
+ RSTRING(str)->as.heap.ptr = RSTRING(new_root)->as.embed.ary + offset;
+}
+
+void
rb_debug_rstring_null_ptr(const char *func)
{
fprintf(stderr, "%s is returning NULL!! "
@@ -849,7 +918,7 @@ str_alloc(VALUE klass, size_t size)
static inline VALUE
str_alloc_embed(VALUE klass, size_t capa)
{
- size_t size = str_embed_size(capa);
+ size_t size = rb_str_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RString));
@@ -1693,7 +1762,7 @@ ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size)
static inline VALUE
ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
{
- size_t size = str_embed_size(capa);
+ size_t size = rb_str_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RString));
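
rb_str_make_embedded copies the heap buffer into the slot's inline ary and frees the
old allocation. Whether a live string is currently embedded can be observed through
the objspace extension; a rough check (ObjectSpace.dump emits "embedded":true for
strings with the embed flag set, and compaction is not guaranteed to move any
particular string):

    require "objspace"
    require "json"

    s = "a" * 1000                    # larger than any embed size: heap-allocated
    s.squeeze!                        # shrinks to "a"; re-embeddable after a move
    GC.compact                        # may move s to a smaller slot and re-embed it
    p JSON.parse(ObjectSpace.dump(s))["embedded"]   # true once s is embedded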
diff --git a/test/ruby/test_gc_compact.rb b/test/ruby/test_gc_compact.rb
index 70b4b6ea27..c312ac1ea2 100644
--- a/test/ruby/test_gc_compact.rb
+++ b/test/ruby/test_gc_compact.rb
@@ -208,4 +208,43 @@ class TestGCCompact < Test::Unit::TestCase
assert_equal([:call, :line], results)
end
+
+ def test_moving_strings_between_size_pools
+ assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}", timeout: 10, signal: :SEGV)
+ begin;
+ moveables = []
+ small_slots = []
+ large_slots = []
+
+ # Ensure fragmentation in the large heap
+ base_slot_size = GC.stat_heap[0].fetch(:slot_size)
+ 500.times {
+ String.new(+"a" * base_slot_size).downcase
+ large_slots << String.new(+"a" * base_slot_size).downcase
+ }
+
+ # Ensure fragmentation in the smaller heap
+ 500.times {
+ small_slots << Object.new
+ Object.new
+ }
+
+ 500.times {
+ # strings are created as shared strings when initialized from literals.
+ # Use downcase to force the creation of an embedded string (it calls
+ # rb_str_new internally)
+ moveables << String.new(+"a" * base_slot_size).downcase
+
+ moveables << String.new("a").downcase
+ }
+ moveables.map { |s| s << ("bc" * base_slot_size) }
+ moveables.map { |s| s.squeeze! }
+ stats = GC.compact
+
+ moved_strings = (stats.dig(:moved_up, :T_STRING) || 0) +
+ (stats.dig(:moved_down, :T_STRING) || 0)
+
+ assert_operator(moved_strings, :>, 0)
+ end;
+ end
end
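
The String.new(+"a" * base_slot_size).downcase pattern in the test is deliberate:
copies of literals can share their buffer, while downcase allocates a fresh,
non-shared string, which is what makes the result movable and re-embeddable. The
later << and squeeze! calls then push each string across a slot-size boundary,
roughly like this (a sketch of the size swing, not part of the test):

    base = GC.stat_heap[0].fetch(:slot_size)
    s = String.new("a" * base).downcase  # fresh string roughly one small slot wide
    s << ("bc" * base)                   # grows well past that slot: may move up
    s.squeeze!                           # collapses the run of "a"s: may move back down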