summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Zhu <peter@peterzhu.ca> 2021-08-26 10:06:32 -0400
committer Peter Zhu <peter@peterzhu.ca> 2021-10-25 13:26:23 -0400
commit a5b6598192c30187b19b892af3110a46f6a70d76 (patch)
tree 4620f69a10659deb6f278b36c10ec7915194573e
parent 6374be5a8188ff5ed2c70b9f1d76672c87a0eda7 (diff)
download ruby-a5b6598192c30187b19b892af3110a46f6a70d76.tar.gz
[Feature #18239] Implement VWA for strings
This commit adds support for embedded strings with variable capacity and uses Variable Width Allocation to allocate strings.
-rw-r--r-- debug.c | 2
-rw-r--r-- ext/-test-/string/capacity.c | 9
-rw-r--r-- ext/-test-/string/cstr.c | 10
-rw-r--r-- gc.c | 145
-rw-r--r-- gc.rb | 10
-rw-r--r-- include/ruby/internal/config.h | 4
-rw-r--r-- include/ruby/internal/core/rstring.h | 20
-rw-r--r-- internal/gc.h | 27
-rwxr-xr-x misc/lldb_cruby.py | 3
-rw-r--r-- ruby.c | 7
-rw-r--r-- spec/ruby/optional/capi/string_spec.rb | 12
-rw-r--r-- string.c | 323
-rw-r--r-- test/-ext-/string/test_capacity.rb | 37
-rw-r--r-- test/-ext-/string/test_rb_str_dup.rb | 6
-rw-r--r-- test/objspace/test_objspace.rb | 4
-rw-r--r-- transcode.c | 4
16 files changed, 452 insertions, 171 deletions
diff --git a/debug.c b/debug.c
index 52bd0f7fb7..a5e6ce475a 100644
--- a/debug.c
+++ b/debug.c
@@ -56,7 +56,9 @@ const union {
enum ruby_robject_consts robject_consts;
enum ruby_rmodule_flags rmodule_flags;
enum ruby_rstring_flags rstring_flags;
+#if !USE_RVARGC
enum ruby_rstring_consts rstring_consts;
+#endif
enum ruby_rarray_flags rarray_flags;
enum ruby_rarray_consts rarray_consts;
enum {
diff --git a/ext/-test-/string/capacity.c b/ext/-test-/string/capacity.c
index cb8d2c2b3a..33b2023fd3 100644
--- a/ext/-test-/string/capacity.c
+++ b/ext/-test-/string/capacity.c
@@ -4,10 +4,11 @@
static VALUE
bug_str_capacity(VALUE klass, VALUE str)
{
- return
- STR_EMBED_P(str) ? INT2FIX(RSTRING_EMBED_LEN_MAX) : \
- STR_SHARED_P(str) ? INT2FIX(0) : \
- LONG2FIX(RSTRING(str)->as.heap.aux.capa);
+ if (!STR_EMBED_P(str) && STR_SHARED_P(str)) {
+ return INT2FIX(0);
+ }
+
+ return LONG2FIX(rb_str_capacity(str));
}
void
diff --git a/ext/-test-/string/cstr.c b/ext/-test-/string/cstr.c
index 4f837998d7..1eadb8b4fd 100644
--- a/ext/-test-/string/cstr.c
+++ b/ext/-test-/string/cstr.c
@@ -62,9 +62,13 @@ bug_str_unterminated_substring(VALUE str, VALUE vbeg, VALUE vlen)
if (RSTRING_LEN(str) < beg + len) rb_raise(rb_eIndexError, "end: %ld", beg + len);
str = rb_str_new_shared(str);
if (STR_EMBED_P(str)) {
+#if USE_RVARGC
+ RSTRING(str)->as.embed.len = (short)len;
+#else
RSTRING(str)->basic.flags &= ~RSTRING_EMBED_LEN_MASK;
RSTRING(str)->basic.flags |= len << RSTRING_EMBED_LEN_SHIFT;
- memmove(RSTRING(str)->as.ary, RSTRING(str)->as.ary + beg, len);
+#endif
+ memmove(RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.ary + beg, len);
}
else {
RSTRING(str)->as.heap.ptr += beg;
@@ -112,7 +116,11 @@ bug_str_s_cstr_noembed(VALUE self, VALUE str)
Check_Type(str, T_STRING);
FL_SET((str2), STR_NOEMBED);
memcpy(buf, RSTRING_PTR(str), capacity);
+#if USE_RVARGC
+ RBASIC(str2)->flags &= ~(STR_SHARED | FL_USER5 | FL_USER6);
+#else
RBASIC(str2)->flags &= ~RSTRING_EMBED_LEN_MASK;
+#endif
RSTRING(str2)->as.heap.aux.capa = capacity;
RSTRING(str2)->as.heap.ptr = buf;
RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
diff --git a/gc.c b/gc.c
index 04337e4440..0c739ba709 100644
--- a/gc.c
+++ b/gc.c
@@ -888,6 +888,7 @@ static const bool USE_MMAP_ALIGNED_ALLOC = false;
#endif
struct heap_page {
+ short slot_size;
short total_slots;
short free_slots;
short pinned_slots;
@@ -1849,7 +1850,7 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj
if (RGENGC_CHECK_MODE &&
/* obj should belong to page */
!(&page->start[0] <= (RVALUE *)obj &&
- (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size)) &&
+ (uintptr_t)obj < ((uintptr_t)page->start + (page->total_slots * page->slot_size)) &&
obj % sizeof(RVALUE) == 0)) {
rb_bug("heap_page_add_freeobj: %p is not rvalue.", (void *)p);
}
@@ -1938,7 +1939,7 @@ heap_pages_free_unused_pages(rb_objspace_t *objspace)
}
struct heap_page *hipage = heap_pages_sorted[heap_allocated_pages - 1];
- uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->size_pool->slot_size);
+ uintptr_t himem = (uintptr_t)hipage->start + (hipage->total_slots * hipage->slot_size);
GC_ASSERT(himem <= (uintptr_t)heap_pages_himem);
heap_pages_himem = (RVALUE *)himem;
@@ -2034,6 +2035,7 @@ heap_page_allocate(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
page->start = (RVALUE *)start;
page->total_slots = limit;
+ page->slot_size = size_pool->slot_size;
page->size_pool = size_pool;
page_body->header.page = page;
@@ -2091,7 +2093,6 @@ heap_add_page(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea
{
/* Adding to eden heap during incremental sweeping is forbidden */
GC_ASSERT(!(heap == SIZE_POOL_EDEN_HEAP(size_pool) && heap->sweeping_page));
- GC_ASSERT(page->size_pool == size_pool);
page->flags.in_tomb = (heap == SIZE_POOL_TOMB_HEAP(size_pool));
list_add_tail(&heap->pages, &page->page_node);
heap->total_pages++;
@@ -2324,18 +2325,37 @@ static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page);
static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap);
static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page);
-#if USE_RVARGC
-void *
-rb_gc_rvargc_object_data(VALUE obj)
+size_t
+rb_gc_obj_slot_size(VALUE obj)
{
- return (void *)(obj + sizeof(RVALUE));
+ return GET_HEAP_PAGE(obj)->slot_size;
}
+
+static inline size_t
+size_pool_slot_size(char pool_id)
+{
+ GC_ASSERT(pool_id < SIZE_POOL_COUNT);
+
+ size_t slot_size = (1 << pool_id) * sizeof(RVALUE);
+
+#if RGENGC_CHECK_MODE
+ rb_objspace_t *objspace = &rb_objspace;
+ GC_ASSERT(size_pools[pool_id].slot_size == slot_size);
#endif
+ return slot_size;
+}
+
+bool
+rb_gc_size_allocatable_p(size_t size)
+{
+ return size <= size_pool_slot_size(SIZE_POOL_COUNT - 1);
+}
+
static inline VALUE
ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size)
{
- if (size != sizeof(RVALUE)) {
+ if (size > sizeof(RVALUE)) {
return Qfalse;
}
@@ -2409,6 +2429,25 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3)
}
#if USE_RVARGC
+static inline rb_size_pool_t *
+size_pool_for_size(rb_objspace_t *objspace, size_t size)
+{
+ size_t slot_count = CEILDIV(size, sizeof(RVALUE));
+
+ /* size_pool_idx is ceil(log2(slot_count)) */
+ size_t size_pool_idx = 64 - nlz_int64(slot_count - 1);
+ if (size_pool_idx >= SIZE_POOL_COUNT) {
+ rb_bug("size_pool_for_size: allocation size too large");
+ }
+
+ rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
+ GC_ASSERT(size_pool->slot_size >= (short)size);
+ GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size);
+
+ return size_pool;
+}
+
+
static inline VALUE
heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap)
{
@@ -2430,25 +2469,6 @@ heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *
return (VALUE)p;
}
-
-static inline rb_size_pool_t *
-size_pool_for_size(rb_objspace_t *objspace, size_t size)
-{
- size_t slot_count = CEILDIV(size, sizeof(RVALUE));
-
- /* size_pool_idx is ceil(log2(slot_count)) */
- size_t size_pool_idx = 64 - nlz_int64(slot_count - 1);
- GC_ASSERT(size_pool_idx > 0);
- if (size_pool_idx >= SIZE_POOL_COUNT) {
- rb_bug("size_pool_for_size: allocation size too large");
- }
-
- rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
- GC_ASSERT(size_pool->slot_size >= (short)size);
- GC_ASSERT(size_pools[size_pool_idx - 1].slot_size < (short)size);
-
- return size_pool;
-}
#endif
ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size));
@@ -2574,7 +2594,6 @@ VALUE
rb_wb_unprotected_newobj_of(VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
- size = size + sizeof(RVALUE);
return newobj_of(klass, flags, 0, 0, 0, FALSE, size);
}
@@ -2582,7 +2601,6 @@ VALUE
rb_wb_protected_newobj_of(VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
- size = size + sizeof(RVALUE);
return newobj_of(klass, flags, 0, 0, 0, TRUE, size);
}
@@ -2590,7 +2608,6 @@ VALUE
rb_ec_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t size)
{
GC_ASSERT((flags & FL_WB_PROTECTED) == 0);
- size = size + sizeof(RVALUE);
return newobj_of_cr(rb_ec_ractor_ptr(ec), klass, flags, 0, 0, 0, TRUE, size);
}
@@ -2830,14 +2847,14 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr)
mid = (lo + hi) / 2;
page = heap_pages_sorted[mid];
if (page->start <= p) {
- if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->size_pool->slot_size))) {
+ if ((uintptr_t)p < ((uintptr_t)page->start + (page->total_slots * page->slot_size))) {
RB_DEBUG_COUNTER_INC(gc_isptr_maybe);
if (page->flags.in_tomb) {
return FALSE;
}
else {
- if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->size_pool->slot_size != 0) return FALSE;
+ if ((NUM_IN_PAGE(p) * sizeof(RVALUE)) % page->slot_size != 0) return FALSE;
return TRUE;
}
@@ -4183,7 +4200,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace)
/* run data/file object's finalizers */
for (i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
- short stride = page->size_pool->slot_size;
+ short stride = page->slot_size;
uintptr_t p = (uintptr_t)page->start;
uintptr_t pend = p + page->total_slots * stride;
@@ -4780,13 +4797,13 @@ count_objects(int argc, VALUE *argv, VALUE os)
for (i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
- short stride = page->size_pool->slot_size;
+ short stride = page->slot_size;
uintptr_t p = (uintptr_t)page->start;
uintptr_t pend = p + page->total_slots * stride;
for (;p < pend; p += stride) {
VALUE vp = (VALUE)p;
- GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->size_pool->slot_size == 0);
+ GC_ASSERT((NUM_IN_PAGE(vp) * sizeof(RVALUE)) % page->slot_size == 0);
void *poisoned = asan_poisoned_object_p(vp);
asan_unpoison_object(vp, false);
@@ -4916,7 +4933,7 @@ try_move_in_plane(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa
from_freelist = true;
}
- gc_move(objspace, (VALUE)p, dest, page->size_pool->slot_size);
+ gc_move(objspace, (VALUE)p, dest, page->slot_size);
gc_pin(objspace, (VALUE)p);
heap->compact_cursor_index = (RVALUE *)p;
if (from_freelist) {
@@ -5216,7 +5233,7 @@ gc_fill_swept_page_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p,
struct heap_page * sweep_page = ctx->page;
if (bitset) {
- short slot_size = sweep_page->size_pool->slot_size;
+ short slot_size = sweep_page->slot_size;
short slot_bits = slot_size / sizeof(RVALUE);
do {
@@ -5307,7 +5324,7 @@ static inline void
gc_plane_sweep(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct gc_sweep_context *ctx)
{
struct heap_page * sweep_page = ctx->page;
- short slot_size = sweep_page->size_pool->slot_size;
+ short slot_size = sweep_page->slot_size;
short slot_bits = slot_size / sizeof(RVALUE);
GC_ASSERT(slot_bits > 0);
@@ -5385,7 +5402,6 @@ static inline void
gc_page_sweep(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap, struct gc_sweep_context *ctx)
{
struct heap_page *sweep_page = ctx->page;
- GC_ASSERT(sweep_page->size_pool == size_pool);
int i;
@@ -5603,27 +5619,31 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
size_t min_free_slots = (size_t)(total_slots * gc_params.heap_free_slots_min_ratio);
if (swept_slots < min_free_slots) {
- if (is_full_marking(objspace)) {
- size_t extend_page_count = heap_extend_pages(objspace, swept_slots, total_slots, total_pages);
+ bool grow_heap = is_full_marking(objspace);
- if (extend_page_count > size_pool->allocatable_pages) {
- size_pool_allocatable_pages_set(objspace, size_pool, extend_page_count);
- }
-
- heap_increment(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
- }
- else {
+ if (!is_full_marking(objspace)) {
/* The heap is a growth heap if it freed more slots than had empty slots. */
bool is_growth_heap = size_pool->empty_slots == 0 ||
size_pool->freed_slots > size_pool->empty_slots;
- /* Only growth heaps are allowed to start a major GC. */
- if (is_growth_heap &&
- objspace->profile.count - objspace->rgengc.last_major_gc >= RVALUE_OLD_AGE) {
+ if (objspace->profile.count - objspace->rgengc.last_major_gc < RVALUE_OLD_AGE) {
+ grow_heap = TRUE;
+ }
+ else if (is_growth_heap) { /* Only growth heaps are allowed to start a major GC. */
objspace->rgengc.need_major_gc |= GPR_FLAG_MAJOR_BY_NOFREE;
size_pool->force_major_gc_count++;
}
}
+
+ if (grow_heap) {
+ size_t extend_page_count = heap_extend_pages(objspace, swept_slots, total_slots, total_pages);
+
+ if (extend_page_count > size_pool->allocatable_pages) {
+ size_pool_allocatable_pages_set(objspace, size_pool, extend_page_count);
+ }
+
+ heap_increment(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
+ }
}
}
#endif
@@ -5660,6 +5680,7 @@ gc_sweep_finish(rb_objspace_t *objspace)
else {
eden_heap->free_pages = eden_heap->pooled_pages;
}
+ eden_heap->pooled_pages = NULL;
objspace->rincgc.pooled_slots = 0;
}
#endif
@@ -5701,8 +5722,6 @@ gc_sweep_step(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *hea
#endif
do {
- GC_ASSERT(sweep_page->size_pool == size_pool);
-
RUBY_DEBUG_LOG("sweep_page:%p", (void *)sweep_page);
struct gc_sweep_context ctx = {
@@ -5831,7 +5850,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
bool from_freelist = FL_TEST_RAW(forwarding_object, FL_FROM_FREELIST);
object = rb_gc_location(forwarding_object);
- gc_move(objspace, object, forwarding_object, page->size_pool->slot_size);
+ gc_move(objspace, object, forwarding_object, page->slot_size);
/* forwarding_object is now our actual object, and "object"
* is the free slot for the original page */
struct heap_page *orig_page = GET_HEAP_PAGE(object);
@@ -7654,7 +7673,7 @@ gc_verify_heap_page(rb_objspace_t *objspace, struct heap_page *page, VALUE obj)
int remembered_old_objects = 0;
int free_objects = 0;
int zombie_objects = 0;
- int stride = page->size_pool->slot_size / sizeof(RVALUE);
+ int stride = page->slot_size / sizeof(RVALUE);
for (i=0; i<page->total_slots; i+=stride) {
VALUE val = (VALUE)&page->start[i];
@@ -7776,7 +7795,7 @@ gc_verify_internal_consistency_(rb_objspace_t *objspace)
/* check relations */
for (size_t i = 0; i < heap_allocated_pages; i++) {
struct heap_page *page = heap_pages_sorted[i];
- short slot_size = page->size_pool->slot_size;
+ short slot_size = page->slot_size;
uintptr_t start = (uintptr_t)page->start;
uintptr_t end = start + page->total_slots * slot_size;
@@ -10019,7 +10038,19 @@ gc_update_object_references(rb_objspace_t *objspace, VALUE obj)
case T_STRING:
if (STR_SHARED_P(obj)) {
+#if USE_RVARGC
+ VALUE orig_shared = any->as.string.as.heap.aux.shared;
+#endif
UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared);
+#if USE_RVARGC
+ VALUE shared = any->as.string.as.heap.aux.shared;
+ if (STR_EMBED_P(shared)) {
+ size_t offset = (size_t)any->as.string.as.heap.ptr - (size_t)RSTRING(orig_shared)->as.embed.ary;
+ GC_ASSERT(any->as.string.as.heap.ptr >= RSTRING(orig_shared)->as.embed.ary);
+ GC_ASSERT(offset <= (size_t)RSTRING(shared)->as.embed.len);
+ any->as.string.as.heap.ptr = RSTRING(shared)->as.embed.ary + offset;
+ }
+#endif
}
break;
@@ -13561,6 +13592,8 @@ Init_GC(void)
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_SIZE")), SIZET2NUM(HEAP_PAGE_BITMAP_SIZE));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_BITMAP_PLANES")), SIZET2NUM(HEAP_PAGE_BITMAP_PLANES));
rb_hash_aset(gc_constants, ID2SYM(rb_intern("HEAP_PAGE_SIZE")), SIZET2NUM(HEAP_PAGE_SIZE));
+ rb_hash_aset(gc_constants, ID2SYM(rb_intern("SIZE_POOL_COUNT")), LONG2FIX(SIZE_POOL_COUNT));
+ rb_hash_aset(gc_constants, ID2SYM(rb_intern("RVARGC_MAX_ALLOCATE_SIZE")), LONG2FIX(size_pool_slot_size(SIZE_POOL_COUNT - 1)));
OBJ_FREEZE(gc_constants);
/* internal constants */
rb_define_const(rb_mGC, "INTERNAL_CONSTANTS", gc_constants);
diff --git a/gc.rb b/gc.rb
index c0459b0beb..e80d6635a7 100644
--- a/gc.rb
+++ b/gc.rb
@@ -256,6 +256,16 @@ module GC
def self.verify_compaction_references(toward: nil, double_heap: false)
Primitive.gc_verify_compaction_references(double_heap, toward == :empty)
end
+
+ # :nodoc:
+ # call-seq:
+ # GC.using_rvargc? -> true or false
+ #
+ # Returns true if using experimental feature Variable Width Allocation, false
+ # otherwise.
+ def self.using_rvargc?
+ GC::INTERNAL_CONSTANTS[:SIZE_POOL_COUNT] > 1
+ end
end
module ObjectSpace
diff --git a/include/ruby/internal/config.h b/include/ruby/internal/config.h
index b6134c6165..51f863fc29 100644
--- a/include/ruby/internal/config.h
+++ b/include/ruby/internal/config.h
@@ -146,4 +146,8 @@
# undef RBIMPL_TEST3
#endif /* HAVE_VA_ARGS_MACRO */
+#ifndef USE_RVARGC
+# define USE_RVARGC 0
+#endif
+
#endif /* RBIMPL_CONFIG_H */
diff --git a/include/ruby/internal/core/rstring.h b/include/ruby/internal/core/rstring.h
index f7d0539a0c..b7cf142156 100644
--- a/include/ruby/internal/core/rstring.h
+++ b/include/ruby/internal/core/rstring.h
@@ -42,9 +42,11 @@
/** @cond INTERNAL_MACRO */
#define RSTRING_NOEMBED RSTRING_NOEMBED
+#if !USE_RVARGC
#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK
#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX
+#endif
#define RSTRING_FSTR RSTRING_FSTR
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
#define RSTRING_LEN RSTRING_LEN
@@ -160,6 +162,7 @@ enum ruby_rstring_flags {
*/
RSTRING_NOEMBED = RUBY_FL_USER1,
+#if !USE_RVARGC
/**
* When a string employs embedded strategy (see ::RSTRING_NOEMBED), these
* bits are used to store the number of bytes actually filled into
@@ -172,6 +175,7 @@ enum ruby_rstring_flags {
*/
RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
RUBY_FL_USER5 | RUBY_FL_USER6,
+#endif
/* Actually, string encodings are also encoded into the flags, using
* remaining bits.*/
@@ -198,6 +202,7 @@ enum ruby_rstring_flags {
RSTRING_FSTR = RUBY_FL_USER17
};
+#if !USE_RVARGC
/**
* This is an enum because GDB wants it (rather than a macro). People need not
* bother.
@@ -209,6 +214,7 @@ enum ruby_rstring_consts {
/** Max possible number of characters that can be embedded. */
RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
};
+#endif
/**
* Ruby's String. A string in ruby conceptually has these information:
@@ -278,7 +284,17 @@ struct RString {
* here. Could be sufficiently large. In this case the length is
* encoded into the flags.
*/
+#if USE_RVARGC
+ short len;
+ /* This is a length 1 array because:
+ * 1. GCC has a bug that does not optimize C flexible array members
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
+ * 2. Zero length arrays are not supported by all compilers
+ */
+ char ary[1];
+#else
char ary[RSTRING_EMBED_LEN_MAX + 1];
+#endif
} embed;
} as;
};
@@ -407,9 +423,13 @@ RSTRING_EMBED_LEN(VALUE str)
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
+#if USE_RVARGC
+ short f = RSTRING(str)->as.embed.len;
+#else
VALUE f = RBASIC(str)->flags;
f &= RSTRING_EMBED_LEN_MASK;
f >>= RSTRING_EMBED_LEN_SHIFT;
+#endif
return RBIMPL_CAST((long)f);
}
diff --git a/internal/gc.h b/internal/gc.h
index 233af1ce2c..49b12db2df 100644
--- a/internal/gc.h
+++ b/internal/gc.h
@@ -18,10 +18,6 @@
struct rb_execution_context_struct; /* in vm_core.h */
struct rb_objspace; /* in vm_core.h */
-#ifndef USE_RVARGC
-#define USE_RVARGC 0
-#endif
-
#ifdef NEWOBJ_OF
# undef NEWOBJ_OF
# undef RB_NEWOBJ_OF
@@ -30,22 +26,21 @@ struct rb_objspace; /* in vm_core.h */
#define RVALUE_SIZE (sizeof(struct RBasic) + sizeof(VALUE[RBIMPL_RVALUE_EMBED_LEN_MAX]))
-/* optimized version of NEWOBJ() */
-#define RB_NEWOBJ_OF(var, T, c, f) \
- T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
- rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \
- rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE))
-
-#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) \
- T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
- rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, RVALUE_SIZE) : \
- rb_wb_unprotected_newobj_of((c), (f), RVALUE_SIZE))
-
#define RB_RVARGC_NEWOBJ_OF(var, T, c, f, s) \
T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
rb_wb_protected_newobj_of((c), (f) & ~FL_WB_PROTECTED, s) : \
rb_wb_unprotected_newobj_of((c), (f), s))
+#define RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, s) \
+ T *(var) = (T *)(((f) & FL_WB_PROTECTED) ? \
+ rb_ec_wb_protected_newobj_of((ec), (c), (f) & ~FL_WB_PROTECTED, s) : \
+ rb_wb_unprotected_newobj_of((c), (f), s))
+
+/* optimized version of NEWOBJ() */
+#define RB_NEWOBJ_OF(var, T, c, f) RB_RVARGC_NEWOBJ_OF(var, T, c, f, RVALUE_SIZE)
+
+#define RB_EC_NEWOBJ_OF(ec, var, T, c, f) RB_RVARGC_EC_NEWOBJ_OF(ec, var, T, c, f, RVALUE_SIZE)
+
#define NEWOBJ_OF(var, T, c, f) RB_NEWOBJ_OF((var), T, (c), (f))
#define RVARGC_NEWOBJ_OF(var, T, c, f, s) RB_RVARGC_NEWOBJ_OF((var), T, (c), (f), (s))
#define RB_OBJ_GC_FLAGS_MAX 6 /* used in ext/objspace */
@@ -102,6 +97,8 @@ static inline void *ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, si
static inline void ruby_sized_xfree_inlined(void *ptr, size_t size);
VALUE rb_class_allocate_instance(VALUE klass);
void rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache);
+size_t rb_gc_obj_slot_size(VALUE obj);
+bool rb_gc_size_allocatable_p(size_t size);
RUBY_SYMBOL_EXPORT_BEGIN
/* gc.c (export) */
diff --git a/misc/lldb_cruby.py b/misc/lldb_cruby.py
index c046e7fbb1..b6ac231cee 100755
--- a/misc/lldb_cruby.py
+++ b/misc/lldb_cruby.py
@@ -190,6 +190,8 @@ def string2cstr(rstring):
cptr = int(rstring.GetValueForExpressionPath(".as.heap.ptr").value, 0)
clen = int(rstring.GetValueForExpressionPath(".as.heap.len").value, 0)
else:
+ # cptr = int(rstring.GetValueForExpressionPath(".as.embed.ary").location, 0)
+ # clen = int(rstring.GetValueForExpressionPath(".as.embed.len").value, 0)
cptr = int(rstring.GetValueForExpressionPath(".as.ary").location, 0)
clen = (flags & RSTRING_EMBED_LEN_MASK) >> RSTRING_EMBED_LEN_SHIFT
return cptr, clen
@@ -315,7 +317,6 @@ def lldb_inspect(debugger, target, result, val):
else:
len = val.GetValueForExpressionPath("->as.heap.len").GetValueAsSigned()
ptr = val.GetValueForExpressionPath("->as.heap.ptr")
- #print(val.GetValueForExpressionPath("->as.heap"), file=result)
result.write("T_ARRAY: %slen=%d" % (flaginfo, len))
if flags & RUBY_FL_USER1:
result.write(" (embed)")
diff --git a/ruby.c b/ruby.c
index ce0a96abd8..0c64d4e1ce 100644
--- a/ruby.c
+++ b/ruby.c
@@ -566,7 +566,12 @@ static VALUE
runtime_libruby_path(void)
{
#if defined _WIN32 || defined __CYGWIN__
- DWORD len = RSTRING_EMBED_LEN_MAX, ret;
+ DWORD len, ret;
+#if USE_RVARGC
+ len = 32;
+#else
+ len = RSTRING_EMBED_LEN_MAX;
+#endif
VALUE path;
VALUE wsopath = rb_str_new(0, len*sizeof(WCHAR));
WCHAR *wlibpath;
diff --git a/spec/ruby/optional/capi/string_spec.rb b/spec/ruby/optional/capi/string_spec.rb
index 5575ade07b..3cd88a7390 100644
--- a/spec/ruby/optional/capi/string_spec.rb
+++ b/spec/ruby/optional/capi/string_spec.rb
@@ -108,7 +108,7 @@ describe "C-API String function" do
it "returns a string with the given capacity" do
buf = @s.rb_str_buf_new(256, nil)
- @s.rb_str_capacity(buf).should == 256
+ @s.rb_str_capacity(buf).should >= 256
end
it "returns a string that can be appended to" do
@@ -682,27 +682,27 @@ describe "C-API String function" do
describe "rb_str_modify_expand" do
it "grows the capacity to bytesize + expand, not changing the bytesize" do
str = @s.rb_str_buf_new(256, "abcd")
- @s.rb_str_capacity(str).should == 256
+ @s.rb_str_capacity(str).should >= 256
@s.rb_str_set_len(str, 3)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
- @s.rb_str_capacity(str).should == 256
+ @s.rb_str_capacity(str).should >= 256
@s.rb_str_modify_expand(str, 4)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
- @s.rb_str_capacity(str).should == 7
+ @s.rb_str_capacity(str).should >= 7
@s.rb_str_modify_expand(str, 1024)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
- @s.rb_str_capacity(str).should == 1027
+ @s.rb_str_capacity(str).should >= 1027
@s.rb_str_modify_expand(str, 1)
str.bytesize.should == 3
@s.RSTRING_LEN(str).should == 3
- @s.rb_str_capacity(str).should == 4
+ @s.rb_str_capacity(str).should >= 4
end
it "raises an error if the string is frozen" do
diff --git a/string.c b/string.c
index 48c92072d8..b815b12c54 100644
--- a/string.c
+++ b/string.c
@@ -106,14 +106,26 @@ VALUE rb_cSymbol;
#define STR_SET_NOEMBED(str) do {\
FL_SET((str), STR_NOEMBED);\
- STR_SET_EMBED_LEN((str), 0);\
+ if (USE_RVARGC) {\
+ FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
+ }\
+ else {\
+ STR_SET_EMBED_LEN((str), 0);\
+ }\
} while (0)
#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
-#define STR_SET_EMBED_LEN(str, n) do { \
+#if USE_RVARGC
+# define STR_SET_EMBED_LEN(str, n) do { \
+ assert(str_embed_capa(str) > (n));\
+ RSTRING(str)->as.embed.len = (n);\
+} while (0)
+#else
+# define STR_SET_EMBED_LEN(str, n) do { \
long tmp_n = (n);\
RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
} while (0)
+#endif
#define STR_SET_LEN(str, n) do { \
if (STR_EMBED_P(str)) {\
@@ -150,7 +162,7 @@ VALUE rb_cSymbol;
} while (0)
#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
if (STR_EMBED_P(str)) {\
- if (!STR_EMBEDDABLE_P(capacity, termlen)) {\
+ if (str_embed_capa(str) < capacity + termlen) {\
char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
const long tlen = RSTRING_LEN(str);\
memcpy(tmp, RSTRING_PTR(str), tlen);\
@@ -170,6 +182,8 @@ VALUE rb_cSymbol;
#define STR_SET_SHARED(str, shared_str) do { \
if (!FL_TEST(str, STR_FAKESTR)) { \
+ assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
+ assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
FL_SET((str), STR_SHARED); \
FL_SET((shared_str), STR_SHARED_ROOT); \
@@ -193,8 +207,32 @@ VALUE rb_cSymbol;
#define SHARABLE_SUBSTRING_P(beg, len, end) 1
#endif
-#define STR_EMBEDDABLE_P(len, termlen) \
- ((len) <= RSTRING_EMBED_LEN_MAX + 1 - (termlen))
+
+static inline long
+str_embed_capa(VALUE str)
+{
+#if USE_RVARGC
+ return rb_gc_obj_slot_size(str) - offsetof(struct RString, as.embed.ary);
+#else
+ return RSTRING_EMBED_LEN_MAX + 1;
+#endif
+}
+
+static inline size_t
+str_embed_size(long capa)
+{
+ return offsetof(struct RString, as.embed.ary) + capa;
+}
+
+static inline bool
+STR_EMBEDDABLE_P(long len, long termlen)
+{
+#if USE_RVARGC
+ return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
+#else
+ return len <= RSTRING_EMBED_LEN_MAX + 1 - termlen;
+#endif
+}
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str);
static VALUE str_new_frozen(VALUE klass, VALUE orig);
@@ -768,7 +806,11 @@ static size_t
str_capacity(VALUE str, const int termlen)
{
if (STR_EMBED_P(str)) {
+#if USE_RVARGC
+ return str_embed_capa(str) - termlen;
+#else
return (RSTRING_EMBED_LEN_MAX + 1 - termlen);
+#endif
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
return RSTRING(str)->as.heap.len;
@@ -793,17 +835,36 @@ must_not_null(const char *ptr)
}
static inline VALUE
-str_alloc(VALUE klass)
+str_alloc(VALUE klass, size_t size)
{
- NEWOBJ_OF(str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
+ assert(size > 0);
+ RVARGC_NEWOBJ_OF(str, struct RString, klass,
+ T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
static inline VALUE
+str_alloc_embed(VALUE klass, size_t capa)
+{
+ size_t size = str_embed_size(capa);
+ assert(rb_gc_size_allocatable_p(size));
+#if !USE_RVARGC
+ assert(size <= sizeof(struct RString));
+#endif
+ return str_alloc(klass, size);
+}
+
+static inline VALUE
+str_alloc_heap(VALUE klass)
+{
+ return str_alloc(klass, sizeof(struct RString));
+}
+
+static inline VALUE
empty_str_alloc(VALUE klass)
{
RUBY_DTRACE_CREATE_HOOK(STRING, 0);
- return str_alloc(klass);
+ return str_alloc_embed(klass, 0);
}
static VALUE
@@ -817,8 +878,14 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
RUBY_DTRACE_CREATE_HOOK(STRING, len);
- str = str_alloc(klass);
- if (!STR_EMBEDDABLE_P(len, termlen)) {
+ if (STR_EMBEDDABLE_P(len, termlen)) {
+ str = str_alloc_embed(klass, len + termlen);
+ if (len == 0) {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+ }
+ }
+ else {
+ str = str_alloc_heap(klass);
RSTRING(str)->as.heap.aux.capa = len;
/* :FIXME: @shyouhei guesses `len + termlen` is guaranteed to never
* integer overflow. If we can STATIC_ASSERT that, the following
@@ -827,9 +894,6 @@ str_new0(VALUE klass, const char *ptr, long len, int termlen)
rb_xmalloc_mul_add_mul(sizeof(char), len, sizeof(char), termlen);
STR_SET_NOEMBED(str);
}
- else if (len == 0) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
if (ptr) {
memcpy(RSTRING_PTR(str), ptr, len);
}
@@ -931,7 +995,7 @@ str_new_static(VALUE klass, const char *ptr, long len, int encindex)
}
else {
RUBY_DTRACE_CREATE_HOOK(STRING, len);
- str = str_alloc(klass);
+ str = str_alloc_heap(klass);
RSTRING(str)->as.heap.len = len;
RSTRING(str)->as.heap.ptr = (char *)ptr;
RSTRING(str)->as.heap.aux.capa = len;
@@ -1228,8 +1292,8 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
long len;
RSTRING_GETMEM(str, ptr, len);
- if (STR_EMBEDDABLE_P(len, termlen)) {
- char *ptr2 = RSTRING(str2)->as.embed.ary;
+ if (str_embed_capa(str2) >= len + termlen) {
+ char *ptr2 = RSTRING(str2)->as.embed.ary;
STR_SET_EMBED(str2);
memcpy(ptr2, RSTRING_PTR(str), len);
STR_SET_EMBED_LEN(str2, len);
@@ -1245,6 +1309,7 @@ str_replace_shared_without_enc(VALUE str2, VALUE str)
root = rb_str_new_frozen(str);
RSTRING_GETMEM(root, ptr, len);
}
+ assert(OBJ_FROZEN(root));
if (!STR_EMBED_P(str2) && !FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
if (FL_TEST_RAW(str2, STR_SHARED_ROOT)) {
rb_fatal("about to free a possible shared root");
@@ -1273,7 +1338,7 @@ str_replace_shared(VALUE str2, VALUE str)
static VALUE
str_new_shared(VALUE klass, VALUE str)
{
- return str_replace_shared(str_alloc(klass), str);
+ return str_replace_shared(str_alloc_heap(klass), str);
}
VALUE
@@ -1336,25 +1401,53 @@ str_new_frozen(VALUE klass, VALUE orig)
}
static VALUE
+heap_str_make_shared(VALUE klass, VALUE orig)
+{
+ assert(!STR_EMBED_P(orig));
+ assert(!STR_SHARED_P(orig));
+
+ VALUE str = str_alloc_heap(klass);
+ STR_SET_NOEMBED(str);
+ RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
+ RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
+ RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
+ RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
+ RBASIC(orig)->flags &= ~STR_NOFREE;
+ STR_SET_SHARED(orig, str);
+ if (klass == 0)
+ FL_UNSET_RAW(str, STR_BORROWED);
+ return str;
+}
+
+static VALUE
str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
{
VALUE str;
- if (STR_EMBED_P(orig)) {
- str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
+ long len = RSTRING_LEN(orig);
+
+ if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(len, 1)) {
+ str = str_new(klass, RSTRING_PTR(orig), len);
+ assert(STR_EMBED_P(str));
}
else {
if (FL_TEST_RAW(orig, STR_SHARED)) {
VALUE shared = RSTRING(orig)->as.heap.aux.shared;
- long ofs = RSTRING(orig)->as.heap.ptr - RSTRING(shared)->as.heap.ptr;
- long rest = RSTRING(shared)->as.heap.len - ofs - RSTRING(orig)->as.heap.len;
+ long ofs = RSTRING(orig)->as.heap.ptr - RSTRING_PTR(shared);
+ long rest = RSTRING_LEN(shared) - ofs - RSTRING(orig)->as.heap.len;
+ assert(ofs >= 0);
+ assert(rest >= 0);
+ assert(ofs + rest <= RSTRING_LEN(shared));
+#if !USE_RVARGC
assert(!STR_EMBED_P(shared));
+#endif
assert(OBJ_FROZEN(shared));
if ((ofs > 0) || (rest > 0) ||
(klass != RBASIC(shared)->klass) ||
ENCODING_GET(shared) != ENCODING_GET(orig)) {
str = str_new_shared(klass, shared);
+ assert(!STR_EMBED_P(str));
RSTRING(str)->as.heap.ptr += ofs;
RSTRING(str)->as.heap.len -= ofs + rest;
}
@@ -1364,24 +1457,15 @@ str_new_frozen_buffer(VALUE klass, VALUE orig, int copy_encoding)
return shared;
}
}
- else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
- str = str_alloc(klass);
+ else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
+ str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
}
else {
- str = str_alloc(klass);
- STR_SET_NOEMBED(str);
- RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
- RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
- RSTRING(str)->as.heap.aux.capa = RSTRING(orig)->as.heap.aux.capa;
- RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
- RBASIC(orig)->flags &= ~STR_NOFREE;
- STR_SET_SHARED(orig, str);
- if (klass == 0)
- FL_UNSET_RAW(str, STR_BORROWED);
+ str = heap_str_make_shared(klass, orig);
}
}
@@ -1405,17 +1489,24 @@ str_new_empty_String(VALUE str)
}
#define STR_BUF_MIN_SIZE 63
+#if !USE_RVARGC
STATIC_ASSERT(STR_BUF_MIN_SIZE, STR_BUF_MIN_SIZE > RSTRING_EMBED_LEN_MAX);
+#endif
VALUE
rb_str_buf_new(long capa)
{
- VALUE str = str_alloc(rb_cString);
+ if (STR_EMBEDDABLE_P(capa, 1)) {
+ return str_alloc_embed(rb_cString, capa + 1);
+ }
+
+ VALUE str = str_alloc_heap(rb_cString);
- if (capa <= RSTRING_EMBED_LEN_MAX) return str;
+#if !USE_RVARGC
if (capa < STR_BUF_MIN_SIZE) {
capa = STR_BUF_MIN_SIZE;
}
+#endif
FL_SET(str, STR_NOEMBED);
RSTRING(str)->as.heap.aux.capa = capa;
RSTRING(str)->as.heap.ptr = ALLOC_N(char, (size_t)capa + 1);
@@ -1508,7 +1599,7 @@ str_shared_replace(VALUE str, VALUE str2)
str_discard(str);
termlen = rb_enc_mbminlen(enc);
- if (STR_EMBEDDABLE_P(RSTRING_LEN(str2), termlen)) {
+ if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
STR_SET_EMBED(str);
memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (size_t)RSTRING_LEN(str2) + termlen);
STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
@@ -1516,6 +1607,21 @@ str_shared_replace(VALUE str, VALUE str2)
ENC_CODERANGE_SET(str, cr);
}
else {
+#if USE_RVARGC
+ if (STR_EMBED_P(str2)) {
+ assert(!FL_TEST(str2, STR_SHARED));
+ long len = RSTRING(str2)->as.embed.len;
+ assert(len + termlen <= str_embed_capa(str2));
+
+ char *new_ptr = ALLOC_N(char, len + termlen);
+ memcpy(new_ptr, RSTRING(str2)->as.embed.ary, len + termlen);
+ RSTRING(str2)->as.heap.ptr = new_ptr;
+ RSTRING(str2)->as.heap.len = len;
+ RSTRING(str2)->as.heap.aux.capa = len;
+ STR_SET_NOEMBED(str2);
+ }
+#endif
+
STR_SET_NOEMBED(str);
FL_UNSET(str, STR_SHARED);
RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
@@ -1581,42 +1687,77 @@ str_replace(VALUE str, VALUE str2)
}
static inline VALUE
-ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass)
+ec_str_alloc(struct rb_execution_context_struct *ec, VALUE klass, size_t size) /* new size parameter: passed through to RB_RVARGC_EC_NEWOBJ_OF below */
{
- RB_EC_NEWOBJ_OF(ec, str, struct RString, klass, T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0));
+ assert(size > 0); /* callers must request a non-zero size */
+ RB_RVARGC_EC_NEWOBJ_OF(ec, str, struct RString, klass,
+ T_STRING | (RGENGC_WB_PROTECTED_STRING ? FL_WB_PROTECTED : 0), size);
return (VALUE)str;
}
static inline VALUE
+ec_str_alloc_embed(struct rb_execution_context_struct *ec, VALUE klass, size_t capa)
+{ /* Allocate a string object through ec_str_alloc, with the object size
+   * computed by str_embed_size(capa). */
+ size_t size = str_embed_size(capa);
+ assert(rb_gc_size_allocatable_p(size));
+#if !USE_RVARGC
+ assert(size <= sizeof(struct RString)); /* without RVARGC the size may not exceed one struct RString */
+#endif
+ return ec_str_alloc(ec, klass, size);
+}
+
static inline VALUE
+ec_str_alloc_heap(struct rb_execution_context_struct *ec, VALUE klass)
+{ /* Allocate a string object of exactly sizeof(struct RString) through ec_str_alloc. */
+ return ec_str_alloc(ec, klass, sizeof(struct RString));
+}
+
+static inline VALUE
str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
{
- enum {embed_size = RSTRING_EMBED_LEN_MAX + 1};
const VALUE flag_mask =
+#if !USE_RVARGC
RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
- ENC_CODERANGE_MASK | ENCODING_MASK |
+#endif
+ ENC_CODERANGE_MASK | ENCODING_MASK |
FL_FREEZE
;
VALUE flags = FL_TEST_RAW(str, flag_mask);
int encidx = 0;
- MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
- char, embed_size);
- if (flags & STR_NOEMBED) {
+ if (STR_EMBED_P(str)) {
+ assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str));
+ STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str));
+ MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
+ char, RSTRING_EMBED_LEN(str));
+ flags &= ~RSTRING_NOEMBED;
+ }
+ else {
+ VALUE root = str;
if (FL_TEST_RAW(str, STR_SHARED)) {
- str = RSTRING(str)->as.heap.aux.shared;
+ root = RSTRING(str)->as.heap.aux.shared;
}
else if (UNLIKELY(!(flags & FL_FREEZE))) {
- str = str_new_frozen(klass, str);
+ root = str = str_new_frozen(klass, str);
flags = FL_TEST_RAW(str, flag_mask);
- }
- if (flags & STR_NOEMBED) {
- RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, str);
- flags |= STR_SHARED;
- }
- else {
- MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
- char, embed_size);
- }
+ }
+ assert(!STR_SHARED_P(root));
+ assert(RB_OBJ_FROZEN_RAW(root));
+#if USE_RVARGC
+ if (1) {
+#else
+ if (STR_EMBED_P(root)) {
+ MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(root)->as.embed.ary,
+ char, RSTRING_EMBED_LEN_MAX + 1);
+ }
+ else {
+#endif
+ RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
+ RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
+ RB_OBJ_WRITE(dup, &RSTRING(dup)->as.heap.aux.shared, root);
+ flags |= RSTRING_NOEMBED | STR_SHARED;
+ }
}
+
if ((flags & ENCODING_MASK) == (ENCODING_INLINE_MAX<<ENCODING_SHIFT)) {
encidx = rb_enc_get_index(str);
flags &= ~ENCODING_MASK;
@@ -1629,14 +1770,28 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
static inline VALUE
ec_str_duplicate(struct rb_execution_context_struct *ec, VALUE klass, VALUE str)
{
- VALUE dup = ec_str_alloc(ec, klass);
+ VALUE dup;
+ if (FL_TEST(str, STR_NOEMBED)) { /* STR_NOEMBED set on str: allocate the fixed-size heap layout */
+ dup = ec_str_alloc_heap(ec, klass);
+ }
+ else { /* otherwise request embedded room for str's bytes plus TERM_LEN(str) */
+ dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
+
return str_duplicate_setup(klass, str, dup);
}
static inline VALUE
str_duplicate(VALUE klass, VALUE str)
{
- VALUE dup = str_alloc(klass);
+ VALUE dup;
+ if (FL_TEST(str, STR_NOEMBED)) { /* STR_NOEMBED set on str: allocate the heap layout */
+ dup = str_alloc_heap(klass);
+ }
+ else { /* otherwise request embedded room for str's bytes plus TERM_LEN(str) */
+ dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
+
return str_duplicate_setup(klass, str, dup);
}
@@ -1745,7 +1900,12 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
str_modifiable(str);
if (STR_EMBED_P(str)) { /* make noembed always */
char *new_ptr = ALLOC_N(char, (size_t)capa + termlen);
+#if USE_RVARGC
+ assert(RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
+ memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING(str)->as.embed.len + 1);
+#else
memcpy(new_ptr, RSTRING(str)->as.embed.ary, RSTRING_EMBED_LEN_MAX + 1);
+#endif
RSTRING(str)->as.heap.ptr = new_ptr;
}
else if (FL_TEST(str, STR_SHARED|STR_NOFREE)) {
@@ -2133,7 +2293,7 @@ rb_str_times(VALUE str, VALUE times)
return str_duplicate(rb_cString, str);
}
if (times == INT2FIX(0)) {
- str2 = str_alloc(rb_cString);
+ str2 = str_alloc_embed(rb_cString, 0);
rb_enc_copy(str2, str);
return str2;
}
@@ -2142,8 +2302,11 @@ rb_str_times(VALUE str, VALUE times)
rb_raise(rb_eArgError, "negative argument");
}
if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
- str2 = str_alloc(rb_cString);
- if (!STR_EMBEDDABLE_P(len, 1)) {
+ if (STR_EMBEDDABLE_P(len, 1)) {
+ str2 = str_alloc_embed(rb_cString, len + 1);
+ }
+ else {
+ str2 = str_alloc_heap(rb_cString);
RSTRING(str2)->as.heap.aux.capa = len;
RSTRING(str2)->as.heap.ptr = ZALLOC_N(char, (size_t)len + 1);
STR_SET_NOEMBED(str2);
@@ -2244,11 +2407,11 @@ str_make_independent_expand(VALUE str, long len, long expand, const int termlen)
if (len > capa) len = capa;
- if (!STR_EMBED_P(str) && STR_EMBEDDABLE_P(capa, termlen)) {
+ if (!STR_EMBED_P(str) && str_embed_capa(str) >= capa + termlen) {
ptr = RSTRING(str)->as.heap.ptr;
STR_SET_EMBED(str);
- memcpy(RSTRING(str)->as.embed.ary, ptr, len);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ memcpy(RSTRING(str)->as.embed.ary, ptr, len);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
return;
}
@@ -2646,7 +2809,7 @@ rb_str_subseq(VALUE str, long beg, long len)
}
else {
str2 = rb_str_new(RSTRING_PTR(str)+beg, len);
- RB_GC_GUARD(str);
+ RB_GC_GUARD(str);
}
rb_enc_cr_str_copy_for_substr(str2, str);
@@ -2885,19 +3048,19 @@ rb_str_resize(VALUE str, long len)
const int termlen = TERM_LEN(str);
if (STR_EMBED_P(str)) {
if (len == slen) return str;
- if (STR_EMBEDDABLE_P(len, termlen)) {
+ if (str_embed_capa(str) >= len + termlen) {
STR_SET_EMBED_LEN(str, len);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
return str;
}
str_make_independent_expand(str, slen, len - slen, termlen);
}
- else if (STR_EMBEDDABLE_P(len, termlen)) {
+ else if (str_embed_capa(str) >= len + termlen) {
char *ptr = STR_HEAP_PTR(str);
STR_SET_EMBED(str);
if (slen > len) slen = len;
- if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
- TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
+ if (slen > 0) MEMCPY(RSTRING(str)->as.embed.ary, ptr, char, slen);
+ TERM_FILL(RSTRING(str)->as.embed.ary + len, termlen);
STR_SET_EMBED_LEN(str, len);
if (independent) ruby_xfree(ptr);
return str;
@@ -2925,7 +3088,9 @@ str_buf_cat(VALUE str, const char *ptr, long len)
long capa, total, olen, off = -1;
char *sptr;
const int termlen = TERM_LEN(str);
+#if !USE_RVARGC
assert(termlen < RSTRING_EMBED_LEN_MAX + 1); /* < (LONG_MAX/2) */
+#endif
RSTRING_GETMEM(str, sptr, olen);
if (ptr >= sptr && ptr <= sptr + olen) {
@@ -2934,8 +3099,8 @@ str_buf_cat(VALUE str, const char *ptr, long len)
rb_str_modify(str);
if (len == 0) return 0;
if (STR_EMBED_P(str)) {
- capa = RSTRING_EMBED_LEN_MAX + 1 - termlen;
- sptr = RSTRING(str)->as.embed.ary;
+ capa = str_embed_capa(str) - termlen;
+ sptr = RSTRING(str)->as.embed.ary;
olen = RSTRING_EMBED_LEN(str);
}
else {
@@ -4797,17 +4962,21 @@ rb_str_drop_bytes(VALUE str, long len)
str_modifiable(str);
if (len > olen) len = olen;
nlen = olen - len;
- if (STR_EMBEDDABLE_P(nlen, TERM_LEN(str))) {
+ if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
char *oldptr = ptr;
int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
STR_SET_EMBED(str);
STR_SET_EMBED_LEN(str, nlen);
- ptr = RSTRING(str)->as.embed.ary;
+ ptr = RSTRING(str)->as.embed.ary;
memmove(ptr, oldptr + len, nlen);
if (fl == STR_NOEMBED) xfree(oldptr);
}
else {
- if (!STR_SHARED_P(str)) rb_str_new_frozen(str);
+ if (!STR_SHARED_P(str)) {
+ VALUE shared = heap_str_make_shared(rb_obj_class(str), str);
+ rb_enc_cr_str_exact_copy(shared, str);
+ OBJ_FREEZE(shared);
+ }
ptr = RSTRING(str)->as.heap.ptr += len;
RSTRING(str)->as.heap.len = nlen;
}
@@ -10465,7 +10634,13 @@ rb_str_force_encoding(VALUE str, VALUE enc)
static VALUE
rb_str_b(VALUE str)
{
- VALUE str2 = str_alloc(rb_cString);
+ VALUE str2;
+ if (FL_TEST(str, STR_NOEMBED)) {
+ str2 = str_alloc_heap(rb_cString);
+ }
+ else {
+ str2 = str_alloc_embed(rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
+ }
str_replace_shared_without_enc(str2, str);
ENC_CODERANGE_CLEAR(str2);
return str2;
diff --git a/test/-ext-/string/test_capacity.rb b/test/-ext-/string/test_capacity.rb
index df59e76778..583c98fca4 100644
--- a/test/-ext-/string/test_capacity.rb
+++ b/test/-ext-/string/test_capacity.rb
@@ -4,13 +4,10 @@ require '-test-/string'
require 'rbconfig/sizeof'
class Test_StringCapacity < Test::Unit::TestCase
- def capa(str)
- Bug::String.capacity(str)
- end
-
def test_capacity_embedded
- size = RbConfig::SIZEOF['void*'] * 3 - 1
- assert_equal size, capa('foo')
+ assert_equal GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] - embed_header_size - 1, capa('foo') # short string: expected capacity is derived from RVALUE_SIZE, not from the string's length
+ assert_equal max_embed_len, capa('1' * max_embed_len) # string of exactly max_embed_len bytes
+ assert_equal max_embed_len, capa('1' * (max_embed_len - 1)) # one byte shorter: same expected capacity
end
def test_capacity_shared
@@ -18,7 +15,8 @@ class Test_StringCapacity < Test::Unit::TestCase
end
def test_capacity_normal
- assert_equal 128, capa('1'*128)
+ assert_equal max_embed_len + 1, capa('1' * (max_embed_len + 1)) # one byte past max_embed_len: expected capacity equals the length
+ assert_equal max_embed_len + 100, capa('1' * (max_embed_len + 100)) # 100 bytes past max_embed_len: same expectation
end
def test_s_new_capacity
@@ -39,7 +37,10 @@ class Test_StringCapacity < Test::Unit::TestCase
end
def test_literal_capacity
- s = "I am testing string literal capacity"
+ s = eval(%{
+ # frozen_string_literal: true
+ "#{"a" * (max_embed_len + 1)}"
+ }) # the evaluated source is a magic comment followed by a string literal of max_embed_len + 1 "a" characters
assert_equal(s.length, capa(s))
end
@@ -51,9 +52,27 @@ class Test_StringCapacity < Test::Unit::TestCase
end
def test_capacity_fstring
- s = String.new("I am testing", capacity: 1000)
+ s = String.new("a" * max_embed_len, capacity: 1000) # seed is max_embed_len bytes, so the append on the next line makes s longer than max_embed_len
s << "fstring capacity"
s = -s
assert_equal(s.length, capa(s))
end
+
+ private
+
+ def capa(str) # test helper: capacity of str as reported by Bug::String.capacity
+ Bug::String.capacity(str)
+ end
+
+ def embed_header_size # byte count the tests subtract from a size constant to get the expected embedded capacity
+ if GC.using_rvargc?
+ 2 * RbConfig::SIZEOF['void*'] + RbConfig::SIZEOF['short'] # two pointers plus one short (presumably the object header plus an embedded length field; confirm against rstring.h)
+ else
+ 2 * RbConfig::SIZEOF['void*'] # two pointers only
+ end
+ end
+
+ def max_embed_len # RVARGC_MAX_ALLOCATE_SIZE minus embed_header_size minus 1 (the 1 is presumably the terminator byte; confirm)
+ GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] - embed_header_size - 1
+ end
end
diff --git a/test/-ext-/string/test_rb_str_dup.rb b/test/-ext-/string/test_rb_str_dup.rb
index 49b6af9598..c76a90252f 100644
--- a/test/-ext-/string/test_rb_str_dup.rb
+++ b/test/-ext-/string/test_rb_str_dup.rb
@@ -3,13 +3,15 @@ require '-test-/string'
class Test_RbStrDup < Test::Unit::TestCase
def test_nested_shared_non_frozen
- str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50))
+ orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] # RVARGC_MAX_ALLOCATE_SIZE bytes instead of the former 50; presumably too long to be embedded, so the dups share a buffer (confirm)
+ str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str))
assert_send([Bug::String, :shared_string?, str])
assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]')
end
def test_nested_shared_frozen
- str = Bug::String.rb_str_dup(Bug::String.rb_str_dup("a" * 50).freeze)
+ orig_str = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] # RVARGC_MAX_ALLOCATE_SIZE bytes instead of the former 50; presumably too long to be embedded (confirm)
+ str = Bug::String.rb_str_dup(Bug::String.rb_str_dup(orig_str).freeze) # the inner dup is frozen before it is duplicated again
assert_send([Bug::String, :shared_string?, str])
assert_not_send([Bug::String, :sharing_with_shared?, str], '[Bug #15792]')
end
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 8ed04f59c9..a1954d56a9 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -29,12 +29,12 @@ class TestObjSpace < Test::Unit::TestCase
end
def test_memsize_of_root_shared_string
- a = "hello" * 5
+ a = "a" * GC::INTERNAL_CONSTANTS[:RVARGC_MAX_ALLOCATE_SIZE] # RVARGC_MAX_ALLOCATE_SIZE bytes instead of the former 25; presumably too long to be embedded (confirm)
b = a.dup
c = nil
ObjectSpace.each_object(String) {|x| break c = x if x == a and x.frozen?}
rv_size = GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]
- assert_equal([rv_size, rv_size, 26 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)})
+ assert_equal([rv_size, rv_size, a.length + 1 + rv_size], [a, b, c].map {|x| ObjectSpace.memsize_of(x)}) # expected size of c is now computed from a.length; the old literal 26 was 25 + 1 for "hello" * 5
end
def test_argf_memsize
diff --git a/transcode.c b/transcode.c
index 0681288346..d7011443f8 100644
--- a/transcode.c
+++ b/transcode.c
@@ -3769,7 +3769,11 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
rb_str_modify(output);
if (NIL_P(output_bytesize_v)) {
+#if USE_RVARGC
+ output_bytesize = rb_str_capacity(output);
+#else
output_bytesize = RSTRING_EMBED_LEN_MAX;
+#endif
if (!NIL_P(input) && output_bytesize < RSTRING_LEN(input))
output_bytesize = RSTRING_LEN(input);
}