summaryrefslogtreecommitdiff
path: root/shape.c
diff options
context:
space:
mode:
authorJemma Issroff <jemmaissroff@gmail.com>2022-12-08 17:16:52 -0500
committerAaron Patterson <aaron.patterson@gmail.com>2022-12-15 10:06:04 -0800
commitc1ab6ddc9a6fa228caa5d26b118b54855051279c (patch)
treea3361c22480e38d798dfa975bdabf47a832a9fb0 /shape.c
parenta3d552aedd190b0f21a4f6479f0ef1d2ce90189b (diff)
downloadruby-c1ab6ddc9a6fa228caa5d26b118b54855051279c.tar.gz
Transition complex objects to "too complex" shape
When an object becomes "too complex" (in other words it has too many variations in the shape tree), we transition it to use a "too complex" shape and use a hash for storing instance variables. Without this patch, there were rare cases where shape tree growth could "explode" and cause performance degradation on what would otherwise have been cached fast paths. This patch puts a limit on shape tree growth, and gracefully degrades in the rare case where there could be a factorial growth in the shape tree. For example: ```ruby class NG; end HUGE_NUMBER.times do NG.new.instance_variable_set(:"@unique_ivar_#{_1}", 1) end ``` We consider objects to be "too complex" when the object's class has more than SHAPE_MAX_VARIATIONS (currently 8) leaf nodes in the shape tree and the object introduces a new variation (a new leaf node) associated with that class. For example, new variations on instances of the following class would be considered "too complex" because those instances create more than 8 leaves in the shape tree: ```ruby class Foo; end 9.times { Foo.new.instance_variable_set(":@uniq_#{_1}", 1) } ``` However, the following class is *not* too complex because it only has one leaf in the shape tree: ```ruby class Foo def initialize @a = @b = @c = @d = @e = @f = @g = @h = @i = nil end end 9.times { Foo.new } `` This case is rare, so we don't expect this change to impact performance of most applications, but it needs to be handled. Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org>
Diffstat (limited to 'shape.c')
-rw-r--r--shape.c156
1 files changed, 110 insertions, 46 deletions
diff --git a/shape.c b/shape.c
index 60bb427b44..ac9a016c00 100644
--- a/shape.c
+++ b/shape.c
@@ -99,13 +99,13 @@ rb_shape_get_shape_id(VALUE obj)
#else
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
- return ROBJECT_SHAPE_ID(obj);
- break;
+ return ROBJECT_SHAPE_ID(obj);
+ break;
case T_CLASS:
case T_MODULE:
- return RCLASS_SHAPE_ID(obj);
+ return RCLASS_SHAPE_ID(obj);
default:
- return rb_generic_shape_id(obj);
+ return rb_generic_shape_id(obj);
}
#endif
}
@@ -130,50 +130,57 @@ rb_shape_get_shape(VALUE obj)
}
static rb_shape_t*
-get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created)
+get_next_shape_internal(rb_shape_t * shape, ID id, enum shape_type shape_type, bool * variation_created, bool new_shapes_allowed)
{
rb_shape_t *res = NULL;
- RB_VM_LOCK_ENTER();
- {
- bool had_edges = !!shape->edges;
-
- *variation_created = false;
-
- if (!shape->edges) {
- shape->edges = rb_id_table_create(0);
- }
- // Lookup the shape in edges - if there's already an edge and a corresponding shape for it,
- // we can return that. Otherwise, we'll need to get a new shape
- if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) {
- *variation_created = had_edges;
+ // There should never be outgoing edges from "too complex"
+ RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID);
- rb_shape_t * new_shape = rb_shape_alloc(id, shape);
+ *variation_created = false;
- new_shape->type = (uint8_t)shape_type;
- new_shape->capacity = shape->capacity;
+ if (new_shapes_allowed) {
+ RB_VM_LOCK_ENTER();
+ {
+ bool had_edges = !!shape->edges;
- switch (shape_type) {
- case SHAPE_IVAR:
- new_shape->next_iv_index = shape->next_iv_index + 1;
- break;
- case SHAPE_CAPACITY_CHANGE:
- case SHAPE_FROZEN:
- case SHAPE_T_OBJECT:
- new_shape->next_iv_index = shape->next_iv_index;
- break;
- case SHAPE_INITIAL_CAPACITY:
- case SHAPE_ROOT:
- rb_bug("Unreachable");
- break;
+ if (!shape->edges) {
+ shape->edges = rb_id_table_create(0);
}
- rb_id_table_insert(shape->edges, id, (VALUE)new_shape);
+ // Lookup the shape in edges - if there's already an edge and a corresponding shape for it,
+ // we can return that. Otherwise, we'll need to get a new shape
+ if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) {
+ *variation_created = had_edges;
+
+ rb_shape_t * new_shape = rb_shape_alloc(id, shape);
+
+ new_shape->type = (uint8_t)shape_type;
+ new_shape->capacity = shape->capacity;
+
+ switch (shape_type) {
+ case SHAPE_IVAR:
+ new_shape->next_iv_index = shape->next_iv_index + 1;
+ break;
+ case SHAPE_CAPACITY_CHANGE:
+ case SHAPE_FROZEN:
+ case SHAPE_T_OBJECT:
+ new_shape->next_iv_index = shape->next_iv_index;
+ break;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ case SHAPE_INITIAL_CAPACITY:
+ case SHAPE_ROOT:
+ rb_bug("Unreachable");
+ break;
+ }
+
+ rb_id_table_insert(shape->edges, id, (VALUE)new_shape);
- res = new_shape;
+ res = new_shape;
+ }
}
+ RB_VM_LOCK_LEAVE();
}
- RB_VM_LOCK_LEAVE();
return res;
}
@@ -192,6 +199,7 @@ move_iv(VALUE obj, ID id, attr_index_t from, attr_index_t to)
RCLASS_IVPTR(obj)[to] = RCLASS_IVPTR(obj)[from];
break;
case T_OBJECT:
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
ROBJECT_IVPTR(obj)[to] = ROBJECT_IVPTR(obj)[from];
break;
default: {
@@ -242,7 +250,7 @@ remove_shape_recursive(VALUE obj, ID id, rb_shape_t * shape, VALUE * removed)
// has the same attributes as this shape.
if (new_parent) {
bool dont_care;
- rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care);
+ rb_shape_t * new_child = get_next_shape_internal(new_parent, shape->edge_name, shape->type, &dont_care, true);
new_child->capacity = shape->capacity;
if (new_child->type == SHAPE_IVAR) {
move_iv(obj, id, shape->next_iv_index - 1, new_child->next_iv_index - 1);
@@ -275,7 +283,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
RUBY_ASSERT(shape);
RUBY_ASSERT(RB_OBJ_FROZEN(obj));
- if (rb_shape_frozen_shape_p(shape)) {
+ if (rb_shape_frozen_shape_p(shape) || rb_shape_obj_too_complex(obj)) {
return;
}
@@ -287,7 +295,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
}
bool dont_care;
- next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+ next_shape = get_next_shape_internal(shape, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
RUBY_ASSERT(next_shape);
rb_shape_set_shape(obj, next_shape);
@@ -302,7 +310,7 @@ rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id)
{
RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
bool dont_care;
- return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care);
+ return get_next_shape_internal(shape, id, SHAPE_IVAR, &dont_care, true);
}
rb_shape_t *
@@ -310,8 +318,20 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
{
RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
- bool variation_created;
- rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created);
+ bool allow_new_shape = true;
+
+ if (BUILTIN_TYPE(obj) == T_OBJECT) {
+ VALUE klass = rb_obj_class(obj);
+ allow_new_shape = RCLASS_EXT(klass)->variation_count < SHAPE_MAX_VARIATIONS;
+ }
+
+ bool variation_created = false;
+ rb_shape_t * new_shape = get_next_shape_internal(shape, id, SHAPE_IVAR, &variation_created, allow_new_shape);
+
+ if (!new_shape) {
+ RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+ new_shape = rb_shape_get_shape_by_id(OBJ_TOO_COMPLEX_SHAPE_ID);
+ }
// Check if we should update max_iv_count on the object's class
if (BUILTIN_TYPE(obj) == T_OBJECT) {
@@ -333,7 +353,7 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
{
ID edge_name = rb_make_temporary_id(new_capacity);
bool dont_care;
- rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care);
+ rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true);
new_shape->capacity = new_capacity;
return new_shape;
}
@@ -341,6 +361,10 @@ rb_shape_transition_shape_capa(rb_shape_t* shape, uint32_t new_capacity)
bool
rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
{
+ // It doesn't make sense to ask for the index of an IV that's stored
+ // on an object that is "too complex" as it uses a hash for storing IVs
+ RUBY_ASSERT(rb_shape_id(shape) != OBJ_TOO_COMPLEX_SHAPE_ID);
+
while (shape->parent_id != INVALID_SHAPE_ID) {
if (shape->edge_name == id) {
enum shape_type shape_type;
@@ -356,6 +380,7 @@ rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value)
case SHAPE_INITIAL_CAPACITY:
case SHAPE_T_OBJECT:
return false;
+ case SHAPE_OBJ_TOO_COMPLEX:
case SHAPE_FROZEN:
rb_bug("Ivar should not exist on transition\n");
}
@@ -448,11 +473,28 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
case SHAPE_INITIAL_CAPACITY:
case SHAPE_T_OBJECT:
break;
+ case SHAPE_OBJ_TOO_COMPLEX:
+ rb_bug("Unreachable\n");
+ break;
}
return midway_shape;
}
+bool
+rb_shape_obj_too_complex(VALUE obj)
+{
+ return rb_shape_get_shape_id(obj) == OBJ_TOO_COMPLEX_SHAPE_ID;
+}
+
+void
+rb_shape_set_too_complex(VALUE obj)
+{
+ RUBY_ASSERT(BUILTIN_TYPE(obj) == T_OBJECT);
+ RUBY_ASSERT(!rb_shape_obj_too_complex(obj));
+ rb_shape_set_shape_id(obj, OBJ_TOO_COMPLEX_SHAPE_ID);
+}
+
size_t
rb_shape_edges_count(rb_shape_t *shape)
{
@@ -519,6 +561,19 @@ rb_shape_capacity(VALUE self)
}
static VALUE
+rb_shape_too_complex(VALUE self)
+{
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ if (rb_shape_id(shape) == OBJ_TOO_COMPLEX_SHAPE_ID) {
+ return Qtrue;
+ }
+ else {
+ return Qfalse;
+ }
+}
+
+static VALUE
rb_shape_parent_id(VALUE self)
{
rb_shape_t * shape;
@@ -730,7 +785,7 @@ Init_default_shapes(void)
rb_shape_t * shape = rb_shape_get_shape_by_id(i);
bool dont_care;
rb_shape_t * t_object_shape =
- get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care);
+ get_next_shape_internal(shape, id_t_object, SHAPE_T_OBJECT, &dont_care, true);
t_object_shape->edges = rb_id_table_create(0);
RUBY_ASSERT(rb_shape_id(t_object_shape) == (shape_id_t)(i + SIZE_POOL_COUNT));
}
@@ -740,10 +795,16 @@ Init_default_shapes(void)
#if RUBY_DEBUG
rb_shape_t * special_const_shape =
#endif
- get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care);
+ get_next_shape_internal(root, (ID)id_frozen, SHAPE_FROZEN, &dont_care, true);
RUBY_ASSERT(rb_shape_id(special_const_shape) == SPECIAL_CONST_SHAPE_ID);
RUBY_ASSERT(SPECIAL_CONST_SHAPE_ID == (GET_VM()->next_shape_id - 1));
RUBY_ASSERT(rb_shape_frozen_shape_p(special_const_shape));
+
+ rb_shape_t * hash_fallback_shape = rb_shape_alloc_with_parent_id(0, ROOT_SHAPE_ID);
+ hash_fallback_shape->type = SHAPE_OBJ_TOO_COMPLEX;
+ hash_fallback_shape->size_pool_index = 0;
+ RUBY_ASSERT(OBJ_TOO_COMPLEX_SHAPE_ID == (GET_VM()->next_shape_id - 1));
+ RUBY_ASSERT(rb_shape_id(hash_fallback_shape) == OBJ_TOO_COMPLEX_SHAPE_ID);
}
void
@@ -763,6 +824,7 @@ Init_shape(void)
rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0);
rb_define_method(rb_cShape, "type", rb_shape_type, 0);
rb_define_method(rb_cShape, "capacity", rb_shape_capacity, 0);
+ rb_define_method(rb_cShape, "too_complex?", rb_shape_too_complex, 0);
rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT));
rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR));
rb_define_const(rb_cShape, "SHAPE_T_OBJECT", INT2NUM(SHAPE_T_OBJECT));
@@ -770,6 +832,8 @@ Init_shape(void)
rb_define_const(rb_cShape, "SHAPE_ID_NUM_BITS", INT2NUM(SHAPE_ID_NUM_BITS));
rb_define_const(rb_cShape, "SHAPE_FLAG_SHIFT", INT2NUM(SHAPE_FLAG_SHIFT));
rb_define_const(rb_cShape, "SPECIAL_CONST_SHAPE_ID", INT2NUM(SPECIAL_CONST_SHAPE_ID));
+ rb_define_const(rb_cShape, "OBJ_TOO_COMPLEX_SHAPE_ID", INT2NUM(OBJ_TOO_COMPLEX_SHAPE_ID));
+ rb_define_const(rb_cShape, "SHAPE_MAX_VARIATIONS", INT2NUM(SHAPE_MAX_VARIATIONS));
rb_define_singleton_method(rb_cShape, "transition_tree", shape_transition_tree, 0);
rb_define_singleton_method(rb_cShape, "find_by_id", rb_shape_find_by_id, 1);