summaryrefslogtreecommitdiff
path: root/shape.c
diff options
context:
space:
mode:
authorJemma Issroff <jemmaissroff@gmail.com>2022-09-23 13:54:42 -0400
committerAaron Patterson <tenderlove@ruby-lang.org>2022-09-28 08:26:21 -0700
commitd594a5a8bd0756f65c078fcf5ce0098250cba141 (patch)
tree3930e12366c80e7bcbc330fe880205a3d212b5aa /shape.c
parenta05b2614645594df896aaf44a2e5701ee7fb5fec (diff)
downloadruby-d594a5a8bd0756f65c078fcf5ce0098250cba141.tar.gz
This commit implements the Object Shapes technique in CRuby.
Object Shapes is used for accessing instance variables and representing the "frozenness" of objects. Object instances have a "shape" and the shape represents some attributes of the object (currently which instance variables are set and the "frozenness"). Shapes form a tree data structure, and when a new instance variable is set on an object, that object "transitions" to a new shape in the shape tree. Each shape has an ID that is used for caching. The shape structure is independent of class, so objects of different types can have the same shape. For example: ```ruby class Foo def initialize # Starts with shape id 0 @a = 1 # transitions to shape id 1 @b = 1 # transitions to shape id 2 end end class Bar def initialize # Starts with shape id 0 @a = 1 # transitions to shape id 1 @b = 1 # transitions to shape id 2 end end foo = Foo.new # `foo` has shape id 2 bar = Bar.new # `bar` has shape id 2 ``` Both `foo` and `bar` instances have the same shape because they both set instance variables of the same name in the same order. This technique can help to improve inline cache hits as well as generate more efficient machine code in JIT compilers. This commit also adds some methods for debugging shapes on objects. See `RubyVM::Shape` for more details. For more context on Object Shapes, see [Feature: #18776] Co-Authored-By: Aaron Patterson <tenderlove@ruby-lang.org> Co-Authored-By: Eileen M. Uchitelle <eileencodes@gmail.com> Co-Authored-By: John Hawthorn <john@hawthorn.email>
Diffstat (limited to 'shape.c')
-rw-r--r--shape.c530
1 files changed, 530 insertions, 0 deletions
diff --git a/shape.c b/shape.c
new file mode 100644
index 0000000000..82d284b8fe
--- /dev/null
+++ b/shape.c
@@ -0,0 +1,530 @@
+#include "vm_core.h"
+#include "vm_sync.h"
+#include "shape.h"
+#include "internal/class.h"
+#include "internal/symbol.h"
+#include "internal/variable.h"
+#include <stdbool.h>
+
+/*
+ * Shape getters
+ */
+static rb_shape_t*
+rb_shape_get_root_shape(void) {
+ return GET_VM()->root_shape;
+}
+
+shape_id_t
+rb_shape_id(rb_shape_t * shape)
+{
+ return (shape_id_t)(shape - GET_VM()->shape_list);
+}
+
+static rb_shape_t*
+rb_shape_get_frozen_root_shape(void) {
+ return GET_VM()->frozen_root_shape;
+}
+
+bool
+rb_shape_root_shape_p(rb_shape_t* shape) {
+ return shape == rb_shape_get_root_shape();
+}
+
+rb_shape_t*
+rb_shape_get_shape_by_id(shape_id_t shape_id)
+{
+ RUBY_ASSERT(shape_id != INVALID_SHAPE_ID);
+
+ rb_vm_t *vm = GET_VM();
+ rb_shape_t *shape = &vm->shape_list[shape_id];
+ return shape;
+}
+
+rb_shape_t*
+rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id)
+{
+ RUBY_ASSERT(shape_id != INVALID_SHAPE_ID);
+
+ rb_vm_t *vm = GET_VM();
+ rb_shape_t *shape = &vm->shape_list[shape_id];
+ return shape;
+}
+
+#if !SHAPE_IN_BASIC_FLAGS
+static inline shape_id_t
+RCLASS_SHAPE_ID(VALUE obj)
+{
+ return RCLASS_EXT(obj)->shape_id;
+}
+
+shape_id_t rb_generic_shape_id(VALUE obj);
+#endif
+
+shape_id_t
+rb_shape_get_shape_id(VALUE obj)
+{
+ if (RB_SPECIAL_CONST_P(obj)) {
+ return FROZEN_ROOT_SHAPE_ID;
+ }
+
+#if SHAPE_IN_BASIC_FLAGS
+ return RBASIC_SHAPE_ID(obj);
+#else
+ switch (BUILTIN_TYPE(obj)) {
+ case T_OBJECT:
+ return ROBJECT_SHAPE_ID(obj);
+ break;
+ case T_CLASS:
+ case T_MODULE:
+ return RCLASS_SHAPE_ID(obj);
+ default:
+ return rb_generic_shape_id(obj);
+ }
+#endif
+}
+
+rb_shape_t*
+rb_shape_get_shape(VALUE obj)
+{
+ return rb_shape_get_shape_by_id(rb_shape_get_shape_id(obj));
+}
+
+static rb_shape_t *
+rb_shape_lookup_id(rb_shape_t* shape, ID id, enum shape_type shape_type) {
+ while (shape->parent) {
+ if (shape->edge_name == id) {
+ // If the shape type is different, we don't
+ // want this to count as a "found" ID
+ if (shape_type == (enum shape_type)shape->type) {
+ return shape;
+ }
+ else {
+ return NULL;
+ }
+ }
+ shape = shape->parent;
+ }
+ return NULL;
+}
+
+static rb_shape_t*
+get_next_shape_internal(rb_shape_t* shape, ID id, VALUE obj, enum shape_type shape_type)
+{
+ rb_shape_t *res = NULL;
+ RUBY_ASSERT(SHAPE_FROZEN != (enum shape_type)shape->type);
+ RB_VM_LOCK_ENTER();
+ {
+ if (rb_shape_lookup_id(shape, id, shape_type)) {
+ // If shape already contains the ivar that is being set, we'll return shape
+ res = shape;
+ }
+ else {
+ if (!shape->edges) {
+ shape->edges = rb_id_table_create(0);
+ }
+
+ // Lookup the shape in edges - if there's already an edge and a corresponding shape for it,
+ // we can return that. Otherwise, we'll need to get a new shape
+ if (!rb_id_table_lookup(shape->edges, id, (VALUE *)&res)) {
+ // In this case, the shape exists, but the shape is garbage, so we need to recreate it
+ if (res) {
+ rb_id_table_delete(shape->edges, id);
+ res->parent = NULL;
+ }
+
+ rb_shape_t * new_shape = rb_shape_alloc(id, shape);
+
+ new_shape->type = (uint8_t)shape_type;
+
+ switch(shape_type) {
+ case SHAPE_IVAR:
+ new_shape->iv_count = new_shape->parent->iv_count + 1;
+
+ // Check if we should update max_iv_count on the object's class
+ if (BUILTIN_TYPE(obj) == T_OBJECT) {
+ VALUE klass = rb_obj_class(obj);
+ if (new_shape->iv_count > RCLASS_EXT(klass)->max_iv_count) {
+ RCLASS_EXT(klass)->max_iv_count = new_shape->iv_count;
+ }
+ }
+ break;
+ case SHAPE_IVAR_UNDEF:
+ new_shape->iv_count = new_shape->parent->iv_count;
+ break;
+ case SHAPE_ROOT:
+ rb_bug("Unreachable");
+ break;
+ case SHAPE_FROZEN:
+ break;
+ }
+
+ rb_id_table_insert(shape->edges, id, (VALUE)new_shape);
+
+ res = new_shape;
+ }
+ }
+ }
+ RB_VM_LOCK_LEAVE();
+ return res;
+}
+
+MJIT_FUNC_EXPORTED int
+rb_shape_frozen_shape_p(rb_shape_t* shape)
+{
+ return SHAPE_FROZEN == (enum shape_type)shape->type;
+}
+
+void
+rb_shape_transition_shape_remove_ivar(VALUE obj, ID id, rb_shape_t *shape)
+{
+ rb_shape_t* next_shape = get_next_shape_internal(shape, id, obj, SHAPE_IVAR_UNDEF);
+
+ if (shape == next_shape) {
+ return;
+ }
+
+ rb_shape_set_shape(obj, next_shape);
+}
+
+void
+rb_shape_transition_shape_frozen(VALUE obj)
+{
+ rb_shape_t* shape = rb_shape_get_shape(obj);
+ RUBY_ASSERT(shape);
+ RUBY_ASSERT(RB_OBJ_FROZEN(obj));
+
+ if (rb_shape_frozen_shape_p(shape)) {
+ return;
+ }
+
+ rb_shape_t* next_shape;
+
+ if (shape == rb_shape_get_root_shape()) {
+ switch(BUILTIN_TYPE(obj)) {
+ case T_OBJECT:
+ case T_CLASS:
+ case T_MODULE:
+ break;
+ default:
+ return;
+ }
+ next_shape = rb_shape_get_frozen_root_shape();
+ }
+ else {
+ static ID id_frozen;
+ if (!id_frozen) {
+ id_frozen = rb_make_internal_id();
+ }
+
+ next_shape = get_next_shape_internal(shape, (ID)id_frozen, obj, SHAPE_FROZEN);
+ }
+
+ RUBY_ASSERT(next_shape);
+ rb_shape_set_shape(obj, next_shape);
+}
+
+void
+rb_shape_transition_shape(VALUE obj, ID id, rb_shape_t *shape)
+{
+ rb_shape_t* next_shape = rb_shape_get_next(shape, obj, id);
+ if (shape == next_shape) {
+ return;
+ }
+ rb_shape_set_shape(obj, next_shape);
+}
+
+rb_shape_t*
+rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
+{
+ return get_next_shape_internal(shape, id, obj, SHAPE_IVAR);
+}
+
+bool
+rb_shape_get_iv_index(rb_shape_t * shape, ID id, attr_index_t *value) {
+ while (shape->parent) {
+ if (shape->edge_name == id) {
+ enum shape_type shape_type;
+ shape_type = (enum shape_type)shape->type;
+
+ switch(shape_type) {
+ case SHAPE_IVAR:
+ RUBY_ASSERT(shape->iv_count > 0);
+ *value = shape->iv_count - 1;
+ return true;
+ case SHAPE_IVAR_UNDEF:
+ case SHAPE_ROOT:
+ return false;
+ case SHAPE_FROZEN:
+ rb_bug("Ivar should not exist on frozen transition\n");
+ }
+ }
+ shape = shape->parent;
+ }
+ return false;
+}
+
+static rb_shape_t *
+shape_alloc(void)
+{
+ rb_vm_t *vm = GET_VM();
+ shape_id_t shape_id = vm->next_shape_id;
+ vm->next_shape_id++;
+
+ if (shape_id == MAX_SHAPE_ID) {
+ // TODO: Make an OutOfShapesError ??
+ rb_bug("Out of shapes\n");
+ }
+
+ return &GET_VM()->shape_list[shape_id];
+}
+
+rb_shape_t *
+rb_shape_alloc(ID edge_name, rb_shape_t * parent)
+{
+ rb_shape_t * shape = shape_alloc();
+
+ shape->edge_name = edge_name;
+ shape->iv_count = 0;
+ shape->parent = parent;
+
+ return shape;
+}
+
+MJIT_FUNC_EXPORTED void
+rb_shape_set_shape(VALUE obj, rb_shape_t* shape)
+{
+ rb_shape_set_shape_id(obj, rb_shape_id(shape));
+}
+
+VALUE rb_cShape;
+
+static void
+shape_mark(void *ptr)
+{
+ rb_gc_mark((VALUE)ptr);
+}
+
+/*
+ * Exposing Shape to Ruby via RubyVM.debug_shape
+ */
+static const rb_data_type_t shape_data_type = {
+ "Shape",
+ {shape_mark, NULL, NULL,},
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
+};
+
+static VALUE
+rb_wrapped_shape_id(VALUE self) {
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ return INT2NUM(rb_shape_id(shape));
+}
+
+static VALUE
+rb_shape_type(VALUE self) {
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ return INT2NUM(shape->type);
+}
+
+static VALUE
+rb_shape_parent_id(VALUE self)
+{
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ if (shape->parent) {
+ return INT2NUM(rb_shape_id(shape->parent));
+ }
+ else {
+ return Qnil;
+ }
+}
+
+static VALUE parse_key(ID key) {
+ if ((key & RUBY_ID_INTERNAL) == RUBY_ID_INTERNAL) {
+ return LONG2NUM(key);
+ } else {
+ return ID2SYM(key);
+ }
+}
+
+static VALUE
+rb_shape_t_to_rb_cShape(rb_shape_t *shape) {
+ union { const rb_shape_t *in; void *out; } deconst;
+ VALUE res;
+ deconst.in = shape;
+ res = TypedData_Wrap_Struct(rb_cShape, &shape_data_type, deconst.out);
+
+ return res;
+}
+
+static enum rb_id_table_iterator_result rb_edges_to_hash(ID key, VALUE value, void *ref)
+{
+ rb_hash_aset(*(VALUE *)ref, parse_key(key), rb_shape_t_to_rb_cShape((rb_shape_t*)value));
+ return ID_TABLE_CONTINUE;
+}
+
+static VALUE
+rb_shape_edges(VALUE self)
+{
+ rb_shape_t* shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+
+ VALUE hash = rb_hash_new();
+
+ if (shape->edges) {
+ rb_id_table_foreach(shape->edges, rb_edges_to_hash, &hash);
+ }
+
+ return hash;
+}
+
+static VALUE
+rb_shape_edge_name(VALUE self)
+{
+ rb_shape_t* shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+
+ if (shape->edge_name) {
+ return ID2SYM(shape->edge_name);
+ }
+ else {
+ return Qnil;
+ }
+}
+
+static VALUE
+rb_shape_iv_count(VALUE self)
+{
+ rb_shape_t* shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+
+ return INT2NUM(shape->iv_count);
+}
+
+static VALUE
+rb_shape_export_depth(VALUE self)
+{
+ rb_shape_t* shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+
+ unsigned int depth = 0;
+ while (shape->parent) {
+ depth++;
+ shape = shape->parent;
+ }
+ return INT2NUM(depth);
+}
+
+static VALUE
+rb_shape_parent(VALUE self)
+{
+ rb_shape_t * shape;
+ TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
+ if (shape->parent) {
+ return rb_shape_t_to_rb_cShape(shape->parent);
+ }
+ else {
+ return Qnil;
+ }
+}
+
+VALUE rb_shape_debug_shape(VALUE self, VALUE obj) {
+ return rb_shape_t_to_rb_cShape(rb_shape_get_shape(obj));
+}
+
+VALUE rb_shape_debug_root_shape(VALUE self) {
+ return rb_shape_t_to_rb_cShape(rb_shape_get_root_shape());
+}
+
+VALUE rb_shape_debug_frozen_root_shape(VALUE self) {
+ return rb_shape_t_to_rb_cShape(rb_shape_get_frozen_root_shape());
+}
+
+VALUE rb_obj_shape(rb_shape_t* shape);
+
+static enum rb_id_table_iterator_result collect_keys_and_values(ID key, VALUE value, void *ref)
+{
+ rb_hash_aset(*(VALUE *)ref, parse_key(key), rb_obj_shape((rb_shape_t*)value));
+ return ID_TABLE_CONTINUE;
+}
+
+static VALUE edges(struct rb_id_table* edges)
+{
+ VALUE hash = rb_hash_new();
+ if (edges)
+ rb_id_table_foreach(edges, collect_keys_and_values, &hash);
+ return hash;
+}
+
+VALUE rb_obj_shape(rb_shape_t* shape) {
+ VALUE rb_shape = rb_hash_new();
+
+ rb_hash_aset(rb_shape, ID2SYM(rb_intern("id")), INT2NUM(rb_shape_id(shape)));
+ rb_hash_aset(rb_shape, ID2SYM(rb_intern("edges")), edges(shape->edges));
+
+ if (shape == rb_shape_get_root_shape()) {
+ rb_hash_aset(rb_shape, ID2SYM(rb_intern("parent_id")), INT2NUM(ROOT_SHAPE_ID));
+ }
+ else {
+ rb_hash_aset(rb_shape, ID2SYM(rb_intern("parent_id")), INT2NUM(rb_shape_id(shape->parent)));
+ }
+
+ rb_hash_aset(rb_shape, ID2SYM(rb_intern("edge_name")), rb_id2str(shape->edge_name));
+ return rb_shape;
+}
+
+static VALUE shape_transition_tree(VALUE self) {
+ return rb_obj_shape(rb_shape_get_root_shape());
+}
+
+static VALUE shape_count(VALUE self) {
+ int shape_count = 0;
+ rb_vm_t *vm = GET_VM();
+ for(shape_id_t i = 0; i < vm->next_shape_id; i++) {
+ if(rb_shape_get_shape_by_id_without_assertion(i)) {
+ shape_count++;
+ }
+ }
+ return INT2NUM(shape_count);
+}
+
+static VALUE
+shape_max_shape_count(VALUE self)
+{
+ return INT2NUM(GET_VM()->next_shape_id);
+}
+
+VALUE
+rb_shape_flags_mask(void)
+{
+ return SHAPE_FLAG_MASK;
+}
+
+void
+Init_shape(void)
+{
+ rb_cShape = rb_define_class_under(rb_cRubyVM, "Shape", rb_cObject);
+ rb_undef_alloc_func(rb_cShape);
+
+ rb_define_method(rb_cShape, "parent_id", rb_shape_parent_id, 0);
+ rb_define_method(rb_cShape, "parent", rb_shape_parent, 0);
+ rb_define_method(rb_cShape, "edges", rb_shape_edges, 0);
+ rb_define_method(rb_cShape, "edge_name", rb_shape_edge_name, 0);
+ rb_define_method(rb_cShape, "iv_count", rb_shape_iv_count, 0);
+ rb_define_method(rb_cShape, "depth", rb_shape_export_depth, 0);
+ rb_define_method(rb_cShape, "id", rb_wrapped_shape_id, 0);
+ rb_define_method(rb_cShape, "type", rb_shape_type, 0);
+ rb_define_const(rb_cShape, "SHAPE_ROOT", INT2NUM(SHAPE_ROOT));
+ rb_define_const(rb_cShape, "SHAPE_IVAR", INT2NUM(SHAPE_IVAR));
+ rb_define_const(rb_cShape, "SHAPE_IVAR_UNDEF", INT2NUM(SHAPE_IVAR_UNDEF));
+ rb_define_const(rb_cShape, "SHAPE_FROZEN", INT2NUM(SHAPE_FROZEN));
+ rb_define_const(rb_cShape, "SHAPE_BITS", INT2NUM(SHAPE_BITS));
+
+ rb_define_module_function(rb_cRubyVM, "debug_shape_transition_tree", shape_transition_tree, 0);
+ rb_define_module_function(rb_cRubyVM, "debug_shape_count", shape_count, 0);
+ rb_define_singleton_method(rb_cRubyVM, "debug_shape", rb_shape_debug_shape, 1);
+ rb_define_singleton_method(rb_cRubyVM, "debug_max_shape_count", shape_max_shape_count, 0);
+ rb_define_singleton_method(rb_cRubyVM, "debug_root_shape", rb_shape_debug_root_shape, 0);
+ rb_define_singleton_method(rb_cRubyVM, "debug_frozen_root_shape", rb_shape_debug_frozen_root_shape, 0);
+}