From 73771e4b192f3db62efb854affdfc95babba1d35 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Tue, 6 Dec 2022 12:56:51 +0100 Subject: ObjectSpace.dump_all: dump shapes as well I see several arguments for doing so. First, they use a non-trivial amount of memory, so for various memory profiling/mapping tools it is relevant to have visibility of the space occupied by shapes. Then, some pathological code can create tons of shapes, so it is valuable to have a way to observe shapes without having to compile Ruby with `SHAPE_DEBUG=1`. And additionally it's likely much faster to dump them this way than to use `RubyVM::Shape`. There are however a few open questions: - Shapes can't respect the `since:` argument. Not sure what to do when it is provided. Would probably make sense to not dump them. - Maybe it would make more sense to have a separate `ObjectSpace.dump_shapes`? - Maybe instead `dump_all` should take a `shapes: false` argument? Additionally, `ObjectSpace.dump_shapes` is added for the use case of debugging the evolution of the shape tree. 
--- common.mk | 1 + ext/objspace/depend | 1 + ext/objspace/lib/objspace.rb | 65 +++++++++++++++++++++-- ext/objspace/objspace_dump.c | 118 ++++++++++++++++++++++++++++++++++++++--- shape.c | 79 ++++++++++++++++++--------- shape.h | 9 +++- test/objspace/test_objspace.rb | 15 +++++- 7 files changed, 248 insertions(+), 40 deletions(-) diff --git a/common.mk b/common.mk index 3c89be7f3f..9ac486411c 100644 --- a/common.mk +++ b/common.mk @@ -14217,6 +14217,7 @@ shape.$(OBJEXT): {$(VPATH)}shape.c shape.$(OBJEXT): {$(VPATH)}shape.h shape.$(OBJEXT): {$(VPATH)}st.h shape.$(OBJEXT): {$(VPATH)}subst.h +shape.$(OBJEXT): {$(VPATH)}symbol.h shape.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h shape.$(OBJEXT): {$(VPATH)}thread_native.h shape.$(OBJEXT): {$(VPATH)}variable.h diff --git a/ext/objspace/depend b/ext/objspace/depend index d830239b18..f83607236a 100644 --- a/ext/objspace/depend +++ b/ext/objspace/depend @@ -556,6 +556,7 @@ objspace_dump.o: $(top_srcdir)/node.h objspace_dump.o: $(top_srcdir)/ruby_assert.h objspace_dump.o: $(top_srcdir)/ruby_atomic.h objspace_dump.o: $(top_srcdir)/shape.h +objspace_dump.o: $(top_srcdir)/symbol.h objspace_dump.o: $(top_srcdir)/thread_pthread.h objspace_dump.o: $(top_srcdir)/vm_core.h objspace_dump.o: $(top_srcdir)/vm_opts.h diff --git a/ext/objspace/lib/objspace.rb b/ext/objspace/lib/objspace.rb index 0298b0646c..f8a66d8d32 100644 --- a/ext/objspace/lib/objspace.rb +++ b/ext/objspace/lib/objspace.rb @@ -6,6 +6,7 @@ module ObjectSpace class << self private :_dump private :_dump_all + private :_dump_shapes end module_function @@ -53,23 +54,38 @@ module ObjectSpace # # Dump the contents of the ruby heap as JSON. # + # _full_ must be a boolean. If true, all heap slots are dumped, including the empty ones (T_NONE). + # # _since_ must be a non-negative integer or +nil+. # # If _since_ is a positive integer, only objects of that generation and # newer generations are dumped. The current generation can be accessed using - # GC::count. 
- # - # Objects that were allocated without object allocation tracing enabled + # GC::count. Objects that were allocated without object allocation tracing enabled # are ignored. See ::trace_object_allocations for more information and # examples. # # If _since_ is omitted or is +nil+, all objects are dumped. # + # _shapes_ must be a boolean or a non-negative integer. + # + # If _shapes_ is a positive integer, only shapes whose id is greater than or equal to the provided + # shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+. + # + # If _shapes_ is +false+, no shapes are dumped. + # + # To only dump objects allocated past a certain point you can combine _since_ and _shapes_: + # ObjectSpace.trace_object_allocations + # GC.start + # gc_generation = GC.count + # shape_generation = RubyVM.stat(:next_shape_id) + # call_method_to_instrument + # ObjectSpace.dump_all(since: gc_generation, shapes: shape_generation) + # # This method is only expected to work with C Ruby. # This is an experimental method and is subject to change. # In particular, the function signature and output format are # not guaranteed to be compatible in future versions of ruby. - def dump_all(output: :file, full: false, since: nil) + def dump_all(output: :file, full: false, since: nil, shapes: true) out = case output when :file, nil require 'tempfile' @@ -84,7 +100,46 @@ module ObjectSpace raise ArgumentError, "wrong output option: #{output.inspect}" end - ret = _dump_all(out, full, since) + shapes = 0 if shapes == true + ret = _dump_all(out, full, since, shapes) + return nil if output == :stdout + ret + end + + # call-seq: + # ObjectSpace.dump_shapes([output: :file]) # => #<File:/tmp/rubyshapes...json> + # ObjectSpace.dump_shapes(output: :stdout) # => nil + # ObjectSpace.dump_shapes(output: :string) # => "{...}\n{...}\n..." + # ObjectSpace.dump_shapes(output: + # File.open('shapes.json','w')) # => #<File:shapes.json> + # ObjectSpace.dump_shapes(output: :string, + # since: 42) # => "{...}\n{...}\n..." 
+ # + # Dump the contents of the ruby shape tree as JSON. + # + # If _since_ is a positive integer, only shapes whose id is greater than or equal to the provided + # shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+. + # + # This method is only expected to work with C Ruby. + # This is an experimental method and is subject to change. + # In particular, the function signature and output format are + # not guaranteed to be compatible in future versions of ruby. + def dump_shapes(output: :file, since: 0) + out = case output + when :file, nil + require 'tempfile' + Tempfile.create(%w(rubyshapes .json)) + when :stdout + STDOUT + when :string + +'' + when IO + output + else + raise ArgumentError, "wrong output option: #{output.inspect}" + end + + ret = _dump_shapes(out, since) return nil if output == :stdout ret end diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index 41da368267..bcc258eb25 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -18,6 +18,8 @@ #include "internal/hash.h" #include "internal/string.h" #include "internal/sanitizers.h" +#include "symbol.h" +#include "shape.h" #include "node.h" #include "objspace.h" #include "ruby/debug.h" @@ -42,6 +44,7 @@ struct dump_config { unsigned int full_heap: 1; unsigned int partial_dump; size_t since; + size_t shapes_since; unsigned long buffer_len; char buffer[BUFFER_CAPACITY]; }; @@ -350,6 +353,20 @@ dump_append_string_content(struct dump_config *dc, VALUE obj) } } +static inline void +dump_append_id(struct dump_config *dc, ID id) +{ + if (is_instance_id(id)) { + dump_append_string_value(dc, rb_sym2str(ID2SYM(id))); + } + else { + dump_append(dc, "\"ID_INTERNAL("); + dump_append_sizet(dc, rb_id_to_serial(id)); + dump_append(dc, ")\""); + } +} + + static void dump_object(VALUE obj, struct dump_config *dc) { @@ -378,13 +395,16 @@ dump_object(VALUE obj, struct dump_config *dc) dump_append(dc, "{\"address\":"); dump_append_ref(dc, obj); - 
dump_append(dc, ", \"shape_id\":"); - dump_append_sizet(dc, rb_shape_get_shape_id(obj)); - dump_append(dc, ", \"type\":\""); dump_append(dc, obj_type(obj)); dump_append(dc, "\""); + size_t shape_id = rb_shape_get_shape_id(obj); + if (shape_id) { + dump_append(dc, ", \"shape_id\":"); + dump_append_sizet(dc, shape_id); + } + dump_append(dc, ", \"slot_size\":"); dump_append_sizet(dc, dc->cur_page_slot_size); @@ -622,7 +642,7 @@ root_obj_i(const char *category, VALUE obj, void *data) } static void -dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since) +dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes) { dc->full_heap = 0; @@ -648,6 +668,8 @@ dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since) else { dc->partial_dump = 0; } + + dc->shapes_since = RTEST(shapes) ? NUM2SIZET(shapes) : 0; } static VALUE @@ -672,18 +694,81 @@ objspace_dump(VALUE os, VALUE obj, VALUE output) dc.cur_page_slot_size = rb_gc_obj_slot_size(obj); } - dump_output(&dc, output, Qnil, Qnil); + dump_output(&dc, output, Qnil, Qnil, Qnil); dump_object(obj, &dc); return dump_result(&dc); } +static void +shape_i(rb_shape_t *shape, void *data) +{ + struct dump_config *dc = (struct dump_config *)data; + + size_t shape_id = rb_shape_id(shape); + if (shape_id < dc->shapes_since) { + return; + } + + dump_append(dc, "{\"address\":"); + dump_append_ref(dc, (VALUE)shape); + + dump_append(dc, ", \"type\":\"SHAPE\", \"id\":"); + dump_append_sizet(dc, shape_id); + + if (shape->type != SHAPE_ROOT) { + dump_append(dc, ", \"parent_id\":"); + dump_append_lu(dc, shape->parent_id); + } + + dump_append(dc, ", \"depth\":"); + dump_append_sizet(dc, rb_shape_depth(shape)); + + dump_append(dc, ", \"shape_type\":"); + switch(shape->type) { + case SHAPE_ROOT: + dump_append(dc, "\"ROOT\""); + break; + case SHAPE_IVAR: + dump_append(dc, "\"IVAR\""); + + dump_append(dc, ",\"edge_name\":"); + dump_append_id(dc, shape->edge_name); + + break; + case 
SHAPE_FROZEN: + dump_append(dc, "\"FROZEN\""); + break; + case SHAPE_CAPACITY_CHANGE: + dump_append(dc, "\"CAPACITY_CHANGE\""); + dump_append(dc, ", \"capacity\":"); + dump_append_sizet(dc, shape->capacity); + break; + case SHAPE_INITIAL_CAPACITY: + dump_append(dc, "\"INITIAL_CAPACITY\""); + break; + case SHAPE_T_OBJECT: + dump_append(dc, "\"T_OBJECT\""); + break; + default: + rb_bug("[objspace] unexpected shape type"); + } + + dump_append(dc, ", \"edges\":"); + dump_append_sizet(dc, rb_shape_edges_count(shape)); + + dump_append(dc, ", \"memsize\":"); + dump_append_sizet(dc, rb_shape_memsize(shape)); + + dump_append(dc, "}\n"); +} + static VALUE -objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since) +objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since, VALUE shapes) { struct dump_config dc = {0,}; - dump_output(&dc, output, full, since); + dump_output(&dc, output, full, since, shapes); if (!dc.partial_dump || dc.since == 0) { /* dump roots */ @@ -691,12 +776,28 @@ objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since) if (dc.roots) dump_append(&dc, "]}\n"); } + if (RTEST(shapes)) { + rb_shape_each_shape(shape_i, &dc); + } + /* dump all objects */ rb_objspace_each_objects(heap_i, &dc); return dump_result(&dc); } +static VALUE +objspace_dump_shapes(VALUE os, VALUE output, VALUE shapes) +{ + struct dump_config dc = {0,}; + dump_output(&dc, output, Qfalse, Qnil, shapes); + + if (RTEST(shapes)) { + rb_shape_each_shape(shape_i, &dc); + } + return dump_result(&dc); +} + void Init_objspace_dump(VALUE rb_mObjSpace) { @@ -706,7 +807,8 @@ Init_objspace_dump(VALUE rb_mObjSpace) #endif rb_define_module_function(rb_mObjSpace, "_dump", objspace_dump, 2); - rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 3); + rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 4); + rb_define_module_function(rb_mObjSpace, "_dump_shapes", objspace_dump_shapes, 2); /* force create static IDs */ 
rb_obj_gc_flags(rb_mObjSpace, 0, 0); diff --git a/shape.c b/shape.c index 973a8a6328..41d3cfcf84 100644 --- a/shape.c +++ b/shape.c @@ -2,6 +2,8 @@ #include "vm_sync.h" #include "shape.h" #include "gc.h" +#include "symbol.h" +#include "id_table.h" #include "internal/class.h" #include "internal/symbol.h" #include "internal/variable.h" @@ -37,6 +39,17 @@ rb_shape_root_shape_p(rb_shape_t* shape) return shape == rb_shape_get_root_shape(); } +void +rb_shape_each_shape(each_shape_callback callback, void *data) +{ + rb_shape_t *cursor = rb_shape_get_root_shape(); + rb_shape_t *end = rb_shape_get_shape_by_id(GET_VM()->next_shape_id); + while (cursor < end) { + callback(cursor, data); + cursor += 1; + } +} + rb_shape_t* rb_shape_get_shape_by_id(shape_id_t shape_id) { @@ -97,10 +110,10 @@ rb_shape_get_shape_id(VALUE obj) #endif } -unsigned int +size_t rb_shape_depth(rb_shape_t * shape) { - unsigned int depth = 1; + size_t depth = 1; while (shape->parent_id != INVALID_SHAPE_ID) { depth++; @@ -285,6 +298,7 @@ rb_shape_transition_shape_frozen(VALUE obj) rb_shape_t * rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id) { + RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id)))); return get_next_shape_internal(shape, id, SHAPE_IVAR); } @@ -428,16 +442,45 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape) return midway_shape; } +size_t +rb_shape_edges_count(rb_shape_t *shape) +{ + if (shape->edges) { + return rb_id_table_size(shape->edges); + } + return 0; +} + +size_t +rb_shape_memsize(rb_shape_t *shape) +{ + size_t memsize = sizeof(rb_shape_t); + if (shape->edges) { + memsize += rb_id_table_memsize(shape->edges); + } + return memsize; +} + #if SHAPE_DEBUG VALUE rb_cShape; +static size_t +shape_memsize(const void *shape_ptr) +{ + return rb_shape_memsize((rb_shape_t *)shape_ptr); +} + /* * Exposing Shape to Ruby via RubyVM.debug_shape */ static const rb_data_type_t shape_data_type = { - "Shape", - {NULL, NULL, NULL,}, - 0, 0, 
RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED + .wrap_struct_name = "Shape", + .function = { + .dmark = NULL, + .dfree = NULL, + .dsize = shape_memsize, + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED }; static VALUE @@ -480,12 +523,10 @@ rb_shape_parent_id(VALUE self) static VALUE parse_key(ID key) { - if ((key & RUBY_ID_INTERNAL) == RUBY_ID_INTERNAL) { - return LONG2NUM(key); - } - else { + if (is_instance_id(key)) { return ID2SYM(key); } + return LONG2NUM(key); } static VALUE @@ -527,17 +568,13 @@ rb_shape_edge_name(VALUE self) rb_shape_t* shape; TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); - if ((shape->edge_name & (ID_INTERNAL)) == ID_INTERNAL) { - return INT2NUM(shape->capacity); - } - else { - if (shape->edge_name) { + if (shape->edge_name) { + if (is_instance_id(shape->edge_name)) { return ID2SYM(shape->edge_name); } - else { - return Qnil; - } + return INT2NUM(shape->capacity); } + return Qnil; } static VALUE @@ -563,13 +600,7 @@ rb_shape_export_depth(VALUE self) { rb_shape_t* shape; TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); - - unsigned int depth = 0; - while (shape->parent_id != INVALID_SHAPE_ID) { - depth++; - shape = rb_shape_get_parent(shape); - } - return INT2NUM(depth); + return SIZET2NUM(rb_shape_depth(shape)); } static VALUE diff --git a/shape.h b/shape.h index 96feae99fd..ddb870f5e7 100644 --- a/shape.h +++ b/shape.h @@ -124,7 +124,6 @@ bool rb_shape_root_shape_p(rb_shape_t* shape); rb_shape_t * rb_shape_get_root_shape(void); uint8_t rb_shape_id_num_bits(void); int32_t rb_shape_id_offset(void); -unsigned int rb_shape_depth(rb_shape_t * shape); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); rb_shape_t * rb_shape_get_parent(rb_shape_t * shape); @@ -184,4 +183,12 @@ bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id); VALUE rb_obj_debug_shape(VALUE self, VALUE obj); VALUE rb_shape_flags_mask(void); +RUBY_SYMBOL_EXPORT_BEGIN +typedef void 
each_shape_callback(rb_shape_t * shape, void *data); +void rb_shape_each_shape(each_shape_callback callback, void *data); +size_t rb_shape_memsize(rb_shape_t *shape); +size_t rb_shape_edges_count(rb_shape_t *shape); +size_t rb_shape_depth(rb_shape_t *shape); +RUBY_SYMBOL_EXPORT_END + #endif diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 1eded6a439..7eda077260 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -414,7 +414,7 @@ class TestObjSpace < Test::Unit::TestCase @obj1 = Object.new GC.start @obj2 = Object.new - ObjectSpace.dump_all(output: :stdout, since: gc_gen) + ObjectSpace.dump_all(output: :stdout, since: gc_gen, shapes: false) end p dump_my_heap_please @@ -422,7 +422,7 @@ class TestObjSpace < Test::Unit::TestCase assert_equal 'nil', output.pop since = output.shift.to_i assert_operator output.size, :>, 0 - generations = output.map { |l| JSON.parse(l)["generation"] }.uniq.sort + generations = output.map { |l| JSON.parse(l) }.map { |o| o["generation"] }.uniq.sort assert_equal [since, since + 1], generations end end @@ -479,6 +479,7 @@ class TestObjSpace < Test::Unit::TestCase output.each { |l| obj = JSON.parse(l) next if obj["type"] == "ROOT" + next if obj["type"] == "SHAPE" assert_not_nil obj["slot_size"] assert_equal 0, obj["slot_size"] % GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] @@ -794,6 +795,16 @@ class TestObjSpace < Test::Unit::TestCase assert_equal name, JSON.parse(dump)["method"], dump end + def test_dump_shapes + json = ObjectSpace.dump_shapes(output: :string) + json.each_line do |line| + assert_include(line, '"type":"SHAPE"') + end + + assert_empty ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id)) + assert_equal 2, ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id) - 2).lines.size + end + private def utf8_❨╯°□°❩╯︵┻━┻ -- cgit v1.2.1