summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <byroot@ruby-lang.org>2022-12-06 12:56:51 +0100
committerJean Boussier <jean.boussier@gmail.com>2022-12-08 18:46:16 +0100
commit73771e4b192f3db62efb854affdfc95babba1d35 (patch)
tree6f15d7fea885b5e639c82b51d5c3fd50b9200735
parentb19490f75dd790f2f886df2c05ed8fba947326a9 (diff)
downloadruby-73771e4b192f3db62efb854affdfc95babba1d35.tar.gz
ObjectSpace.dump_all: dump shapes as well
I see several arguments for doing so. First, they use a non-trivial amount of memory, so for various memory profiling/mapping tools it is relevant to have visibility of the space occupied by shapes. Then, some pathological code can create tons of shapes, so it is valuable to have a way to observe shapes without having to compile Ruby with `SHAPE_DEBUG=1`. And additionally, it's likely much faster to dump them this way than to use `RubyVM::Shape`. There are however a few open questions: - Shapes can't respect the `since:` argument. Not sure what to do when it is provided. It would probably make sense to not dump them. - Maybe it would make more sense to have a separate `ObjectSpace.dump_shapes`? - Maybe instead `dump_all` should take a `shapes: false` argument? Additionally, `ObjectSpace.dump_shapes` is added for the use case of debugging the evolution of the shape tree.
-rw-r--r--common.mk1
-rw-r--r--ext/objspace/depend1
-rw-r--r--ext/objspace/lib/objspace.rb65
-rw-r--r--ext/objspace/objspace_dump.c118
-rw-r--r--shape.c79
-rw-r--r--shape.h9
-rw-r--r--test/objspace/test_objspace.rb15
7 files changed, 248 insertions, 40 deletions
diff --git a/common.mk b/common.mk
index 3c89be7f3f..9ac486411c 100644
--- a/common.mk
+++ b/common.mk
@@ -14217,6 +14217,7 @@ shape.$(OBJEXT): {$(VPATH)}shape.c
shape.$(OBJEXT): {$(VPATH)}shape.h
shape.$(OBJEXT): {$(VPATH)}st.h
shape.$(OBJEXT): {$(VPATH)}subst.h
+shape.$(OBJEXT): {$(VPATH)}symbol.h
shape.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
shape.$(OBJEXT): {$(VPATH)}thread_native.h
shape.$(OBJEXT): {$(VPATH)}variable.h
diff --git a/ext/objspace/depend b/ext/objspace/depend
index d830239b18..f83607236a 100644
--- a/ext/objspace/depend
+++ b/ext/objspace/depend
@@ -556,6 +556,7 @@ objspace_dump.o: $(top_srcdir)/node.h
objspace_dump.o: $(top_srcdir)/ruby_assert.h
objspace_dump.o: $(top_srcdir)/ruby_atomic.h
objspace_dump.o: $(top_srcdir)/shape.h
+objspace_dump.o: $(top_srcdir)/symbol.h
objspace_dump.o: $(top_srcdir)/thread_pthread.h
objspace_dump.o: $(top_srcdir)/vm_core.h
objspace_dump.o: $(top_srcdir)/vm_opts.h
diff --git a/ext/objspace/lib/objspace.rb b/ext/objspace/lib/objspace.rb
index 0298b0646c..f8a66d8d32 100644
--- a/ext/objspace/lib/objspace.rb
+++ b/ext/objspace/lib/objspace.rb
@@ -6,6 +6,7 @@ module ObjectSpace
class << self
private :_dump
private :_dump_all
+ private :_dump_shapes
end
module_function
@@ -53,23 +54,38 @@ module ObjectSpace
#
# Dump the contents of the ruby heap as JSON.
#
+ # _full_ must be a boolean. If true, all heap slots are dumped, including the empty ones (T_NONE).
+ #
# _since_ must be a non-negative integer or +nil+.
#
# If _since_ is a positive integer, only objects of that generation and
# newer generations are dumped. The current generation can be accessed using
- # GC::count.
- #
- # Objects that were allocated without object allocation tracing enabled
+ # GC::count. Objects that were allocated without object allocation tracing enabled
# are ignored. See ::trace_object_allocations for more information and
# examples.
#
# If _since_ is omitted or is +nil+, all objects are dumped.
#
+ # _shapes_ must be a boolean or a non-negative integer.
+ #
+ # If _shapes_ is a positive integer, only shapes newer than the provided
+ # shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+.
+ #
+ # If _shapes_ is +false+, no shapes are dumped.
+ #
+ # To only dump objects allocated past a certain point you can combine _since_ and _shapes_:
+ # ObjectSpace.trace_object_allocations
+ # GC.start
+ # gc_generation = GC.count
+ # shape_generation = RubyVM.stat(:next_shape_id)
+ # call_method_to_instrument
+ # ObjectSpace.dump_all(since: gc_generation, shapes: shape_generation)
+ #
# This method is only expected to work with C Ruby.
# This is an experimental method and is subject to change.
# In particular, the function signature and output format are
# not guaranteed to be compatible in future versions of ruby.
- def dump_all(output: :file, full: false, since: nil)
+ def dump_all(output: :file, full: false, since: nil, shapes: true)
out = case output
when :file, nil
require 'tempfile'
@@ -84,7 +100,46 @@ module ObjectSpace
raise ArgumentError, "wrong output option: #{output.inspect}"
end
- ret = _dump_all(out, full, since)
+ shapes = 0 if shapes == true
+ ret = _dump_all(out, full, since, shapes)
+ return nil if output == :stdout
+ ret
+ end
+
+ # call-seq:
+ # ObjectSpace.dump_shapes([output: :file]) # => #<File:/tmp/rubyshapes20131125-88469-laoj3v.json>
+ # ObjectSpace.dump_shapes(output: :stdout) # => nil
+ # ObjectSpace.dump_shapes(output: :string) # => "{...}\n{...}\n..."
+ # ObjectSpace.dump_shapes(output:
+ # File.open('shapes.json','w')) # => #<File:shapes.json>
+ # ObjectSpace.dump_shapes(output: :string,
+ # since: 42) # => "{...}\n{...}\n..."
+ #
+ # Dump the contents of the ruby shape tree as JSON.
+ #
+ # If _since_ is a positive integer, only shapes whose id is greater than or
+ # equal to _since_ are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+.
+ #
+ # This method is only expected to work with C Ruby.
+ # This is an experimental method and is subject to change.
+ # In particular, the function signature and output format are
+ # not guaranteed to be compatible in future versions of ruby.
+ def dump_shapes(output: :file, since: 0)
+ out = case output
+ when :file, nil
+ require 'tempfile'
+ Tempfile.create(%w(rubyshapes .json))
+ when :stdout
+ STDOUT
+ when :string
+ +''
+ when IO
+ output
+ else
+ raise ArgumentError, "wrong output option: #{output.inspect}"
+ end
+
+ ret = _dump_shapes(out, since)
return nil if output == :stdout
ret
end
diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c
index 41da368267..bcc258eb25 100644
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@@ -18,6 +18,8 @@
#include "internal/hash.h"
#include "internal/string.h"
#include "internal/sanitizers.h"
+#include "symbol.h"
+#include "shape.h"
#include "node.h"
#include "objspace.h"
#include "ruby/debug.h"
@@ -42,6 +44,7 @@ struct dump_config {
unsigned int full_heap: 1;
unsigned int partial_dump;
size_t since;
+ size_t shapes_since;
unsigned long buffer_len;
char buffer[BUFFER_CAPACITY];
};
@@ -350,6 +353,20 @@ dump_append_string_content(struct dump_config *dc, VALUE obj)
}
}
+static inline void
+dump_append_id(struct dump_config *dc, ID id)
+{
+ if (is_instance_id(id)) {
+ dump_append_string_value(dc, rb_sym2str(ID2SYM(id)));
+ }
+ else {
+ dump_append(dc, "\"ID_INTERNAL(");
+ dump_append_sizet(dc, rb_id_to_serial(id));
+ dump_append(dc, ")\"");
+ }
+}
+
+
static void
dump_object(VALUE obj, struct dump_config *dc)
{
@@ -378,13 +395,16 @@ dump_object(VALUE obj, struct dump_config *dc)
dump_append(dc, "{\"address\":");
dump_append_ref(dc, obj);
- dump_append(dc, ", \"shape_id\":");
- dump_append_sizet(dc, rb_shape_get_shape_id(obj));
-
dump_append(dc, ", \"type\":\"");
dump_append(dc, obj_type(obj));
dump_append(dc, "\"");
+ size_t shape_id = rb_shape_get_shape_id(obj);
+ if (shape_id) {
+ dump_append(dc, ", \"shape_id\":");
+ dump_append_sizet(dc, shape_id);
+ }
+
dump_append(dc, ", \"slot_size\":");
dump_append_sizet(dc, dc->cur_page_slot_size);
@@ -622,7 +642,7 @@ root_obj_i(const char *category, VALUE obj, void *data)
}
static void
-dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since)
+dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes)
{
dc->full_heap = 0;
@@ -648,6 +668,8 @@ dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since)
else {
dc->partial_dump = 0;
}
+
+ dc->shapes_since = RTEST(shapes) ? NUM2SIZET(shapes) : 0;
}
static VALUE
@@ -672,18 +694,81 @@ objspace_dump(VALUE os, VALUE obj, VALUE output)
dc.cur_page_slot_size = rb_gc_obj_slot_size(obj);
}
- dump_output(&dc, output, Qnil, Qnil);
+ dump_output(&dc, output, Qnil, Qnil, Qnil);
dump_object(obj, &dc);
return dump_result(&dc);
}
+static void
+shape_i(rb_shape_t *shape, void *data)
+{
+ struct dump_config *dc = (struct dump_config *)data;
+
+ size_t shape_id = rb_shape_id(shape);
+ if (shape_id < dc->shapes_since) {
+ return;
+ }
+
+ dump_append(dc, "{\"address\":");
+ dump_append_ref(dc, (VALUE)shape);
+
+ dump_append(dc, ", \"type\":\"SHAPE\", \"id\":");
+ dump_append_sizet(dc, shape_id);
+
+ if (shape->type != SHAPE_ROOT) {
+ dump_append(dc, ", \"parent_id\":");
+ dump_append_lu(dc, shape->parent_id);
+ }
+
+ dump_append(dc, ", \"depth\":");
+ dump_append_sizet(dc, rb_shape_depth(shape));
+
+ dump_append(dc, ", \"shape_type\":");
+ switch(shape->type) {
+ case SHAPE_ROOT:
+ dump_append(dc, "\"ROOT\"");
+ break;
+ case SHAPE_IVAR:
+ dump_append(dc, "\"IVAR\"");
+
+ dump_append(dc, ",\"edge_name\":");
+ dump_append_id(dc, shape->edge_name);
+
+ break;
+ case SHAPE_FROZEN:
+ dump_append(dc, "\"FROZEN\"");
+ break;
+ case SHAPE_CAPACITY_CHANGE:
+ dump_append(dc, "\"CAPACITY_CHANGE\"");
+ dump_append(dc, ", \"capacity\":");
+ dump_append_sizet(dc, shape->capacity);
+ break;
+ case SHAPE_INITIAL_CAPACITY:
+ dump_append(dc, "\"INITIAL_CAPACITY\"");
+ break;
+ case SHAPE_T_OBJECT:
+ dump_append(dc, "\"T_OBJECT\"");
+ break;
+ default:
+ rb_bug("[objspace] unexpected shape type");
+ }
+
+ dump_append(dc, ", \"edges\":");
+ dump_append_sizet(dc, rb_shape_edges_count(shape));
+
+ dump_append(dc, ", \"memsize\":");
+ dump_append_sizet(dc, rb_shape_memsize(shape));
+
+ dump_append(dc, "}\n");
+}
+
static VALUE
-objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since)
+objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since, VALUE shapes)
{
struct dump_config dc = {0,};
- dump_output(&dc, output, full, since);
+ dump_output(&dc, output, full, since, shapes);
if (!dc.partial_dump || dc.since == 0) {
/* dump roots */
@@ -691,12 +776,28 @@ objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since)
if (dc.roots) dump_append(&dc, "]}\n");
}
+ if (RTEST(shapes)) {
+ rb_shape_each_shape(shape_i, &dc);
+ }
+
/* dump all objects */
rb_objspace_each_objects(heap_i, &dc);
return dump_result(&dc);
}
+static VALUE
+objspace_dump_shapes(VALUE os, VALUE output, VALUE shapes)
+{
+ struct dump_config dc = {0,};
+ dump_output(&dc, output, Qfalse, Qnil, shapes);
+
+ if (RTEST(shapes)) {
+ rb_shape_each_shape(shape_i, &dc);
+ }
+ return dump_result(&dc);
+}
+
void
Init_objspace_dump(VALUE rb_mObjSpace)
{
@@ -706,7 +807,8 @@ Init_objspace_dump(VALUE rb_mObjSpace)
#endif
rb_define_module_function(rb_mObjSpace, "_dump", objspace_dump, 2);
- rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 3);
+ rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 4);
+ rb_define_module_function(rb_mObjSpace, "_dump_shapes", objspace_dump_shapes, 2);
/* force create static IDs */
rb_obj_gc_flags(rb_mObjSpace, 0, 0);
diff --git a/shape.c b/shape.c
index 973a8a6328..41d3cfcf84 100644
--- a/shape.c
+++ b/shape.c
@@ -2,6 +2,8 @@
#include "vm_sync.h"
#include "shape.h"
#include "gc.h"
+#include "symbol.h"
+#include "id_table.h"
#include "internal/class.h"
#include "internal/symbol.h"
#include "internal/variable.h"
@@ -37,6 +39,17 @@ rb_shape_root_shape_p(rb_shape_t* shape)
return shape == rb_shape_get_root_shape();
}
+void
+rb_shape_each_shape(each_shape_callback callback, void *data)
+{
+ rb_shape_t *cursor = rb_shape_get_root_shape();
+ rb_shape_t *end = rb_shape_get_shape_by_id(GET_VM()->next_shape_id);
+ while (cursor < end) {
+ callback(cursor, data);
+ cursor += 1;
+ }
+}
+
rb_shape_t*
rb_shape_get_shape_by_id(shape_id_t shape_id)
{
@@ -97,10 +110,10 @@ rb_shape_get_shape_id(VALUE obj)
#endif
}
-unsigned int
+size_t
rb_shape_depth(rb_shape_t * shape)
{
- unsigned int depth = 1;
+ size_t depth = 1;
while (shape->parent_id != INVALID_SHAPE_ID) {
depth++;
@@ -285,6 +298,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
rb_shape_t *
rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id)
{
+ RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
return get_next_shape_internal(shape, id, SHAPE_IVAR);
}
@@ -428,16 +442,45 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
return midway_shape;
}
+size_t
+rb_shape_edges_count(rb_shape_t *shape)
+{
+ if (shape->edges) {
+ return rb_id_table_size(shape->edges);
+ }
+ return 0;
+}
+
+size_t
+rb_shape_memsize(rb_shape_t *shape)
+{
+ size_t memsize = sizeof(rb_shape_t);
+ if (shape->edges) {
+ memsize += rb_id_table_memsize(shape->edges);
+ }
+ return memsize;
+}
+
#if SHAPE_DEBUG
VALUE rb_cShape;
+static size_t
+shape_memsize(const void *shape_ptr)
+{
+ return rb_shape_memsize((rb_shape_t *)shape_ptr);
+}
+
/*
* Exposing Shape to Ruby via RubyVM.debug_shape
*/
static const rb_data_type_t shape_data_type = {
- "Shape",
- {NULL, NULL, NULL,},
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
+ .wrap_struct_name = "Shape",
+ .function = {
+ .dmark = NULL,
+ .dfree = NULL,
+ .dsize = shape_memsize,
+ },
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
};
static VALUE
@@ -480,12 +523,10 @@ rb_shape_parent_id(VALUE self)
static VALUE
parse_key(ID key)
{
- if ((key & RUBY_ID_INTERNAL) == RUBY_ID_INTERNAL) {
- return LONG2NUM(key);
- }
- else {
+ if (is_instance_id(key)) {
return ID2SYM(key);
}
+ return LONG2NUM(key);
}
static VALUE
@@ -527,17 +568,13 @@ rb_shape_edge_name(VALUE self)
rb_shape_t* shape;
TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
- if ((shape->edge_name & (ID_INTERNAL)) == ID_INTERNAL) {
- return INT2NUM(shape->capacity);
- }
- else {
- if (shape->edge_name) {
+ if (shape->edge_name) {
+ if (is_instance_id(shape->edge_name)) {
return ID2SYM(shape->edge_name);
}
- else {
- return Qnil;
- }
+ return INT2NUM(shape->capacity);
}
+ return Qnil;
}
static VALUE
@@ -563,13 +600,7 @@ rb_shape_export_depth(VALUE self)
{
rb_shape_t* shape;
TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
-
- unsigned int depth = 0;
- while (shape->parent_id != INVALID_SHAPE_ID) {
- depth++;
- shape = rb_shape_get_parent(shape);
- }
- return INT2NUM(depth);
+ return SIZET2NUM(rb_shape_depth(shape));
}
static VALUE
diff --git a/shape.h b/shape.h
index 96feae99fd..ddb870f5e7 100644
--- a/shape.h
+++ b/shape.h
@@ -124,7 +124,6 @@ bool rb_shape_root_shape_p(rb_shape_t* shape);
rb_shape_t * rb_shape_get_root_shape(void);
uint8_t rb_shape_id_num_bits(void);
int32_t rb_shape_id_offset(void);
-unsigned int rb_shape_depth(rb_shape_t * shape);
rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id);
rb_shape_t * rb_shape_get_parent(rb_shape_t * shape);
@@ -184,4 +183,12 @@ bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id);
VALUE rb_obj_debug_shape(VALUE self, VALUE obj);
VALUE rb_shape_flags_mask(void);
+RUBY_SYMBOL_EXPORT_BEGIN
+typedef void each_shape_callback(rb_shape_t * shape, void *data);
+void rb_shape_each_shape(each_shape_callback callback, void *data);
+size_t rb_shape_memsize(rb_shape_t *shape);
+size_t rb_shape_edges_count(rb_shape_t *shape);
+size_t rb_shape_depth(rb_shape_t *shape);
+RUBY_SYMBOL_EXPORT_END
+
#endif
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index 1eded6a439..7eda077260 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -414,7 +414,7 @@ class TestObjSpace < Test::Unit::TestCase
@obj1 = Object.new
GC.start
@obj2 = Object.new
- ObjectSpace.dump_all(output: :stdout, since: gc_gen)
+ ObjectSpace.dump_all(output: :stdout, since: gc_gen, shapes: false)
end
p dump_my_heap_please
@@ -422,7 +422,7 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal 'nil', output.pop
since = output.shift.to_i
assert_operator output.size, :>, 0
- generations = output.map { |l| JSON.parse(l)["generation"] }.uniq.sort
+ generations = output.map { |l| JSON.parse(l) }.map { |o| o["generation"] }.uniq.sort
assert_equal [since, since + 1], generations
end
end
@@ -479,6 +479,7 @@ class TestObjSpace < Test::Unit::TestCase
output.each { |l|
obj = JSON.parse(l)
next if obj["type"] == "ROOT"
+ next if obj["type"] == "SHAPE"
assert_not_nil obj["slot_size"]
assert_equal 0, obj["slot_size"] % GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]
@@ -794,6 +795,16 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal name, JSON.parse(dump)["method"], dump
end
+ def test_dump_shapes
+ json = ObjectSpace.dump_shapes(output: :string)
+ json.each_line do |line|
+ assert_include(line, '"type":"SHAPE"')
+ end
+
+ assert_empty ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id))
+ assert_equal 2, ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id) - 2).lines.size
+ end
+
private
def utf8_❨╯°□°❩╯︵┻━┻