diff options
author | Peter Zhu <peter@peterzhu.ca> | 2023-02-24 09:20:14 -0500 |
---|---|---|
committer | Peter Zhu <peter@peterzhu.ca> | 2023-02-24 14:10:09 -0500 |
commit | 3e098224077e8c43a1d8c2070b26ffdfda422780 (patch) | |
tree | ac701b8c89d90f3e6cd632ce22d0713d149ba945 | |
parent | d2631c427ee723f6136ac1e08dd3c9c5b04c6725 (diff) | |
download | ruby-3e098224077e8c43a1d8c2070b26ffdfda422780.tar.gz |
Fix incorrect line numbers in GC hook
If the previous instruction is not a leaf instruction, then the PC was
incremented before the instruction was ran (meaning the currently
executing instruction is actually the previous instruction), so we
should not increment the PC otherwise we will calculate the source
line for the next instruction.
This bug can be reproduced in the following script:
```
require "objspace"
ObjectSpace.trace_object_allocations_start
a =
1.0 / 0.0
p [ObjectSpace.allocation_sourceline(a), ObjectSpace.allocation_sourcefile(a)]
```
Which outputs: [4, "test.rb"]
This is incorrect because the object was allocated on line 10 and not
line 4. The behaviour is correct when we use a leaf instruction (e.g.
if we replaced `1.0 / 0.0` with `"hello"`), then the output is:
[10, "test.rb"].
[Bug #19456]
-rw-r--r-- | common.mk | 1 | ||||
-rw-r--r-- | compile.c | 12 | ||||
-rw-r--r-- | gc.c | 21 | ||||
-rw-r--r-- | internal/compile.h | 2 | ||||
-rw-r--r-- | test/objspace/test_objspace.rb | 8 | ||||
-rw-r--r-- | tool/ruby_vm/views/_leaf_helpers.erb | 25 | ||||
-rw-r--r-- | yjit.c | 7 | ||||
-rw-r--r-- | yjit/src/cruby_bindings.inc.rs | 2 |
8 files changed, 68 insertions, 10 deletions
@@ -6791,6 +6791,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h gc.$(OBJEXT): $(top_srcdir)/internal/bignum.h gc.$(OBJEXT): $(top_srcdir)/internal/bits.h gc.$(OBJEXT): $(top_srcdir)/internal/class.h +gc.$(OBJEXT): $(top_srcdir)/internal/compile.h gc.$(OBJEXT): $(top_srcdir)/internal/compilers.h gc.$(OBJEXT): $(top_srcdir)/internal/complex.h gc.$(OBJEXT): $(top_srcdir)/internal/cont.h @@ -10264,6 +10264,18 @@ dump_disasm_list_with_cursor(const LINK_ELEMENT *link, const LINK_ELEMENT *curr, fflush(stdout); } +bool +rb_insns_leaf_p(int i) +{ + return insn_leaf_p(i); +} + +int +rb_insn_len(VALUE insn) +{ + return insn_len(insn); +} + const char * rb_insns_name(int i) { @@ -100,6 +100,7 @@ #include "id_table.h" #include "internal.h" #include "internal/class.h" +#include "internal/compile.h" #include "internal/complex.h" #include "internal/cont.h" #include "internal/error.h" @@ -2485,8 +2486,24 @@ gc_event_hook_body(rb_execution_context_t *ec, rb_objspace_t *objspace, const rb { const VALUE *pc = ec->cfp->pc; if (pc && VM_FRAME_RUBYFRAME_P(ec->cfp)) { - /* increment PC because source line is calculated with PC-1 */ - ec->cfp->pc++; + int prev_opcode = rb_vm_insn_addr2opcode((void *)*ec->cfp->iseq->body->iseq_encoded); + for (const VALUE *insn = ec->cfp->iseq->body->iseq_encoded; insn < pc; insn += rb_insn_len(prev_opcode)) { + prev_opcode = rb_vm_insn_addr2opcode((void *)*insn); + } + + /* If the previous instruction is a leaf instruction, then the PC is + * the currently executing instruction. We should increment the PC + * because the source line is calculated with PC-1 in calc_pos. + * + * If the previous instruction is not a leaf instruction, then the PC + * was incremented before the instruction was ran (meaning the + * currently executing instruction is actually the previous + * instruction), so we should not increment the PC otherwise we will + * calculate the source line for the next instruction. + */ + if (rb_insns_leaf_p(prev_opcode)) { + ec->cfp->pc++; + } } EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, data); ec->cfp->pc = pc; diff --git a/internal/compile.h b/internal/compile.h index d32c2233c9..8670785b7b 100644 --- a/internal/compile.h +++ b/internal/compile.h @@ -17,6 +17,8 @@ struct rb_iseq_struct; /* in vm_core.h */ /* compile.c */ int rb_dvar_defined(ID, const struct rb_iseq_struct *); int rb_local_defined(ID, const struct rb_iseq_struct *); +bool rb_insns_leaf_p(int i); +int rb_insn_len(VALUE insn); const char *rb_insns_name(int i); VALUE rb_insns_name_array(void); int rb_iseq_cdhash_cmp(VALUE val, VALUE lit); diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index a053717f9a..eabfff7876 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -216,6 +216,14 @@ class TestObjSpace < Test::Unit::TestCase assert_equal(c3, ObjectSpace.allocation_generation(o3)) assert_equal(self.class.name, ObjectSpace.allocation_class_path(o3)) assert_equal(__method__, ObjectSpace.allocation_method_id(o3)) + + # [Bug #19456] + o4 = + # This line intentionally left blank + # This line intentionally left blank + 1.0 / 0.0; line4 = __LINE__; c4 = GC.count + assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o4)) + assert_equal(line4, ObjectSpace.allocation_sourceline(o4)) } end diff --git a/tool/ruby_vm/views/_leaf_helpers.erb b/tool/ruby_vm/views/_leaf_helpers.erb index 1735db2196..ed2b5dec49 100644 --- a/tool/ruby_vm/views/_leaf_helpers.erb +++ b/tool/ruby_vm/views/_leaf_helpers.erb @@ -10,6 +10,31 @@ #include "iseq.h" +extern const bool rb_vm_insn_leaf_p[]; + +#ifdef RUBY_VM_INSNS_INFO +const bool rb_vm_insn_leaf_p[] = { +% RubyVM::Instructions.each_slice(20) do |insns| + <%= insns.map do |insn| + if insn.is_a?(RubyVM::BareInstructions) + insn.always_leaf? ? '1' : '0' + else + '0' + end + end.join(', ') + %>, +% end +}; +#endif + +CONSTFUNC(MAYBE_UNUSED(static bool insn_leaf_p(VALUE insn))); + +bool +insn_leaf_p(VALUE insn) +{ + return rb_vm_insn_leaf_p[insn]; +} + // This is used to tell MJIT that this insn would be leaf if CHECK_INTS didn't exist. // It should be used only when RUBY_VM_CHECK_INTS is directly written in insns.def. static bool leafness_of_check_ints = false; @@ -480,13 +480,6 @@ rb_insn_name(VALUE insn) return insn_name(insn); } -// Query the instruction length in bytes for YARV opcode insn -int -rb_insn_len(VALUE insn) -{ - return insn_len(insn); -} - unsigned int rb_vm_ci_argc(const struct rb_callinfo *ci) { diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 59d065dabd..5940398f4a 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1181,6 +1181,7 @@ extern "C" { key: st_data_t, pval: *mut st_data_t, ) -> ::std::os::raw::c_int; + pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int; pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int; pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint; @@ -1221,7 +1222,6 @@ extern "C" { pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char; - pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int; pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID; pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint; |