summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zhu <peter@peterzhu.ca>2023-02-24 09:20:14 -0500
committerPeter Zhu <peter@peterzhu.ca>2023-02-24 14:10:09 -0500
commit3e098224077e8c43a1d8c2070b26ffdfda422780 (patch)
treeac701b8c89d90f3e6cd632ce22d0713d149ba945
parentd2631c427ee723f6136ac1e08dd3c9c5b04c6725 (diff)
downloadruby-3e098224077e8c43a1d8c2070b26ffdfda422780.tar.gz
Fix incorrect line numbers in GC hook
If the previous instruction is not a leaf instruction, then the PC was incremented before the instruction was ran (meaning the currently executing instruction is actually the previous instruction), so we should not increment the PC otherwise we will calculate the source line for the next instruction. This bug can be reproduced in the following script: ``` require "objspace" ObjectSpace.trace_object_allocations_start a = 1.0 / 0.0 p [ObjectSpace.allocation_sourceline(a), ObjectSpace.allocation_sourcefile(a)] ``` Which outputs: [4, "test.rb"] This is incorrect because the object was allocated on line 10 and not line 4. The behaviour is correct when we use a leaf instruction (e.g. if we replaced `1.0 / 0.0` with `"hello"`), then the output is: [10, "test.rb"]. [Bug #19456]
-rw-r--r--common.mk1
-rw-r--r--compile.c12
-rw-r--r--gc.c21
-rw-r--r--internal/compile.h2
-rw-r--r--test/objspace/test_objspace.rb8
-rw-r--r--tool/ruby_vm/views/_leaf_helpers.erb25
-rw-r--r--yjit.c7
-rw-r--r--yjit/src/cruby_bindings.inc.rs2
8 files changed, 68 insertions, 10 deletions
diff --git a/common.mk b/common.mk
index e141239ef0..6fac4bdca9 100644
--- a/common.mk
+++ b/common.mk
@@ -6791,6 +6791,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h
gc.$(OBJEXT): $(top_srcdir)/internal/bignum.h
gc.$(OBJEXT): $(top_srcdir)/internal/bits.h
gc.$(OBJEXT): $(top_srcdir)/internal/class.h
+gc.$(OBJEXT): $(top_srcdir)/internal/compile.h
gc.$(OBJEXT): $(top_srcdir)/internal/compilers.h
gc.$(OBJEXT): $(top_srcdir)/internal/complex.h
gc.$(OBJEXT): $(top_srcdir)/internal/cont.h
diff --git a/compile.c b/compile.c
index 3849a66a07..cdea789afd 100644
--- a/compile.c
+++ b/compile.c
@@ -10264,6 +10264,18 @@ dump_disasm_list_with_cursor(const LINK_ELEMENT *link, const LINK_ELEMENT *curr,
fflush(stdout);
}
+bool
+rb_insns_leaf_p(int i)
+{
+ return insn_leaf_p(i);
+}
+
+int
+rb_insn_len(VALUE insn)
+{
+ return insn_len(insn);
+}
+
const char *
rb_insns_name(int i)
{
diff --git a/gc.c b/gc.c
index 75770d0c2f..dcd248f8aa 100644
--- a/gc.c
+++ b/gc.c
@@ -100,6 +100,7 @@
#include "id_table.h"
#include "internal.h"
#include "internal/class.h"
+#include "internal/compile.h"
#include "internal/complex.h"
#include "internal/cont.h"
#include "internal/error.h"
@@ -2485,8 +2486,24 @@ gc_event_hook_body(rb_execution_context_t *ec, rb_objspace_t *objspace, const rb
{
const VALUE *pc = ec->cfp->pc;
if (pc && VM_FRAME_RUBYFRAME_P(ec->cfp)) {
- /* increment PC because source line is calculated with PC-1 */
- ec->cfp->pc++;
+ int prev_opcode = rb_vm_insn_addr2opcode((void *)*ec->cfp->iseq->body->iseq_encoded);
+ for (const VALUE *insn = ec->cfp->iseq->body->iseq_encoded; insn < pc; insn += rb_insn_len(prev_opcode)) {
+ prev_opcode = rb_vm_insn_addr2opcode((void *)*insn);
+ }
+
+ /* If the previous instruction is a leaf instruction, then the PC is
+ * the currently executing instruction. We should increment the PC
+ * because the source line is calculated with PC-1 in calc_pos.
+ *
+ * If the previous instruction is not a leaf instruction, then the PC
+ * was incremented before the instruction was ran (meaning the
+ * currently executing instruction is actually the previous
+ * instruction), so we should not increment the PC otherwise we will
+ * calculate the source line for the next instruction.
+ */
+ if (rb_insns_leaf_p(prev_opcode)) {
+ ec->cfp->pc++;
+ }
}
EXEC_EVENT_HOOK(ec, event, ec->cfp->self, 0, 0, 0, data);
ec->cfp->pc = pc;
diff --git a/internal/compile.h b/internal/compile.h
index d32c2233c9..8670785b7b 100644
--- a/internal/compile.h
+++ b/internal/compile.h
@@ -17,6 +17,8 @@ struct rb_iseq_struct; /* in vm_core.h */
/* compile.c */
int rb_dvar_defined(ID, const struct rb_iseq_struct *);
int rb_local_defined(ID, const struct rb_iseq_struct *);
+bool rb_insns_leaf_p(int i);
+int rb_insn_len(VALUE insn);
const char *rb_insns_name(int i);
VALUE rb_insns_name_array(void);
int rb_iseq_cdhash_cmp(VALUE val, VALUE lit);
diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb
index a053717f9a..eabfff7876 100644
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@@ -216,6 +216,14 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal(c3, ObjectSpace.allocation_generation(o3))
assert_equal(self.class.name, ObjectSpace.allocation_class_path(o3))
assert_equal(__method__, ObjectSpace.allocation_method_id(o3))
+
+ # [Bug #19456]
+ o4 =
+ # This line intentionally left blank
+ # This line intentionally left blank
+ 1.0 / 0.0; line4 = __LINE__; c4 = GC.count
+ assert_equal(__FILE__, ObjectSpace.allocation_sourcefile(o4))
+ assert_equal(line4, ObjectSpace.allocation_sourceline(o4))
}
end
diff --git a/tool/ruby_vm/views/_leaf_helpers.erb b/tool/ruby_vm/views/_leaf_helpers.erb
index 1735db2196..ed2b5dec49 100644
--- a/tool/ruby_vm/views/_leaf_helpers.erb
+++ b/tool/ruby_vm/views/_leaf_helpers.erb
@@ -10,6 +10,31 @@
#include "iseq.h"
+extern const bool rb_vm_insn_leaf_p[];
+
+#ifdef RUBY_VM_INSNS_INFO
+const bool rb_vm_insn_leaf_p[] = {
+% RubyVM::Instructions.each_slice(20) do |insns|
+ <%= insns.map do |insn|
+ if insn.is_a?(RubyVM::BareInstructions)
+ insn.always_leaf? ? '1' : '0'
+ else
+ '0'
+ end
+ end.join(', ')
+ %>,
+% end
+};
+#endif
+
+CONSTFUNC(MAYBE_UNUSED(static bool insn_leaf_p(VALUE insn)));
+
+bool
+insn_leaf_p(VALUE insn)
+{
+ return rb_vm_insn_leaf_p[insn];
+}
+
// This is used to tell MJIT that this insn would be leaf if CHECK_INTS didn't exist.
// It should be used only when RUBY_VM_CHECK_INTS is directly written in insns.def.
static bool leafness_of_check_ints = false;
diff --git a/yjit.c b/yjit.c
index e00eda4581..683e8ae366 100644
--- a/yjit.c
+++ b/yjit.c
@@ -480,13 +480,6 @@ rb_insn_name(VALUE insn)
return insn_name(insn);
}
-// Query the instruction length in bytes for YARV opcode insn
-int
-rb_insn_len(VALUE insn)
-{
- return insn_len(insn);
-}
-
unsigned int
rb_vm_ci_argc(const struct rb_callinfo *ci)
{
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index 59d065dabd..5940398f4a 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1181,6 +1181,7 @@ extern "C" {
key: st_data_t,
pval: *mut st_data_t,
) -> ::std::os::raw::c_int;
+ pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
pub fn rb_iseq_line_no(iseq: *const rb_iseq_t, pos: usize) -> ::std::os::raw::c_uint;
@@ -1221,7 +1222,6 @@ extern "C" {
pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
pub fn rb_yjit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t;
pub fn rb_insn_name(insn: VALUE) -> *const ::std::os::raw::c_char;
- pub fn rb_insn_len(insn: VALUE) -> ::std::os::raw::c_int;
pub fn rb_vm_ci_argc(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;
pub fn rb_vm_ci_mid(ci: *const rb_callinfo) -> ID;
pub fn rb_vm_ci_flag(ci: *const rb_callinfo) -> ::std::os::raw::c_uint;