diff options
author | John Hawthorn <john@hawthorn.email> | 2022-08-10 10:35:48 -0700 |
---|---|---|
committer | John Hawthorn <john@hawthorn.email> | 2022-09-01 15:20:49 -0700 |
commit | 679ef34586e7a43151865cb7f33a3253d815f7cf (patch) | |
tree | 1f46e901c2c77438e050585e9e9708492cc985a6 /compile.c | |
parent | 7064d259bc20050d467874e5622082c29529a2d3 (diff) | |
download | ruby-679ef34586e7a43151865cb7f33a3253d815f7cf.tar.gz |
New constant caching insn: opt_getconstant_path
Previously YARV bytecode implemented constant caching by having a pair
of instructions, opt_getinlinecache and opt_setinlinecache, wrapping a
series of getconstant calls (with putobject providing supporting
arguments).
This commit replaces that pattern with a new instruction,
opt_getconstant_path, handling both getting/setting the inline cache and
fetching the constant on a cache miss.
This is implemented by storing the full constant path as a
null-terminated array of IDs inside of the IC structure. idNULL is used
to signal an absolute constant reference.
$ ./miniruby --dump=insns -e '::Foo::Bar::Baz'
== disasm: #<ISeq:<main>@-e:1 (1,0)-(1,13)> (catch: FALSE)
0000 opt_getconstant_path <ic:0 ::Foo::Bar::Baz> ( 1)[Li]
0002 leave
The motivation for this is that we had increasingly found the need to
disassemble the instructions between the opt_getinlinecache and
opt_setinlinecache in order to determine the constant we are fetching,
or otherwise store metadata.
This disassembly was done:
* In opt_setinlinecache, to register the IC against the constant names
it is using for granular invalidation.
* In rb_iseq_free, to unregister the IC from the invalidation table.
* In YJIT to find the position of an opt_getinlinecache instruction to
invalidate it when the cache is populated.
* In YJIT to register the constant names being used for invalidation.
With this change we no longer need disassembly for these (in fact
rb_iseq_each is now unused), as the list of constant names being
referenced is held in the IC. This should also make it possible to make
more optimizations in the future.
This may also reduce the size of iseqs, as previously each constant
segment required 32 bytes (on 64-bit platforms). This implementation
stores only one ID per segment.
There should be no significant performance change between this and the
previous implementation. Previously opt_getinlinecache was a "leaf"
instruction, but it included a jump (almost always to a separate cache
line). Now opt_getconstant_path is a non-leaf (it may
raise/autoload/call const_missing) but it does not jump. These seem to
even out.
Diffstat (limited to 'compile.c')
-rw-r--r-- | compile.c | 205 |
1 files changed, 137 insertions, 68 deletions
@@ -2251,6 +2251,30 @@ add_adjust_info(struct iseq_insn_info_entry *insns_info, unsigned int *positions return TRUE; } +static ID * +array_to_idlist(VALUE arr) +{ + RUBY_ASSERT(RB_TYPE_P(arr, T_ARRAY)); + long size = RARRAY_LEN(arr); + ID *ids = (ID *)ALLOC_N(ID, size + 1); + for (int i = 0; i < size; i++) { + VALUE sym = RARRAY_AREF(arr, i); + ids[i] = SYM2ID(sym); + } + ids[size] = 0; + return ids; +} + +static VALUE +idlist_to_array(const ID *ids) +{ + VALUE arr = rb_ary_new(); + while (*ids) { + rb_ary_push(arr, ID2SYM(*ids++)); + } + return arr; +} + /** ruby insn object list -> raw instruction sequence */ @@ -2433,6 +2457,21 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) } /* [ TS_IVC | TS_ICVARC | TS_ISE | TS_IC ] */ case TS_IC: /* inline cache: constants */ + { + unsigned int ic_index = ISEQ_COMPILE_DATA(iseq)->ic_index++; + IC ic = &ISEQ_IS_ENTRY_START(body, type)[ic_index].ic_cache; + if (UNLIKELY(ic_index >= body->ic_size)) { + BADINSN_DUMP(anchor, &iobj->link, 0); + COMPILE_ERROR(iseq, iobj->insn_info.line_no, + "iseq_set_sequence: ic_index overflow: index: %d, size: %d", + ic_index, ISEQ_IS_SIZE(body)); + } + + ic->segments = array_to_idlist(operands[j]); + + generated_iseq[code_index + 1 + j] = (VALUE)ic; + } + break; case TS_ISE: /* inline storage entry: `once` insn */ case TS_ICVARC: /* inline cvar cache */ case TS_IVC: /* inline ivar cache */ @@ -2447,11 +2486,6 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor) } generated_iseq[code_index + 1 + j] = (VALUE)ic; - if (insn == BIN(opt_getinlinecache) && type == TS_IC) { - // Store the instruction index for opt_getinlinecache on the IC for - // YJIT to invalidate code when opt_setinlinecache runs. 
- ic->get_insn_idx = (unsigned int)code_index; - } break; } case TS_CALLDATA: @@ -5233,6 +5267,30 @@ compile_massign(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, return COMPILE_OK; } +static VALUE +collect_const_segments(rb_iseq_t *iseq, const NODE *node) +{ + VALUE arr = rb_ary_new(); + for (;;) + { + switch (nd_type(node)) { + case NODE_CONST: + rb_ary_unshift(arr, ID2SYM(node->nd_vid)); + return arr; + case NODE_COLON3: + rb_ary_unshift(arr, ID2SYM(node->nd_mid)); + rb_ary_unshift(arr, ID2SYM(idNULL)); + return arr; + case NODE_COLON2: + rb_ary_unshift(arr, ID2SYM(node->nd_mid)); + node = node->nd_head; + break; + default: + return Qfalse; + } + } +} + static int compile_const_prefix(rb_iseq_t *iseq, const NODE *const node, LINK_ANCHOR *const pref, LINK_ANCHOR *const body) @@ -8970,37 +9028,31 @@ compile_match(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i static int compile_colon2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int popped) { - const int line = nd_line(node); if (rb_is_const_id(node->nd_mid)) { /* constant */ - LABEL *lend = NEW_LABEL(line); - int ic_index = ISEQ_BODY(iseq)->ic_size++; - - DECL_ANCHOR(pref); - DECL_ANCHOR(body); - - INIT_ANCHOR(pref); - INIT_ANCHOR(body); - CHECK(compile_const_prefix(iseq, node, pref, body)); - if (LIST_INSN_SIZE_ZERO(pref)) { - if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { - ADD_INSN2(ret, node, opt_getinlinecache, lend, INT2FIX(ic_index)); - } - else { + VALUE segments; + if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache && + (segments = collect_const_segments(iseq, node))) { + ISEQ_BODY(iseq)->ic_size++; + ADD_INSN1(ret, node, opt_getconstant_path, segments); + RB_OBJ_WRITTEN(iseq, Qundef, segments); + } else { + /* constant */ + DECL_ANCHOR(pref); + DECL_ANCHOR(body); + + INIT_ANCHOR(pref); + INIT_ANCHOR(body); + CHECK(compile_const_prefix(iseq, node, pref, body)); + if (LIST_INSN_SIZE_ZERO(pref)) { ADD_INSN(ret, node, putnil); + 
ADD_SEQ(ret, body); } - - ADD_SEQ(ret, body); - - if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { - ADD_INSN1(ret, node, opt_setinlinecache, INT2FIX(ic_index)); - ADD_LABEL(ret, lend); + else { + ADD_SEQ(ret, pref); + ADD_SEQ(ret, body); } } - else { - ADD_SEQ(ret, pref); - ADD_SEQ(ret, body); - } } else { /* function call */ @@ -9017,25 +9069,18 @@ compile_colon2(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, static int compile_colon3(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int popped) { - const int line = nd_line(node); - LABEL *lend = NEW_LABEL(line); - int ic_index = ISEQ_BODY(iseq)->ic_size++; - debugi("colon3#nd_mid", node->nd_mid); /* add cache insn */ if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { - ADD_INSN2(ret, node, opt_getinlinecache, lend, INT2FIX(ic_index)); - ADD_INSN(ret, node, pop); - } - - ADD_INSN1(ret, node, putobject, rb_cObject); - ADD_INSN1(ret, node, putobject, Qtrue); - ADD_INSN1(ret, node, getconstant, ID2SYM(node->nd_mid)); - - if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { - ADD_INSN1(ret, node, opt_setinlinecache, INT2FIX(ic_index)); - ADD_LABEL(ret, lend); + ISEQ_BODY(iseq)->ic_size++; + VALUE segments = rb_ary_new_from_args(2, ID2SYM(idNULL), ID2SYM(node->nd_mid)); + ADD_INSN1(ret, node, opt_getconstant_path, segments); + RB_OBJ_WRITTEN(iseq, Qundef, segments); + } else { + ADD_INSN1(ret, node, putobject, rb_cObject); + ADD_INSN1(ret, node, putobject, Qtrue); + ADD_INSN1(ret, node, getconstant, ID2SYM(node->nd_mid)); } if (popped) { @@ -9536,18 +9581,14 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no case NODE_CONST:{ debugi("nd_vid", node->nd_vid); - if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { - LABEL *lend = NEW_LABEL(line); - int ic_index = body->ic_size++; - - ADD_INSN2(ret, node, opt_getinlinecache, lend, INT2FIX(ic_index)); - ADD_INSN1(ret, node, putobject, Qtrue); - ADD_INSN1(ret, node, getconstant, 
ID2SYM(node->nd_vid)); - ADD_INSN1(ret, node, opt_setinlinecache, INT2FIX(ic_index)); - ADD_LABEL(ret, lend); - } - else { - ADD_INSN(ret, node, putnil); + if (ISEQ_COMPILE_DATA(iseq)->option->inline_const_cache) { + body->ic_size++; + VALUE segments = rb_ary_new_from_args(1, ID2SYM(node->nd_vid)); + ADD_INSN1(ret, node, opt_getconstant_path, segments); + RB_OBJ_WRITTEN(iseq, Qundef, segments); + } + else { + ADD_INSN(ret, node, putnil); ADD_INSN1(ret, node, putobject, Qtrue); ADD_INSN1(ret, node, getconstant, ID2SYM(node->nd_vid)); } @@ -10032,10 +10073,16 @@ insn_data_to_s_detail(INSN *iobj) rb_str_concat(str, opobj_inspect(OPERAND_AT(iobj, j))); break; case TS_IC: /* inline cache */ + rb_str_concat(str, opobj_inspect(OPERAND_AT(iobj, j))); + break; case TS_IVC: /* inline ivar cache */ + rb_str_catf(str, "<ivc:%d>", FIX2INT(OPERAND_AT(iobj, j))); + break; case TS_ICVARC: /* inline cvar cache */ + rb_str_catf(str, "<icvarc:%d>", FIX2INT(OPERAND_AT(iobj, j))); + break; case TS_ISE: /* inline storage entry */ - rb_str_catf(str, "<ic:%d>", FIX2INT(OPERAND_AT(iobj, j))); + rb_str_catf(str, "<ise:%d>", FIX2INT(OPERAND_AT(iobj, j))); break; case TS_CALLDATA: /* we store these as call infos at compile time */ { @@ -10431,9 +10478,20 @@ iseq_build_from_ary_body(rb_iseq_t *iseq, LINK_ANCHOR *const anchor, } break; case TS_IC: - argv[j] = op; - if (NUM2UINT(op) >= ISEQ_BODY(iseq)->ic_size) { - ISEQ_BODY(iseq)->ic_size = NUM2INT(op) + 1; + { + VALUE segments = rb_ary_new(); + op = rb_to_array_type(op); + + for (int i = 0; i < RARRAY_LEN(op); i++) { + VALUE sym = RARRAY_AREF(op, i); + sym = rb_to_symbol_type(sym); + rb_ary_push(segments, sym); + } + + RB_GC_GUARD(op); + argv[j] = segments; + RB_OBJ_WRITTEN(iseq, Qundef, segments); + ISEQ_BODY(iseq)->ic_size++; } break; case TS_IVC: /* inline ivar cache */ @@ -10627,6 +10685,7 @@ rb_iseq_mark_insn_storage(struct iseq_compile_data_storage *storage) case TS_CDHASH: case TS_ISEQ: case TS_VALUE: + case TS_IC: // constant path 
array case TS_CALLDATA: // ci is stored. { VALUE op = OPERAND_AT(iobj, j); @@ -11255,6 +11314,12 @@ ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq) wv = (VALUE)ibf_dump_iseq(dump, (const rb_iseq_t *)op); break; case TS_IC: + { + IC ic = (IC)op; + VALUE arr = idlist_to_array(ic->segments); + wv = ibf_dump_object(dump, arr); + } + break; case TS_ISE: case TS_IVC: case TS_ICVARC: @@ -11299,6 +11364,7 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod struct rb_iseq_constant_body *load_body = ISEQ_BODY(iseq); struct rb_call_data *cd_entries = load_body->call_data; + int ic_index = 0; iseq_bits_t * mark_offset_bits; @@ -11315,7 +11381,6 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod for (code_index=0; code_index<iseq_size;) { /* opcode */ const VALUE insn = code[code_index] = ibf_load_small_value(load, &reading_pos); - const unsigned int insn_index = code_index; const char *types = insn_op_types(insn); int op_index; @@ -11370,6 +11435,16 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod break; } case TS_IC: + { + VALUE op = ibf_load_small_value(load, &reading_pos); + VALUE arr = ibf_load_object(load, op); + + IC ic = &ISEQ_IS_IC_ENTRY(load_body, ic_index++); + ic->segments = array_to_idlist(arr); + + code[code_index] = (VALUE)ic; + } + break; case TS_ISE: case TS_ICVARC: case TS_IVC: @@ -11378,12 +11453,6 @@ ibf_load_code(const struct ibf_load *load, rb_iseq_t *iseq, ibf_offset_t bytecod ISE ic = ISEQ_IS_ENTRY_START(load_body, operand_type) + op; code[code_index] = (VALUE)ic; - - if (insn == BIN(opt_getinlinecache) && operand_type == TS_IC) { - // Store the instruction index for opt_getinlinecache on the IC for - // YJIT to invalidate code when opt_setinlinecache runs. - ic->ic_cache.get_insn_idx = insn_index; - } } break; case TS_CALLDATA: |