/* ** Snapshot handling. ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h */ #define lj_snap_c #define LUA_CORE #include "lj_obj.h" #if LJ_HASJIT #include "lj_gc.h" #include "lj_state.h" #include "lj_frame.h" #include "lj_bc.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" #include "lj_trace.h" #include "lj_snap.h" #include "lj_target.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) /* -- Snapshot buffer allocation ------------------------------------------ */ /* Grow snapshot buffer. */ void lj_snap_grow_buf_(jit_State *J, MSize need) { MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; if (need > maxsnap) lj_trace_err(J, LJ_TRERR_SNAPOV); lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); J->cur.snap = J->snapbuf; } /* Grow snapshot map buffer. */ void lj_snap_grow_map_(jit_State *J, MSize need) { if (need < 2*J->sizesnapmap) need = 2*J->sizesnapmap; else if (need < 64) need = 64; J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); J->cur.snapmap = J->snapmapbuf; J->sizesnapmap = need; } /* -- Snapshot generation ------------------------------------------------- */ /* Add all modified slots to the snapshot. */ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) { IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */ BCReg s; MSize n = 0; for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = IR(ref); if (!(sn & (SNAP_CONT|SNAP_FRAME)) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { /* No need to snapshot unmodified non-inherited slots. */ if (!(ir->op2 & IRSLOAD_INHERIT)) continue; /* No need to restore readonly slots and unmodified non-parent slots. */ if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) sn |= SNAP_NORESTORE; } if (LJ_SOFTFP && irt_isnum(ir->t)) sn |= SNAP_SOFTFPNUM; map[n++] = sn; } } return n; } /* Add frame links at the end of the snapshot. */ static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) { cTValue *frame = J->L->base - 1; cTValue *lim = J->L->base - J->baseslot; cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; MSize f = 0; map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { map[f++] = SNAP_MKPC(frame_pc(frame)); frame = frame_prevl(frame); if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } else if (frame_iscont(frame)) { map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); frame = frame_prevd(frame); } else { lua_assert(!frame_isc(frame)); map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); frame = frame_prevd(frame); } } lua_assert(f == (MSize)(1 + J->framedepth)); return (BCReg)(ftop - lim); } /* Take a snapshot of the current stack. */ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) { BCReg nslots = J->baseslot + J->maxslot; MSize nent; SnapEntry *p; /* Conservative estimate. */ lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); snap->mapofs = (uint16_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; snap->nent = (uint8_t)nent; snap->nslots = (uint8_t)nslots; snap->count = 0; J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + 1 + J->framedepth); } /* Add or merge a snapshot. */ void lj_snap_add(jit_State *J) { MSize nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; /* Merge if no ins. inbetween or if requested and no guard inbetween. */ if (J->mergesnap ? !irt_isguard(J->guardemit) : (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { nsnapmap = J->cur.snap[--nsnap].mapofs; } else { lj_snap_grow_buf(J, nsnap+1); J->cur.nsnap = (uint16_t)(nsnap+1); } J->mergesnap = 0; J->guardemit.irt = 0; snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); } /* -- Snapshot modification ----------------------------------------------- */ #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA) /* Find unused slots with reaching-definitions bytecode data-flow analysis. */ static BCReg snap_usedef(jit_State *J, uint8_t *udf, const BCIns *pc, BCReg maxslot) { BCReg s; GCobj *o; if (maxslot == 0) return 0; #ifdef LUAJIT_USE_VALGRIND /* Avoid errors for harmless reads beyond maxslot. */ memset(udf, 1, SNAP_USEDEF_SLOTS); #else memset(udf, 1, maxslot); #endif /* Treat open upvalues as used. */ o = gcref(J->L->openupval); while (o) { if (uvval(gco2uv(o)) < J->L->base) break; udf[uvval(gco2uv(o)) - J->L->base] = 0; o = gcref(o->gch.nextgc); } #define USE_SLOT(s) udf[(s)] &= ~1 #define DEF_SLOT(s) udf[(s)] *= 3 /* Scan through following bytecode and check for uses/defs. */ lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); for (;;) { BCIns ins = *pc++; BCOp op = bc_op(ins); switch (bcmode_b(op)) { case BCMvar: USE_SLOT(bc_b(ins)); break; default: break; } switch (bcmode_c(op)) { case BCMvar: USE_SLOT(bc_c(ins)); break; case BCMrbase: lua_assert(op == BC_CAT); for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); break; case BCMjump: handle_jump: { BCReg minslot = bc_a(ins); if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT; else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1; else if (op == BC_UCLO) { pc += bc_j(ins); break; } for (s = minslot; s < maxslot; s++) DEF_SLOT(s); return minslot < maxslot ? minslot : maxslot; } case BCMlit: if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { goto handle_jump; } else if (bc_isret(op)) { BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1); for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); for (; s < top; s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); return 0; } break; case BCMfunc: return maxslot; /* NYI: will abort, anyway. */ default: break; } switch (bcmode_a(op)) { case BCMvar: USE_SLOT(bc_a(ins)); break; case BCMdst: if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins)); break; case BCMbase: if (op >= BC_CALLM && op <= BC_VARG) { BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? maxslot : (bc_a(ins) + bc_c(ins)); s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); for (; s < top; s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); if (op == BC_CALLT || op == BC_CALLMT) { for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); return 0; } } else if (op == BC_KNIL) { for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s); } else if (op == BC_TSETM) { for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s); } break; default: break; } lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); } #undef USE_SLOT #undef DEF_SLOT return 0; /* unreachable */ } /* Purge dead slots before the next snapshot. */ void lj_snap_purge(jit_State *J) { uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; BCReg s = snap_usedef(J, udf, J->pc, maxslot); for (; s < maxslot; s++) if (udf[s] != 0) J->base[s] = 0; /* Purge dead slots. */ } /* Shrink last snapshot. */ void lj_snap_shrink(jit_State *J) { SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; SnapEntry *map = &J->cur.snapmap[snap->mapofs]; MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; maxslot += baseslot; minslot += baseslot; snap->nslots = (uint8_t)maxslot; for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */ BCReg s = snap_slot(map[n]); if (s < minslot || (s < maxslot && udf[s-baseslot] == 0)) map[m++] = map[n]; /* Only copy used slots. */ } snap->nent = (uint8_t)m; nlim = J->cur.nsnapmap - snap->mapofs - 1; while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */ J->cur.nsnapmap = (uint16_t)(snap->mapofs + m); /* Free up space in map. */ } /* -- Snapshot access ----------------------------------------------------- */ /* Initialize a Bloom Filter with all renamed refs. ** There are very few renames (often none), so the filter has ** very few bits set. This makes it suitable for negative filtering. */ static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim) { BloomFilter rfilt = 0; IRIns *ir; for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) if (ir->op2 <= lim) bloomset(rfilt, ir->op1); return rfilt; } /* Process matching renames to find the original RegSP. */ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) { IRIns *ir; for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) if (ir->op1 == ref && ir->op2 <= lim) rs = ir->prev; return rs; } /* Convert a snapshot into a linear slot -> RegSP map. ** Note: unused slots are not initialized! */ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi) { SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); if (!irref_isk(ref) && ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : 1)) { IRIns *ir = &T->ir[ref]; uint32_t rs = ir->prev; if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); rsmap[snap_slot(sn)] = (uint16_t)rs; } } } /* Restore interpreter state from exit state with the help of a snapshot. */ const BCIns *lj_snap_restore(jit_State *J, void *exptr) { ExitState *ex = (ExitState *)exptr; SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ GCtrace *T = traceref(J, J->parent); SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; int32_t ftsz0; TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ setcframe_pc(cframe_raw(L->cframe), pc+1); /* Make sure the stack is big enough for the slots from the snapshot. */ if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { L->top = curr_topL(L); lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize); } /* Fill stack slots with data from the registers and spill slots. */ frame = L->base-1; ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; IRRef ref = snap_ref(sn); BCReg s = snap_slot(sn); TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ IRIns *ir = &T->ir[ref]; if (irref_isk(ref)) { /* Restore constant slot. */ lj_ir_kvalue(L, o, ir); } else if (!(sn & SNAP_NORESTORE)) { IRType1 t = ir->t; RegSP rs = ir->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref))) rs = snap_renameref(T, snapno, ref, rs); if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ int32_t *sps = &ex->spill[regsp_spill(rs)]; if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)*sps; } else if (irt_isinteger(t)) { setintV(o, *sps); #if !LJ_SOFTFP } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; #endif } else { lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ setgcrefi(o->gcr, *sps); setitype(o, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); lua_assert(ra_hasreg(r)); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { o->u32.lo = (uint32_t)ex->gpr[r-RID_MIN_GPR]; } else if (irt_isinteger(t)) { setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); #endif #if LJ_64 } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; #endif } else { if (!irt_ispri(t)) setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); setitype(o, irt_toitype(t)); } } if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { rs = (ir+1)->prev; if (LJ_UNLIKELY(bloomtest(rfilt, ref+1))) rs = snap_renameref(T, snapno, ref+1, rs); o->u32.hi = (ra_hasspill(regsp_spill(rs))) ? (uint32_t)*&ex->spill[regsp_spill(rs)] : (uint32_t)ex->gpr[regsp_reg(rs)-RID_MIN_GPR]; } } if ((sn & (SNAP_CONT|SNAP_FRAME))) { /* Overwrite tag with frame link. */ o->fr.tp.ftsz = s != 0 ? (int32_t)*flinks-- : ftsz0; L->base = o+1; } } switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: L->top = frame + snap->nslots; break; default: L->top = curr_topL(L); break; } lua_assert(map + nent == flinks); return pc; } #undef IR #endif