diff options
author | Mike Pall <mike> | 2009-12-08 20:35:29 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-08 20:35:29 +0100 |
commit | 3f1f9e11f4f699ae94182d4cba158092f434a7f6 (patch) | |
tree | 88fbb674a21a1d554d4b1ee9d4ef2c5fed6a1d88 /src | |
parent | 5287b9326479ea2b7dddd6f642673e58e5a7f354 (diff) | |
download | luajit2-3f1f9e11f4f699ae94182d4cba158092f434a7f6.tar.gz |
Fast forward to sync public repo.
Compile math.sinh(), math.cosh(), math.tanh() and math.random().
Compile various io.*() functions.
Drive the GC forward on string allocations in the parser.
Improve KNUM fuse vs. load heuristics.
Add abstract C call handling to IR.
Diffstat (limited to 'src')
38 files changed, 1153 insertions, 743 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 1fb81e27..779ee545 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ - lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \ + lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \ + lj_lib.h lj_libdef.h lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ @@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ - lj_target.h lj_target_x86.h + lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ + lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \ + lj_vm.h lj_target.h lj_target_x86.h lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ lj_ir.h lj_dispatch.h lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ - lj_traceerr.h + lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ + lj_bc.h lj_traceerr.h lj_lib.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ diff --git a/src/buildvm.c b/src/buildvm.c index b3738db4..4aba39d4 100644 --- a/src/buildvm.c +++ b/src/buildvm.c @@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME) }; const char *const irfield_names[] = { -#define FLNAME(name, type, field) #name, +#define FLNAME(name, ofs) #name, IRFLDEF(FLNAME) #undef FLNAME NULL }; +const char *const ircall_names[] = { +#define IRCALLNAME(name, nargs, kind, type, flags) #name, +IRCALLDEF(IRCALLNAME) +#undef IRCALLNAME + NULL +}; + static const char *const trace_errors[] = { #define TREDEF(name, msg) msg, #include "lj_traceerr.h" @@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx) } fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "ircall = {\n[0]="); + for (i = 0; ircall_names[i]; i++) + fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); + fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "traceerr = {\n[0]="); for (i = 0; trace_errors[i]; i++) fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); diff --git a/src/buildvm.h b/src/buildvm.h index e55527fd..53c820ad 100644 --- a/src/buildvm.h +++ b/src/buildvm.h @@ -102,5 +102,6 @@ extern const char *const bc_names[]; extern const char *const ir_names[]; extern const char *const irfpm_names[]; extern const char *const irfield_names[]; +extern const char *const ircall_names[]; #endif diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c index 5daab13b..31b6f61e 100644 --- a/src/buildvm_asm.c +++ b/src/buildvm_asm.c @@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n) static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) { const char *sym = ctx->extnames[r->sym]; + const char *p = strchr(sym, '@'); + char buf[80]; + if (p) { + /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */ + strncpy(buf, sym, p-sym); + buf[p-sym] = '\0'; + sym = buf; + } switch (ctx->mode) { case BUILD_elfasm: if (r->type) diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c index 271118e0..77af3dc5 100644 --- a/src/buildvm_fold.c +++ b/src/buildvm_fold.c @@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) for (i = 0; irfield_names[i]; i++) if (!strcmp(irfield_names[i], p+5)) return i; + } else if (allowlit && !strncmp(p, "IRCALL_", 7)) { + for (i = 0; ircall_names[i]; i++) + if (!strcmp(ircall_names[i], p+7)) + return i; } else if (allowany && !strcmp("any", p)) { return 0xff; } else { diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c index 1a8661bf..a24ae727 100644 --- a/src/buildvm_peobj.c +++ b/src/buildvm_peobj.c @@ -85,6 +85,7 @@ typedef struct PEsymaux { #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ #define PEOBJ_RELOC_DIR32 0x06 #define PEOBJ_SYM_PREFIX "_" +#define PEOBJ_SYMF_PREFIX "@" #elif LJ_TARGET_X64 #define PEOBJ_ARCH_TARGET 0x8664 #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ @@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx) emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); for (i = 0; ctx->extnames[i]; i++) { - sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); + const char *sym = ctx->extnames[i]; + const char *p = strchr(sym, '@'); + if (p) { +#ifdef PEOBJ_SYMF_PREFIX + sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym); +#else + strncpy(name, sym, p-sym); + name[p-sym] = '\0'; +#endif + } else { + sprintf(name, PEOBJ_SYM_PREFIX "%s", sym); + } emit_peobj_sym(ctx, name, 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); } diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 09cfa6dc..e857a6be 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc @@ -30,6 +30,9 @@ |.define RD, RC |.define RDL, RCL | +|.define FCARG1, ecx // Fastcall arguments. +|.define FCARG2, edx +| |// Type definitions. Some of these are only used for documentation. |.type L, lua_State |.type GL, global_State @@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | mov RB, LJ_TNUMX |7: | not RB - | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] + | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] | jmp <2 | |.ffunc_2 setmetatable @@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | jmp ->fff_res1 |3: // Handle numbers inline, unless a number base metatable is present. | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback - | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 + | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 | jne ->fff_fallback | ffgccheck // Caveat: uses label 1. | mov L:RB, SAVE_L - | mov ARG1, L:RB - | mov ARG2, RA | mov L:RB->base, RA // Add frame since C call can throw. | mov [RA-4], PC | mov SAVE_PC, PC // Redundant (but a defined value). | mov ARG3, BASE // Save BASE. - | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) + | mov FCARG2, RA // Caveat: FCARG2 == BASE + | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA + | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | // GCstr returned in eax (RC). | mov RA, L:RB->base | mov BASE, ARG3 @@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |.ffunc_1 table_getn | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback - | mov TAB:RB, [RA] - | mov ARG1, TAB:RB - | mov RB, RA // Save RA and BASE. - | mov ARG2, BASE - | call extern lj_tab_len // (GCtab *t) + | mov ARG2, BASE // Save RA and BASE. + | mov RB, RA + | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA + | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) | // Length of table returned in eax (RC). | mov ARG1, RC | mov RA, RB // Restore RA and BASE. @@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | ins_next |2: | checktab RD, ->vmeta_len - | mov TAB:RD, [BASE+RD*8] - | mov ARG1, TAB:RD + | mov TAB:FCARG1, [BASE+RD*8] | mov RB, BASE // Save BASE. - | call extern lj_tab_len // (GCtab *t) + | call extern lj_tab_len@4 // (GCtab *t) | // Length of table returned in eax (RC). | mov ARG1, RC | mov BASE, RB // Restore BASE. @@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | ins_next break; case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) | ins_AD // RA = upvalue #, RD = src - | // Really ugly code due to the lack of a 4th free register. | mov LFUNC:RB, [BASE-8] | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) - | jnz >4 - |1: - | mov RA, [BASE+RD*8] - |2: + | cmp byte UPVAL:RB->closed, 0 | mov RB, UPVAL:RB->v + | mov RA, [BASE+RD*8] | mov RD, [BASE+RD*8+4] | mov [RB], RA | mov [RB+4], RD - |3: + | jz >1 + | // Check barrier for closed upvalue. + | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) + | jnz >2 + |1: | ins_next | - |4: // Upvalue is black. Check if new value is collectable and white. - | mov RA, [BASE+RD*8+4] - | sub RA, LJ_TISGCV - | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) + |2: // Upvalue is black. Check if new value is collectable and white. + | sub RD, LJ_TISGCV + | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) | jbe <1 - | mov GCOBJ:RA, [BASE+RD*8] | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) - | jz <2 - | // Crossed a write barrier. So move the barrier forward. - | mov ARG2, UPVAL:RB - | mov ARG3, GCOBJ:RA - | mov RB, UPVAL:RB->v - | mov RD, [BASE+RD*8+4] - | mov [RB], GCOBJ:RA - | mov [RB+4], RD - |->BC_USETV_Z: - | mov L:RB, SAVE_L - | lea GL:RA, [DISPATCH+GG_DISP2G] - | mov L:RB->base, BASE - | mov ARG1, GL:RA - | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v) - | mov BASE, L:RB->base - | jmp <3 + | jz <1 + | // Crossed a write barrier. Move the barrier forward. + | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). + | lea GL:FCARG1, [DISPATCH+GG_DISP2G] + | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) + | mov BASE, RB // Restore BASE. + | jmp <1 break; +#undef TV2MARKOFS case BC_USETS: | ins_AND // RA = upvalue #, RD = str const (~) | mov LFUNC:RB, [BASE-8] - | mov GCOBJ:RD, [KBASE+RD*4] | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] - | mov RA, UPVAL:RB->v - | mov dword [RA+4], LJ_TSTR - | mov [RA], GCOBJ:RD + | mov GCOBJ:RA, [KBASE+RD*4] + | mov RD, UPVAL:RB->v + | mov [RD], GCOBJ:RA + | mov dword [RD+4], LJ_TSTR | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | jnz >2 |1: | ins_next | - |2: // Upvalue is black. Check if string is white. - | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) + |2: // Check if string is white and ensure upvalue is closed. + | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) | jz <1 - | // Crossed a write barrier. So move the barrier forward. - | mov ARG3, GCOBJ:RD - | mov ARG2, UPVAL:RB - | jmp ->BC_USETV_Z + | cmp byte UPVAL:RB->closed, 0 + | jz <1 + | // Crossed a write barrier. Move the barrier forward. + | mov RB, BASE // Save BASE (FCARG2 == BASE). + | mov FCARG2, RD + | lea GL:FCARG1, [DISPATCH+GG_DISP2G] + | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) + | mov BASE, RB // Restore BASE. + | jmp <1 break; case BC_USETN: | ins_AD // RA = upvalue #, RD = num const @@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | mov dword [BASE+RA*8+4], LJ_TTAB | ins_next |2: - | call extern lj_gc_step_fixtop // (lua_State *L) - | mov ARG1, L:RB // Args owned by callee. Set it again. + | mov L:FCARG1, L:RB + | call extern lj_gc_step_fixtop@4 // (lua_State *L) | jmp <1 break; case BC_TDUP: | ins_AND // RA = dst, RD = table const (~) (holding template table) - | mov TAB:RD, [KBASE+RD*4] | mov L:RB, SAVE_L - | mov ARG2, TAB:RD - | mov ARG1, L:RB | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | mov SAVE_PC, PC | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | mov L:RB->base, BASE | jae >3 |2: - | call extern lj_tab_dup // (lua_State *L, Table *kt) + | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE + | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA + | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) | // Table * returned in eax (RC). | mov BASE, L:RB->base | movzx RA, PC_RA @@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | mov dword [BASE+RA*8+4], LJ_TTAB | ins_next |3: - | call extern lj_gc_step_fixtop // (lua_State *L) - | mov ARG1, L:RB // Args owned by callee. Set it again. + | mov L:FCARG1, L:RB + | call extern lj_gc_step_fixtop@4 // (lua_State *L) + | movzx RD, PC_RD // Need to reload RD. + | not RD | jmp <2 break; diff --git a/src/lib_base.c b/src/lib_base.c index 6b9e8eef..821c81b4 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) int32_t base = lj_lib_optint(L, 2, 10); if (base == 10) { TValue *o = lj_lib_checkany(L, 1); - if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { + if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) { setnumV(L->base-1, numV(o)); return FFH_RES(1); } @@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) return FFH_RES(1); } +LJLIB_PUSH("nil") +LJLIB_PUSH("false") +LJLIB_PUSH("true") LJLIB_ASM(tostring) LJLIB_REC(.) { TValue *o = lj_lib_checkany(L, 1); @@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.) GCstr *s; if (tvisnum(o)) { s = lj_str_fromnum(L, &o->n); - } else if (tvisnil(o)) { - s = lj_str_newlit(L, "nil"); - } else if (tvisfalse(o)) { - s = lj_str_newlit(L, "false"); - } else if (tvistrue(o)) { - s = lj_str_newlit(L, "true"); + } else if (tvispri(o)) { + s = strV(lj_lib_upvalue(L, -itype(o))); } else { if (tvisfunc(o) && isffunc(funcV(o))) lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); diff --git a/src/lib_io.c b/src/lib_io.c index aefe4213..d69b99a4 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -17,14 +17,28 @@ #include "lualib.h" #include "lj_obj.h" -#include "lj_err.h" #include "lj_gc.h" +#include "lj_err.h" +#include "lj_str.h" #include "lj_ff.h" +#include "lj_trace.h" #include "lj_lib.h" -/* Index of standard handles in function environment. */ -#define IO_INPUT 1 -#define IO_OUTPUT 2 +/* Userdata payload for I/O file. */ +typedef struct IOFileUD { + FILE *fp; /* File handle. */ + uint32_t type; /* File type. */ +} IOFileUD; + +#define IOFILE_TYPE_FILE 0 /* Regular file. */ +#define IOFILE_TYPE_PIPE 1 /* Pipe. */ +#define IOFILE_TYPE_STDF 2 /* Standard file handle. */ +#define IOFILE_TYPE_MASK 3 + +#define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */ + +#define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud) +#define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id)))) /* -- Error handling ------------------------------------------------------ */ @@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname) return 1; } else { int en = errno; /* Lua API calls may change this value. */ - lua_pushnil(L); + setnilV(L->top++); if (fname) lua_pushfstring(L, "%s: %s", fname, strerror(en)); else lua_pushfstring(L, "%s", strerror(en)); - lua_pushinteger(L, en); + setintV(L->top++, en); + lj_trace_abort(G(L)); return 3; } } -static void io_file_error(lua_State *L, int arg, const char *fname) +/* -- Open/close helpers -------------------------------------------------- */ + +static IOFileUD *io_tofilep(lua_State *L) { - lua_pushfstring(L, "%s: %s", fname, strerror(errno)); - luaL_argerror(L, arg, lua_tostring(L, -1)); + if (!(L->base < L->top && tvisudata(L->base) && + udataV(L->base)->udtype == UDTYPE_IO_FILE)) + lj_err_argtype(L, 1, "FILE*"); + return (IOFileUD *)uddata(udataV(L->base)); } -/* -- Open helpers -------------------------------------------------------- */ - -#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE)) - -static FILE *io_tofile(lua_State *L) +static IOFileUD *io_tofile(lua_State *L) { - FILE **f = io_tofilep(L); - if (*f == NULL) + IOFileUD *iof = io_tofilep(L); + if (iof->fp == NULL) lj_err_caller(L, LJ_ERR_IOCLFL); - return *f; + return iof; } -static FILE **io_file_new(lua_State *L) +static FILE *io_stdfile(lua_State *L, ptrdiff_t id) { - FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); - *pf = NULL; - luaL_getmetatable(L, LUA_FILEHANDLE); - lua_setmetatable(L, -2); - return pf; + IOFileUD *iof = IOSTDF_IOF(L, id); + if (iof->fp == NULL) + lj_err_caller(L, LJ_ERR_IOSTDCL); + return iof->fp; } -/* -- Close helpers ------------------------------------------------------- */ +static IOFileUD *io_file_new(lua_State *L) +{ + IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); + GCudata *ud = udataV(L->top-1); + ud->udtype = UDTYPE_IO_FILE; + /* NOBARRIER: The GCudata is new (marked white). */ + setgcrefr(ud->metatable, curr_func(L)->c.env); + iof->fp = NULL; + iof->type = IOFILE_TYPE_FILE; + return iof; +} -static int lj_cf_io_std_close(lua_State *L) +static IOFileUD *io_file_open(lua_State *L, const char *mode) { - lua_pushnil(L); - lua_pushliteral(L, "cannot close standard file"); - return 2; + const char *fname = strdata(lj_lib_checkstr(L, 1)); + IOFileUD *iof = io_file_new(L); + iof->fp = fopen(fname, mode); + if (iof->fp == NULL) + luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); + return iof; } -static int lj_cf_io_pipe_close(lua_State *L) +static int io_file_close(lua_State *L, IOFileUD *iof) { - FILE **p = io_tofilep(L); + int ok; + if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) { + ok = (fclose(iof->fp) == 0); + } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) { #if defined(LUA_USE_POSIX) - int ok = (pclose(*p) != -1); + ok = (pclose(iof->fp) != -1); #elif defined(LUA_USE_WIN) - int ok = (_pclose(*p) != -1); + ok = (_pclose(iof->fp) != -1); #else - int ok = 0; + ok = 0; #endif - *p = NULL; - return io_pushresult(L, ok, NULL); -} - -static int lj_cf_io_file_close(lua_State *L) -{ - FILE **p = io_tofilep(L); - int ok = (fclose(*p) == 0); - *p = NULL; + } else { + lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); + setnilV(L->top++); + lua_pushliteral(L, "cannot close standard file"); + return 2; + } + iof->fp = NULL; return io_pushresult(L, ok, NULL); } -static int io_file_close(lua_State *L) -{ - lua_getfenv(L, 1); - lua_getfield(L, -1, "__close"); - return (lua_tocfunction(L, -1))(L); -} - /* -- Read/write helpers -------------------------------------------------- */ static int io_file_readnum(lua_State *L, FILE *fp) { lua_Number d; if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { - lua_pushnumber(L, d); + setnumV(L->top++, d); return 1; } else { - return 0; /* read fails */ + return 0; } } -static int test_eof(lua_State *L, FILE *fp) +static int io_file_testeof(lua_State *L, FILE *fp) { int c = getc(fp); ungetc(c, fp); @@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n) n -= nr; /* still have to read `n' chars */ } while (n > 0 && nr == rlen); /* until end of count or eof */ luaL_pushresult(&b); /* close buffer */ - return (n == 0 || lua_objlen(L, -1) > 0); + return (n == 0 || strV(L->top-1)->len > 0); } static int io_file_read(lua_State *L, FILE *fp, int start) @@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start) lj_err_arg(L, n+1, LJ_ERR_INVFMT); } else if (tvisnum(L->base+n)) { size_t len = (size_t)lj_lib_checkint(L, n+1); - ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); + ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp); } else { lj_err_arg(L, n+1, LJ_ERR_INVOPT); } @@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start) LJLIB_CF(io_method_close) { - if (lua_isnone(L, 1)) - lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); - io_tofile(L); - return io_file_close(L); + IOFileUD *iof = L->base < L->top ? io_tofile(L) : + IOSTDF_IOF(L, GCROOT_IO_OUTPUT); + return io_file_close(L, iof); } LJLIB_CF(io_method_read) { - return io_file_read(L, io_tofile(L), 1); + return io_file_read(L, io_tofile(L)->fp, 1); } -LJLIB_CF(io_method_write) +LJLIB_CF(io_method_write) LJLIB_REC(io_write 0) { - return io_file_write(L, io_tofile(L), 1); + return io_file_write(L, io_tofile(L)->fp, 1); } -LJLIB_CF(io_method_flush) +LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) { - return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); + return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL); } LJLIB_CF(io_method_seek) { - FILE *fp = io_tofile(L); + FILE *fp = io_tofile(L)->fp; int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); lua_Number ofs; int res; @@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek) LJLIB_CF(io_method_setvbuf) { - FILE *fp = io_tofile(L); + FILE *fp = io_tofile(L)->fp; int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); if (opt == 0) opt = _IOFBF; else if (opt == 1) opt = _IOLBF; else if (opt == 2) opt = _IONBF; - return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); + return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL); } -/* Forward declaration. */ -static void io_file_lines(lua_State *L, int idx, int toclose); - +LJLIB_PUSH(top-2) /* io_lines_iter */ LJLIB_CF(io_method_lines) { io_tofile(L); - io_file_lines(L, 1, 0); - return 1; + setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); + setudataV(L, L->top+1, udataV(L->base)); + L->top += 2; + return 2; } LJLIB_CF(io_method___gc) { - FILE *fp = *io_tofilep(L); - if (fp != NULL) io_file_close(L); + IOFileUD *iof = io_tofilep(L); + if (iof->fp != NULL) + io_file_close(L, iof); return 0; } LJLIB_CF(io_method___tostring) { - FILE *fp = *io_tofilep(L); - if (fp == NULL) - lua_pushliteral(L, "file (closed)"); + IOFileUD *iof = io_tofilep(L); + if (iof->fp != NULL) + lua_pushfstring(L, "file (%p)", iof->fp); else - lua_pushfstring(L, "file (%p)", fp); + lua_pushliteral(L, "file (closed)"); return 1; } @@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index) LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ -static FILE *io_file_get(lua_State *L, int findex) +LJLIB_CF(io_open) { - GCtab *fenv = tabref(curr_func(L)->c.env); - GCudata *ud = udataV(&tvref(fenv->array)[findex]); - FILE *fp = *(FILE **)uddata(ud); - if (fp == NULL) - lj_err_caller(L, LJ_ERR_IOSTDCL); - return fp; + const char *fname = strdata(lj_lib_checkstr(L, 1)); + GCstr *s = lj_lib_optstr(L, 2); + const char *mode = s ? strdata(s) : "r"; + IOFileUD *iof = io_file_new(L); + iof->fp = fopen(fname, mode); + return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); } -LJLIB_CF(io_open) +LJLIB_CF(io_popen) { - const char *fname = luaL_checkstring(L, 1); - const char *mode = luaL_optstring(L, 2, "r"); - FILE **pf = io_file_new(L); - *pf = fopen(fname, mode); - return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; +#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) + const char *fname = strdata(lj_lib_checkstr(L, 1)); + GCstr *s = lj_lib_optstr(L, 2); + const char *mode = s ? strdata(s) : "r"; + IOFileUD *iof = io_file_new(L); + iof->type = IOFILE_TYPE_PIPE; +#ifdef LUA_USE_POSIX + fflush(NULL); + iof->fp = popen(fname, mode); +#else + iof->fp = _popen(fname, mode); +#endif + return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); +#else + luaL_error(L, LUA_QL("popen") " not supported"); +#endif } LJLIB_CF(io_tmpfile) { - FILE **pf = io_file_new(L); - *pf = tmpfile(); - return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; + IOFileUD *iof = io_file_new(L); + iof->fp = tmpfile(); + return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL); } LJLIB_CF(io_close) @@ -373,169 +405,112 @@ LJLIB_CF(io_close) LJLIB_CF(io_read) { - return io_file_read(L, io_file_get(L, IO_INPUT), 0); -} - -LJLIB_CF(io_write) -{ - return io_file_write(L, io_file_get(L, IO_OUTPUT), 0); -} - -LJLIB_CF(io_flush) -{ - return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL); + return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0); } -LJLIB_NOREG LJLIB_CF(io_lines_iter) -{ - FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1))); - int ok; - if (fp == NULL) - lj_err_caller(L, LJ_ERR_IOCLFL); - ok = io_file_readline(L, fp); - if (ferror(fp)) - return luaL_error(L, "%s", strerror(errno)); - if (ok) - return 1; - if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */ - L->top = L->base+1; - setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1))); - io_file_close(L); - } - return 0; -} - -static void io_file_lines(lua_State *L, int idx, int toclose) +LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT) { - lua_pushvalue(L, idx); - lua_pushboolean(L, toclose); - lua_pushcclosure(L, lj_cf_io_lines_iter, 2); - funcV(L->top-1)->c.ffid = FF_io_lines_iter; + return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0); } -LJLIB_CF(io_lines) +LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT) { - if (lua_isnoneornil(L, 1)) { /* no arguments? */ - /* will iterate over default input */ - lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT); - return lj_cf_io_method_lines(L); - } else { - const char *fname = luaL_checkstring(L, 1); - FILE **pf = io_file_new(L); - *pf = fopen(fname, "r"); - if (*pf == NULL) - io_file_error(L, 1, fname); - io_file_lines(L, lua_gettop(L), 1); - return 1; - } + return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL); } -static int io_std_get(lua_State *L, int fp, const char *mode) +static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode) { - if (!lua_isnoneornil(L, 1)) { - const char *fname = lua_tostring(L, 1); - if (fname) { - FILE **pf = io_file_new(L); - *pf = fopen(fname, mode); - if (*pf == NULL) - io_file_error(L, 1, fname); + if (L->base < L->top && !tvisnil(L->base)) { + if (tvisudata(L->base)) { + io_tofile(L); + L->top = L->base+1; } else { - io_tofile(L); /* check that it's a valid file handle */ - lua_pushvalue(L, 1); + io_file_open(L, mode); } - lua_rawseti(L, LUA_ENVIRONINDEX, fp); + /* NOBARRIER: The standard I/O handles are GC roots. */ + setgcref(G(L)->gcroot[id], gcV(L->top-1)); + } else { + setudataV(L, L->top++, IOSTDF_UD(L, id)); } - /* return current value */ - lua_rawgeti(L, LUA_ENVIRONINDEX, fp); return 1; } LJLIB_CF(io_input) { - return io_std_get(L, IO_INPUT, "r"); + return io_std_getset(L, GCROOT_IO_INPUT, "r"); } LJLIB_CF(io_output) { - return io_std_get(L, IO_OUTPUT, "w"); + return io_std_getset(L, GCROOT_IO_OUTPUT, "w"); } -LJLIB_CF(io_type) +LJLIB_NOREG LJLIB_CF(io_lines_iter) { - void *ud; - luaL_checkany(L, 1); - ud = lua_touserdata(L, 1); - lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); - if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) - lua_pushnil(L); /* not a file */ - else if (*((FILE **)ud) == NULL) - lua_pushliteral(L, "closed file"); - else - lua_pushliteral(L, "file"); - return 1; + IOFileUD *iof = io_tofile(L); + int ok = io_file_readline(L, iof->fp); + if (ferror(iof->fp)) + lj_err_callermsg(L, strerror(errno)); + if (!ok && (iof->type & IOFILE_FLAG_CLOSE)) + io_file_close(L, iof); /* Return values are ignored (ok is 0). */ + return ok; } -LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ +LJLIB_PUSH(top-3) /* io_lines_iter */ +LJLIB_CF(io_lines) +{ + if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */ + IOFileUD *iof = io_file_open(L, "r"); + iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE; + setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1))); + } else { /* io.lines() iterates over stdin. */ + setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); + setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT)); + L->top += 2; + } + return 2; +} -LJLIB_CF(io_popen) +LJLIB_CF(io_type) { -#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) - const char *fname = luaL_checkstring(L, 1); - const char *mode = luaL_optstring(L, 2, "r"); - FILE **pf = io_file_new(L); -#ifdef LUA_USE_POSIX - fflush(NULL); - *pf = popen(fname, mode); -#else - *pf = _popen(fname, mode); -#endif - return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; -#else - luaL_error(L, LUA_QL("popen") " not supported"); -#endif + cTValue *o = lj_lib_checkany(L, 1); + if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE)) + setnilV(L->top++); + else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL) + lua_pushliteral(L, "file"); + else + lua_pushliteral(L, "closed file"); + return 1; } #include "lj_libdef.h" /* ------------------------------------------------------------------------ */ -static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) +static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name) { - FILE **pf = io_file_new(L); + IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); GCudata *ud = udataV(L->top-1); - GCtab *envt = tabV(L->top-2); - *pf = fp; - setgcref(ud->env, obj2gco(envt)); - lj_gc_objbarrier(L, obj2gco(ud), envt); - if (k > 0) { - lua_pushvalue(L, -1); - lua_rawseti(L, -5, k); - } - lua_setfield(L, -3, fname); -} - -static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls) -{ - lua_createtable(L, narr, 1); - lua_pushcfunction(L, cls); - lua_setfield(L, -2, "__close"); + ud->udtype = UDTYPE_IO_FILE; + /* NOBARRIER: The GCudata is new (marked white). */ + setgcref(ud->metatable, gcV(L->top-3)); + iof->fp = fp; + iof->type = IOFILE_TYPE_STDF; + lua_setfield(L, -2, name); + return obj2gco(ud); } LUALIB_API int luaopen_io(lua_State *L) { - lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); - if (tvisnil(L->top-1)) { - LJ_LIB_REG_(L, NULL, io_method); - lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); - } - io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */ - io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */ + lua_pushcfunction(L, lj_cf_io_lines_iter); + funcV(L->top-1)->c.ffid = FF_io_lines_iter; + LJ_LIB_REG_(L, NULL, io_method); + copyTV(L, L->top, L->top-1); L->top++; + lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); LJ_LIB_REG(L, io); - io_fenv_new(L, 0, lj_cf_io_std_close); - io_std_new(L, stdin, IO_INPUT, "stdin"); - io_std_new(L, stdout, IO_OUTPUT, "stdout"); - io_std_new(L, stderr, 0, "stderr"); - L->top--; + setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin")); + setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout")); + io_std_new(L, stderr, "stderr"); return 1; } diff --git a/src/lib_math.c b/src/lib_math.c index adc77c9d..f3803e8f 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) -LJLIB_ASM_(math_sinh) -LJLIB_ASM_(math_cosh) -LJLIB_ASM_(math_tanh) +LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) +LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) +LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) LJLIB_ASM_(math_frexp) LJLIB_ASM_(math_modf) LJLIB_REC(.) @@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); } */ /* PRNG state. */ -typedef struct TW223State { +struct RandomState { uint64_t gen[4]; /* State of the 4 LFSR generators. */ int valid; /* State is valid. */ -} TW223State; +}; /* Union needed for bit-pattern conversion between uint64_t and double. */ typedef union { uint64_t u64; double d; } U64double; /* Update generator i and compute a running xor of all states. */ #define TW223_GEN(i, k, q, s) \ - z = tw->gen[i]; \ + z = rs->gen[i]; \ z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ - r ^= z; tw->gen[i] = z; + r ^= z; rs->gen[i] = z; /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ -static LJ_NOINLINE double tw223_step(TW223State *tw) +LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) { uint64_t z, r = 0; - U64double u; TW223_GEN(0, 63, 31, 18) TW223_GEN(1, 58, 19, 28) TW223_GEN(2, 55, 24, 7) TW223_GEN(3, 47, 21, 8) - u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); - return u.d; + return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); } /* PRNG initialization function. */ -static void tw223_init(TW223State *tw, double d) +static void random_init(RandomState *rs, double d) { uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ int i; @@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d) r >>= 8; u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ - tw->gen[i] = u.u64; + rs->gen[i] = u.u64; } - tw->valid = 1; + rs->valid = 1; for (i = 0; i < 10; i++) - tw223_step(tw); + lj_math_random_step(rs); } /* PRNG extract function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ -LJLIB_CF(math_random) +LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_CF(math_random) LJLIB_REC(.) { int n = cast_int(L->top - L->base); - TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + U64double u; double d; - if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); - d = tw223_step(tw) - 1.0; + if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); + u.u64 = lj_math_random_step(rs); + d = u.d - 1.0; if (n > 0) { double r1 = lj_lib_checknum(L, 1); if (n == 1) { @@ -150,11 +150,11 @@ LJLIB_CF(math_random) } /* PRNG seed function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ LJLIB_CF(math_randomseed) { - TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); - tw223_init(tw, lj_lib_checknum(L, 1)); + RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + random_init(rs, lj_lib_checknum(L, 1)); return 0; } @@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed) LUALIB_API int luaopen_math(lua_State *L) { - TW223State *tw; - tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); - tw->valid = 0; /* Use lazy initialization to save some time on startup. */ + RandomState *rs; + rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); + rs->valid = 0; /* Use lazy initialization to save some time on startup. */ LJ_LIB_REG(L, math); #if defined(LUA_COMPAT_MOD) lua_getfield(L, -1, "fmod"); diff --git a/src/lib_string.c b/src/lib_string.c index 6c857328..e7ad12df 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L) { GCtab *mt; GCstr *mmstr; + global_State *g; LJ_LIB_REG(L, string); #if defined(LUA_COMPAT_GFIND) lua_getfield(L, -1, "gmatch"); lua_setfield(L, -2, "gfind"); #endif mt = lj_tab_new(L, 0, 1); - /* NOBARRIER: G(L)->mmname[] is a GC root. */ - setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); - mmstr = strref(G(L)->mmname[MM_index]); - if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); + /* NOBARRIER: basemt is a GC root. */ + g = G(L); + setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); + mmstr = strref(g->mmname[MM_index]); + if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); mt->nomm = cast_byte(~(1u<<MM_index)); return 1; diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 8ad4f8fb..6d8b4ccb 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) size_t rsize = oldsize - nb; newp = oldp; if (rsize >= MIN_CHUNK_SIZE) { - mchunkptr remainder = chunk_plus_offset(newp, nb); + mchunkptr rem = chunk_plus_offset(newp, nb); set_inuse(m, newp, nb); - set_inuse(m, remainder, rsize); - lj_alloc_free(m, chunk2mem(remainder)); + set_inuse(m, rem, rsize); + lj_alloc_free(m, chunk2mem(rem)); } } else if (next == m->top && oldsize + m->topsize > nb) { /* Expand into top */ diff --git a/src/lj_api.c b/src/lj_api.c index 7a759e5f..4bac5024 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); TValue tmp; - return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); + return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp))); } LUA_API int lua_isstring(lua_State *L, int idx) @@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) TValue tmp; if (LJ_LIKELY(tvisnum(o))) return numV(o); - else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) return numV(&tmp); else return 0; @@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) TValue tmp; if (tvisnum(o)) return numV(o); - else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) + else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); return numV(&tmp); } @@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def) return numV(o); else if (tvisnil(o)) return def; - else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) + else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); return numV(&tmp); } @@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) lua_Number n; if (LJ_LIKELY(tvisnum(o))) n = numV(o); - else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) n = numV(&tmp); else return 0; @@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) lua_Number n; if (LJ_LIKELY(tvisnum(o))) n = numV(o); - else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) n = numV(&tmp); else lj_err_argt(L, idx, LUA_TNUMBER); @@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) n = numV(o); else if (tvisnil(o)) return def; - else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) + else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) n = numV(&tmp); else lj_err_argt(L, idx, LUA_TNUMBER); @@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx) else if (tvisudata(o)) mt = tabref(udataV(o)->metatable); else - mt = tabref(G(L)->basemt[itypemap(o)]); + mt = tabref(basemt_obj(G(L), o)); if (mt == NULL) return 0; settabV(L, L->top, mt); @@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) if (lj_trace_flushall(L)) lj_err_caller(L, LJ_ERR_NOGCMM); if (tvisbool(o)) { - /* NOBARRIER: g->basemt[] is a GC root. */ - setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); - setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); + /* NOBARRIER: basemt is a GC root. */ + setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); + setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt)); } else { - /* NOBARRIER: g->basemt[] is a GC root. */ - setgcref(g->basemt[itypemap(o)], obj2gco(mt)); + /* NOBARRIER: basemt is a GC root. */ + setgcref(basemt_obj(g, o), obj2gco(mt)); } } L->top--; diff --git a/src/lj_asm.c b/src/lj_asm.c index a4d0c606..f26a40a5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -13,6 +13,7 @@ #include "lj_gc.h" #include "lj_str.h" #include "lj_tab.h" +#include "lj_frame.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" @@ -81,6 +82,10 @@ typedef struct ASMState { #define IR(ref) (&as->ir[(ref)]) +#define ASMREF_TMP1 REF_TRUE /* Temp. register. */ +#define ASMREF_TMP2 REF_FALSE /* Temp. register. */ +#define ASMREF_L REF_NIL /* Stores register for L. */ + /* Check for variant to invariant references. */ #define iscrossref(as, ref) ((ref) < as->sectref) @@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ if (rex != 0x40) *--(p) = rex; } #define FORCE_REX 0x200 +#define REX_64 (FORCE_REX|0x080000) #else #define REXRB(p, rr, rb) ((void)0) #define FORCE_REX 0 +#define REX_64 0 #endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) @@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, { uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); if (rex != 0x40) { + rex |= (rr >> 16); if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } *--p = (MCode)rex; } @@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target) #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) -/* Argument setup for C calls. Up to 3 args need no stack adjustment. */ -#define emit_setargr(as, narg, r) \ - emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4); -#define emit_setargi(as, narg, imm) \ - emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm)) -#define emit_setargp(as, narg, ptr) \ - emit_setargi(as, (narg), ptr2addr((ptr))) - /* -- Register allocator debugging ---------------------------------------- */ /* #define LUAJIT_DEBUG_RA */ @@ -578,10 +578,6 @@ static void ra_setup(ASMState *as) memset(as->phireg, 0, sizeof(as->phireg)); memset(as->cost, 0, sizeof(as->cost)); as->cost[RID_ESP] = REGCOST(~0u, 0u); - - /* Start slots for spill slot allocation. */ - as->evenspill = (SPS_FIRST+1)&~1; - as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0; } /* Rematerialize constants. */ @@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) } else if (ir->o == IR_BASE) { ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); + } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ + lua_assert(irt_isnil(ir->t)); + emit_getgl(as, r, jit_L); } else { lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KNULL); @@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir) return sps_scale(slot); } +/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */ +static Reg ra_releasetmp(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + Reg r = ir->r; + lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + ra_free(as, r); + ra_modified(as, r); + ir->r = RID_INIT; + return r; +} + /* Restore a register (marked as free). Rematerialize or force a spill. */ static Reg ra_restore(ASMState *as, IRRef ref) { @@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc) /* Arch-specific field offsets. */ static const uint8_t field_ofs[IRFL__MAX+1] = { -#define FLOFS(name, type, field) (uint8_t)offsetof(type, field), +#define FLOFS(name, ofs) (uint8_t)(ofs), IRFLDEF(FLOFS) #undef FLOFS 0 @@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irr; lua_assert(ir->o == IR_STRREF); - as->mrm.idx = as->mrm.base = RID_NONE; + as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); if (irref_isk(ir->op1)) { @@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) } } +static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow) +{ + if (ir->o == IR_KPTR) { + as->mrm.ofs = ir->i; + as->mrm.base = as->mrm.idx = RID_NONE; + } else { + lua_assert(ir->o == IR_STRREF); + asm_fusestrref(as, ir, allow); + } +} + /* Fuse load into memory operand. */ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { @@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } if (ir->o == IR_KNUM) { + RegSet avail = as->freeset & ~as->modset & RSET_FPR; lua_assert(allow != RSET_EMPTY); - if (!(as->freeset & ~as->modset & RSET_FPR)) { + if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ as->mrm.ofs = ptr2addr(ir_knum(ir)); as->mrm.base = as->mrm.idx = RID_NONE; return RID_MRM; @@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_FLOAD) { - /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ - if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { + /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ + if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && + noconflict(as, ref, IR_FSTORE)) { asm_fusefref(as, ir, xallow); return RID_MRM; } @@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_XLOAD) { - /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). + /* Generic fusion is only ok for 32 bit operand (but see asm_comp). ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). */ - if (irt_isint(ir->t)) { - asm_fusestrref(as, IR(ir->op1), xallow); + if (irt_isint(ir->t) || irt_isaddr(ir->t)) { + asm_fusexref(as, IR(ir->op1), xallow); return RID_MRM; } } @@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return ra_allocref(as, ref, allow); } +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + RegSet allow = RSET_ALL; + uint32_t n, nargs = CCI_NARGS(ci); + int32_t ofs = 0; + lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ + emit_call(as, ci->func); + for (n = 0; n < nargs; n++) { /* Setup args. */ +#if LJ_64 +#error "NYI: 64 bit mode call argument setup" +#endif + IRIns *ir = IR(args[n]); + if (irt_isnum(ir->t)) { + if ((ofs & 4) && irref_isk(args[n])) { + /* Split stores for unaligned FP consts. */ + emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); + emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); + } else { + Reg r; + if ((allow & RSET_FPR) == RSET_EMPTY) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + r = ra_alloc1(as, args[n], allow & RSET_FPR); + allow &= ~RID2RSET(r); + emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); + } + ofs += 8; + } else { + if ((ci->flags & CCI_FASTCALL) && n < 2) { + Reg r = n == 0 ? RID_ECX : RID_EDX; + if (args[n] < ASMREF_TMP1) { + emit_loadi(as, r, ir->i); + } else { + lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ + allow &= ~RID2RSET(r); + if (ra_hasreg(ir->r)) + emit_movrr(as, r, ir->r); + else + ra_allocref(as, args[n], RID2RSET(r)); + } + } else { + if (args[n] < ASMREF_TMP1) { + emit_movmroi(as, RID_ESP, ofs, ir->i); + } else { + Reg r; + if ((allow & RSET_GPR) == RSET_EMPTY) + lj_trace_err(as->J, LJ_TRERR_NYICOAL); + r = ra_alloc1(as, args[n], allow & RSET_GPR); + allow &= ~RID2RSET(r); + emit_movtomro(as, r, RID_ESP, ofs); + } + ofs += 4; + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + if (irt_isnum(ir->t)) { + int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ +#if LJ_64 + if ((ci->flags & CCI_CASTU64)) { + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */ + } else { + emit_movrmro(as, RID_RET, RID_ESP, ofs); + } + } else { + ra_destreg(as, ir, RID_FPRET); + } +#else + /* Number result is in x87 st0 for x86 calling convention. */ + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); + } + if ((ci->flags & CCI_CASTU64)) { + emit_movtomro(as, RID_RET, RID_ESP, ofs); + emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); + } else { + emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); + } +#endif + } else { + lua_assert(!irt_ispri(ir->t)); + ra_destreg(as, ir, RID_RET); + } + } +} + +/* Collect arguments from CALL* and ARG instructions. */ +static void asm_collectargs(ASMState *as, IRIns *ir, + const CCallInfo *ci, IRRef *args) +{ + uint32_t n = CCI_NARGS(ci); + lua_assert(n <= CCI_NARGS_MAX); + if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } + while (n-- > 1) { + ir = IR(ir->op1); + lua_assert(ir->o == IR_CARG); + args[n] = ir->op2; + } + args[0] = ir->op1; + lua_assert(IR(ir->op1)->o != IR_CARG); +} + +static void asm_call(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX]; + const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; + asm_collectargs(as, ir, ci, args); + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); +} + /* -- Type conversions ---------------------------------------------------- */ static void asm_tonum(ASMState *as, IRIns *ir) @@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir) static void asm_strto(ASMState *as, IRIns *ir) { - Reg str; - int32_t ofs; - RegSet drop = RSET_SCRATCH; /* Force a spill slot for the destination register (if any). */ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; + IRRef args[2]; + RegSet drop = RSET_SCRATCH; if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ ra_evictset(as, drop); asm_guardcc(as, CC_E); emit_rr(as, XO_TEST, RID_RET, RID_RET); - /* int lj_str_numconv(const char *s, TValue *n) */ - emit_call(as, lj_str_numconv); - ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ - if (ofs == 0) { - emit_setargr(as, 2, RID_ESP); - } else { - emit_setargr(as, 2, RID_RET); - emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs); - } - emit_setargr(as, 1, RID_RET); - str = ra_alloc1(as, ir->op1, RSET_GPR); - emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr)); + args[0] = ir->op1; + args[1] = ASMREF_TMP1; + asm_gencall(as, ci, args); + /* Store the result to the spill slot or slots SPS_TEMP1/2. */ + emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), + RID_ESP, sps_scale(ir->s)); } static void asm_tostr(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); + IRRef args[2]; + args[0] = ASMREF_L; as->gcsteps++; if (irt_isnum(irl->t)) { - /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ - emit_call(as, lj_str_fromnum); - emit_setargr(as, 1, RID_RET); - emit_getgl(as, RID_RET, jit_L); - emit_setargr(as, 2, RID_RET); - emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; + args[1] = ASMREF_TMP1; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); + emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), + RID_ESP, ra_spill(as, irl)); } else { - /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ - emit_call(as, lj_str_fromint); - emit_setargr(as, 1, RID_RET); - emit_getgl(as, RID_RET, jit_L); - emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR)); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; + args[1] = ir->op1; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); } } @@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir) lua_assert(!irt_isnil(ir->t)); return irt_type(ir->t)-IRT_FALSE; } else { - lua_assert(irt_isaddr(ir->t)); + lua_assert(irt_isgcv(ir->t)); lo = u32ptr(ir_kgc(ir)); hi = lo - 0x04c11db7; } @@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) static void asm_newref(ASMState *as, IRIns *ir) { - IRRef keyref = ir->op2; - IRIns *irkey = IR(keyref); - RegSet allow = RSET_GPR; - Reg tab, tmp; - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); - tab = ra_alloc1(as, ir->op1, allow); - tmp = ra_scratch(as, rset_clear(allow, tab)); - /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ - emit_call(as, lj_tab_newkey); - emit_setargr(as, 1, tmp); - emit_setargr(as, 2, tab); - emit_getgl(as, tmp, jit_L); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; + IRRef args[3]; + IRIns *irkey; + Reg tmp; + args[0] = ASMREF_L; + args[1] = ir->op1; + args[2] = ASMREF_TMP1; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + irkey = IR(ir->op2); if (irt_isnum(irkey->t)) { /* For numbers use the constant itself or a spill slot as a TValue. */ - if (irref_isk(keyref)) { - emit_setargp(as, 3, ir_knum(irkey)); - } else { - emit_setargr(as, 3, tmp); + if (irref_isk(ir->op2)) + emit_loada(as, tmp, ir_knum(irkey)); + else emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); - } } else { /* Otherwise use g->tmptv to hold the TValue. */ - lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); - emit_setargr(as, 3, tmp); - if (!irref_isk(keyref)) { - Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp)); + if (!irref_isk(ir->op2)) { + Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); emit_movtomro(as, src, tmp, 0); } else if (!irt_ispri(irkey->t)) { emit_movmroi(as, tmp, 0, irkey->i); @@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static void asm_fload(ASMState *as, IRIns *ir) +static void asm_fxload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); x86Op xo; - asm_fusefref(as, ir, RSET_GPR); + if (ir->o == IR_FLOAD) + asm_fusefref(as, ir, RSET_GPR); + else + asm_fusexref(as, IR(ir->op1), RSET_GPR); + /* ir->op2 is ignored -- unaligned loads are ok on x86. */ switch (irt_type(ir->t)) { case IRT_I8: xo = XO_MOVSXb; break; case IRT_U8: xo = XO_MOVZXb; break; @@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir) } } -static void asm_xload(ASMState *as, IRIns *ir) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - x86Op xo; - asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */ - /* ir->op2 is ignored -- unaligned loads are ok on x86. */ - switch (irt_type(ir->t)) { - case IRT_I8: xo = XO_MOVSXb; break; - case IRT_U8: xo = XO_MOVZXb; break; - case IRT_I16: xo = XO_MOVSXw; break; - case IRT_U16: xo = XO_MOVZXw; break; - default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break; - } - emit_mrm(as, xo, dest, RID_MRM); -} - -/* -- String ops ---------------------------------------------------------- */ +/* -- Allocations --------------------------------------------------------- */ static void asm_snew(ASMState *as, IRIns *ir) { - RegSet allow = RSET_GPR; - Reg left, right; - IRIns *irl; - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); - irl = IR(ir->op1); - left = irl->r; - right = IR(ir->op2)->r; - if (ra_noreg(left)) { - lua_assert(irl->o == IR_STRREF); - /* Get register only for non-const STRREF. */ - if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) { - if (ra_hasreg(right)) rset_clear(allow, right); - left = ra_allocref(as, ir->op1, allow); - } - } - if (ra_noreg(right) && !irref_isk(ir->op2)) { - if (ra_hasreg(left)) rset_clear(allow, left); - right = ra_allocref(as, ir->op2, allow); - } - /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */ - emit_call(as, lj_str_new); - emit_setargr(as, 1, RID_RET); - emit_getgl(as, RID_RET, jit_L); - if (ra_noreg(left)) /* Use immediate for const STRREF. */ - emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i + - (int32_t)sizeof(GCstr)); - else - emit_setargr(as, 2, left); - if (ra_noreg(right)) - emit_setargi(as, 3, IR(ir->op2)->i); - else - emit_setargr(as, 3, right); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; + IRRef args[3]; + args[0] = ASMREF_L; + args[1] = ir->op1; + args[2] = ir->op2; as->gcsteps++; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); } -/* -- Table ops ----------------------------------------------------------- */ - static void asm_tnew(ASMState *as, IRIns *ir) { - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); - /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ - emit_call(as, lj_tab_new); - emit_setargr(as, 1, RID_RET); - emit_setargi(as, 2, ir->op1); - emit_setargi(as, 3, ir->op2); - emit_getgl(as, RID_RET, jit_L); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; + IRRef args[2]; + args[0] = ASMREF_L; + args[1] = ASMREF_TMP1; as->gcsteps++; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24)); } static void asm_tdup(ASMState *as, IRIns *ir) { - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); - /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ - emit_call(as, lj_tab_dup); - emit_setargr(as, 1, RID_RET); - emit_setargp(as, 2, ir_kgc(IR(ir->op1))); - emit_getgl(as, RID_RET, jit_L); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; + IRRef args[2]; + args[0] = ASMREF_L; + args[1] = ir->op1; as->gcsteps++; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); } -static void asm_tlen(ASMState *as, IRIns *ir) -{ - ra_destreg(as, ir, RID_RET); - ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); - emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */ - emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR)); -} +/* -- Write barriers ------------------------------------------------------ */ static void asm_tbar(ASMState *as, IRIns *ir) { @@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir) static void asm_obar(ASMState *as, IRIns *ir) { - RegSet allow = RSET_GPR; - Reg obj, val; - GCobj *valp; + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; MCLabel l_end; - int32_t ofs; - ra_evictset(as, RSET_SCRATCH); - if (irref_isk(ir->op2)) { - valp = ir_kgc(IR(ir->op2)); - val = RID_NONE; - } else { - valp = NULL; - val = ra_alloc1(as, ir->op2, allow); - rset_clear(allow, val); - } - obj = ra_alloc1(as, ir->op1, allow); - l_end = emit_label(as); + Reg obj; /* No need for other object barriers (yet). */ lua_assert(IR(ir->op1)->o == IR_UREFC); - ofs = -(int32_t)offsetof(GCupval, tv); - /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ - emit_call(as, lj_gc_barrieruv); - if (ofs == 0) { - emit_setargr(as, 2, obj); - } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { - emit_setargr(as, 2, obj); - emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs); - } else { - emit_setargr(as, 2, RID_RET); - emit_rmro(as, XO_LEA, RID_RET, obj, ofs); - } - emit_setargp(as, 1, J2G(as->J)); - if (valp) - emit_setargp(as, 3, valp); - else - emit_setargr(as, 3, val); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; + args[1] = ir->op1; + asm_gencall(as, ci, args); + emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J)); + obj = IR(ir->op1)->r; emit_sjcc(as, CC_Z, l_end); emit_i8(as, LJ_GC_WHITES); - if (valp) - emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); - else + if (irref_isk(ir->op2)) { + GCobj *vp = ir_kgc(IR(ir->op2)); + emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked); + } else { + Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj)); emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); + } emit_sjcc(as, CC_Z, l_end); emit_i8(as, LJ_GC_BLACK); emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, - ofs + (int32_t)offsetof(GChead, marked)); + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); } /* -- FP/int arithmetic and logic operations ------------------------------ */ @@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) } } emit_mrm(as, XO_UCOMISD, left, right); - } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) { + } else { IRRef lref = ir->op1, rref = ir->op2; IROp leftop = (IROp)(IR(lref)->o); - lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); + lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); /* Swap constants (only for ABC) and fusable loads to the right. */ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ @@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) } else { Reg left; if (opisfusableload((IROp)irl->o) && - ((irt_isi8(irl->t) && checki8(imm)) || - (irt_isu8(irl->t) && checku8(imm)))) { - /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 - ** loads are handled here. The IRT_I16/IRT_U16 loads should never be - ** fused, since cmp word [mem], imm16 has a length-changing prefix. + ((irt_isu8(irl->t) && checku8(imm)) || + ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) || + (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) { + /* Only the IRT_INT case is fused by asm_fuseload. + ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads + ** are handled here. + ** Note that cmp word [mem], imm16 should not be generated, + ** since it has a length-changing prefix. Compares of a word + ** against a sign-extended imm8 are ok, however. */ IRType1 origt = irl->t; /* Temporarily flip types. */ irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; @@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) if (left == RID_MRM) { /* Fusion succeeded? */ asm_guardcc(as, cc); emit_i8(as, imm); - emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); + emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ? + XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM); return; } /* Otherwise handle register case as usual. */ } else { @@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) asm_guardcc(as, cc); emit_mrm(as, XO_CMP, left, right); } - } else { /* Handle ordered string compares. */ - RegSet allow = RSET_GPR; - /* This assumes lj_str_cmp never uses any SSE registers. */ - ra_evictset(as, (RSET_SCRATCH & RSET_GPR)); - asm_guardcc(as, cc); - emit_rr(as, XO_TEST, RID_RET, RID_RET); - emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */ - if (irref_isk(ir->op1)) { - emit_setargi(as, 1, IR(ir->op1)->i); - } else { - Reg left = ra_alloc1(as, ir->op1, allow); - rset_clear(allow, left); - emit_setargr(as, 1, left); - } - if (irref_isk(ir->op2)) { - emit_setargi(as, 2, IR(ir->op2)->i); - } else { - Reg right = ra_alloc1(as, ir->op2, allow); - emit_setargr(as, 2, right); - } } } @@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) /* -- GC handling --------------------------------------------------------- */ /* Sync all live GC values to Lua stack slots. */ -static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) +static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) { + /* Some care must be taken when allocating registers here, since this is + ** not part of the fast path. All scratch registers are evicted in the + ** fast path, so it's easiest to force allocation from scratch registers + ** only. This avoids register allocation state unification. + */ + RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); IRRef2 *map = &as->T->snapmap[snap->mapofs]; BCReg s, nslots = snap->nslots; for (s = 0; s < nslots; s++) { @@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as, SnapShot *snap) { + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; MCLabel l_end; - const BCIns *pc; - Reg tmp, base; + Reg base, lstate, tmp; RegSet drop = RSET_SCRATCH; - /* Must evict BASE because the stack may be reallocated by the GC. */ - if (ra_hasreg(IR(REF_BASE)->r)) - drop |= RID2RSET(IR(REF_BASE)->r); + if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */ + drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */ ra_evictset(as, drop); - base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET)); l_end = emit_label(as); - /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ - emit_call(as, lj_gc_step_jit); - emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); - emit_setargr(as, 1, RID_RET); - emit_setargi(as, 3, (int32_t)as->gcsteps); - emit_getgl(as, RID_RET, jit_L); - pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; - emit_setargp(as, 2, pc); - asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); - if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ - ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ + args[0] = ASMREF_L; + args[1] = ASMREF_TMP1; + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_loadi(as, tmp, (int32_t)as->gcsteps); + /* We don't know spadj yet, so get the C frame from L->cframe. */ + emit_movmroi(as, tmp, CFRAME_OFS_PC, + (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); + emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); + lstate = IR(ASMREF_L)->r; + emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); + /* It's ok if lstate is already in a non-scratch reg. But all allocations + ** in the non-fast path must use a scratch reg. See comment above. + */ + base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate)); + emit_movtomro(as, base, lstate, offsetof(lua_State, base)); + asm_gc_sync(as, snap, base); + /* BASE/L get restored anyway, better do it inside the slow path. */ + if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE); + if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r)) + ra_restore(as, ASMREF_L); /* Jump around GC step if GC total < GC threshold. */ tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); emit_sjcc(as, CC_B, l_end); @@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as) { int32_t spadj; emit_setgli(as, vmstate, (int32_t)as->J->curtrace); - spadj = sps_adjust(as); + spadj = sps_adjust(as->evenspill); as->T->spadjust = (uint16_t)spadj; emit_addptr(as, RID_ESP, -spadj); } @@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); - ra_free(as, r); - if (r != RID_BASE) { - ra_scratch(as, RID2RSET(RID_BASE)); - emit_rr(as, XO_MOV, r, RID_BASE); + lua_assert(!ra_hasspill(ir->s)); + if (ra_hasreg(r)) { + ra_free(as, r); + if (r != RID_BASE) { + ra_scratch(as, RID2RSET(RID_BASE)); + emit_rr(as, XO_MOV, r, RID_BASE); + } } } @@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as) } /* Calculate stack frame adjustment. */ - spadj = sps_adjust(as); + spadj = sps_adjust(as->evenspill); spdelta = spadj - (int32_t)as->parent->spadjust; if (spdelta < 0) { /* Don't shrink the stack frame. */ spadj = (int32_t)as->parent->spadjust; @@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as) GCfunc *fn = ir_kfunc(IR(ir->op2)); if (isluafunc(fn)) { BCReg fs = s + funcproto(fn)->framesize; - newbase = s; - if (secondbase == ~(BCReg)0) secondbase = s; if (fs > topslot) topslot = fs; + if (s != 0) { + newbase = s; + if (secondbase == ~(BCReg)0) secondbase = s; + } } } } @@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir) /* Loads and stores. */ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; - case IR_FLOAD: asm_fload(as, ir); break; + case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; case IR_SLOAD: asm_sload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; - /* String ops. */ + /* Allocations. */ case IR_SNEW: asm_snew(as, ir); break; - - /* Table ops. */ case IR_TNEW: asm_tnew(as, ir); break; case IR_TDUP: asm_tdup(as, ir); break; - case IR_TLEN: asm_tlen(as, ir); break; + + /* Write barriers. */ case IR_TBAR: asm_tbar(as, ir); break; case IR_OBAR: asm_obar(as, ir); break; @@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_TOSTR: asm_tostr(as, ir); break; case IR_STRTO: asm_strto(as, ir); break; + /* Calls. */ + case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; + case IR_CARG: break; + default: setintV(&as->J->errinfo, ir->o); lj_trace_err_info(as->J, LJ_TRERR_NYIIR); @@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T) IRRef i, nins; int inloop; + ra_setup(as); + /* Clear reg/sp for constants. */ for (i = T->nk; i < REF_BIAS; i++) IR(i)->prev = REGSP_INIT; @@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T) as->curins = nins; inloop = 0; + as->evenspill = SPS_FIRST; for (i = REF_FIRST; i < nins; i++) { IRIns *ir = IR(i); switch (ir->o) { @@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T) if (i == as->stopins+1 && ir->op1 == ir->op2) as->stopins++; break; + case IR_CALLN: case IR_CALLL: case IR_CALLS: { + const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; + /* NYI: not fastcall-aware, but doesn't matter (yet). */ + if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ + as->evenspill = (int32_t)CCI_NARGS(ci); +#if LJ_64 + ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); +#else + ir->prev = REGSP_HINT(RID_RET); +#endif + if (inloop) + as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ? + (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; + continue; + } /* C calls evict all scratch regs and return results in RID_RET. */ - case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR: + case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR: case IR_NEWREF: ir->prev = REGSP_HINT(RID_RET); if (inloop) @@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T) if (inloop) as->modset = RSET_SCRATCH; break; - /* Ordered string compares evict all integer scratch registers. */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - if (irt_isstr(ir->t) && inloop) - as->modset |= (RSET_SCRATCH & RSET_GPR); - break; /* Non-constant shift counts need to be in RID_ECX. */ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) @@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T) } ir->prev = REGSP_INIT; } + if ((as->evenspill & 1)) + as->oddspill = as->evenspill++; + else + as->oddspill = 0; } /* -- Assembler core ------------------------------------------------------ */ @@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T) as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; /* Setup register allocation. */ - ra_setup(as); asm_setup_regsp(as, T); if (!as->loopref) { diff --git a/src/lj_def.h b/src/lj_def.h index dbfd5bf5..3d6ba417 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t; #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) #define checki16(x) ((x) == (int32_t)(int16_t)(x)) +#define checku16(x) ((x) == (int32_t)(uint16_t)(x)) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) diff --git a/src/lj_gc.c b/src/lj_gc.c index 0d8a03ec..5c9d2bcb 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o) } } -/* Mark the base metatables. */ -static void gc_mark_basemt(global_State *g) +/* Mark GC roots. */ +static void gc_mark_gcroot(global_State *g) { - int i; - for (i = 0; i < BASEMT_MAX; i++) - if (tabref(g->basemt[i]) != NULL) - gc_markobj(g, tabref(g->basemt[i])); + ptrdiff_t i; + for (i = 0; i < GCROOT__MAX; i++) + if (gcref(g->gcroot[i]) != NULL) + gc_markobj(g, gcref(g->gcroot[i])); } /* Start a GC cycle and mark the root set. */ @@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g) gc_markobj(g, mainthread(g)); gc_markobj(g, tabref(mainthread(g)->env)); gc_marktv(g, &g->registrytv); - gc_mark_basemt(g); + gc_mark_gcroot(g); g->gc.state = GCSpropagate; } @@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L) lua_assert(!iswhite(obj2gco(mainthread(g)))); gc_markobj(g, L); /* Mark running thread. */ gc_mark_curtrace(g); /* Mark current trace. */ - gc_mark_basemt(g); /* Mark base metatables (again). */ + gc_mark_gcroot(g); /* Mark GC roots (again). */ gc_propagate_gray(g); /* Propagate all of the above. */ setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ @@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L) } /* Ditto, but fix the stack top first. */ -void lj_gc_step_fixtop(lua_State *L) +void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) { if (curr_funcisL(L)) L->top = curr_topL(L); lj_gc_step(L); } /* Perform multiple GC steps. Called from JIT-compiled code. */ -void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) +void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps) { - cframe_pc(cframe_raw(L->cframe)) = pc; L->top = curr_topL(L); while (steps-- > 0 && lj_gc_step(L) == 0) ; @@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) makewhite(g, o); /* Make it white to avoid the following barrier. */ } -/* The reason for duplicating this is that it needs to be visible from ASM. */ -void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) +/* Specialized barrier for closed upvalue. Pass &uv->tv. */ +void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv) { - lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); - lua_assert(o->gch.gct == ~LJ_TUPVAL); - /* Preserve invariant during propagation. Otherwise it doesn't matter. */ +#define TV2MARKED(x) \ + (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked))) if (g->gc.state == GCSpropagate) - gc_mark(g, v); /* Move frontier forward. */ + gc_mark(g, gcV(tv)); else - makewhite(g, o); /* Make it white to avoid the following barrier. */ + TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g); +#undef TV2MARKED } /* Close upvalue. Also needs a write barrier. */ diff --git a/src/lj_gc.h b/src/lj_gc.h index 192066d3..0dbb9b82 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); LJ_FUNC void lj_gc_finalizeudata(lua_State *L); LJ_FUNC void lj_gc_freeall(global_State *g); LJ_FUNCA int lj_gc_step(lua_State *L); -LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); -LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); +LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L); +LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps); LJ_FUNC void lj_gc_fullgc(lua_State *L); /* GC check: drive collector forward if the GC threshold has been reached. */ @@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L); /* Write barriers. */ LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); -LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); +LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv); LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); diff --git a/src/lj_ir.c b/src/lj_ir.c index 1efb12f0..cf0b6b55 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -6,16 +6,22 @@ #define lj_ir_c #define LUA_CORE +/* For pointers to libc/libm functions. */ +#include <stdio.h> +#include <math.h> + #include "lj_obj.h" #if LJ_HASJIT #include "lj_gc.h" #include "lj_str.h" +#include "lj_tab.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" #include "lj_trace.h" +#include "lj_lib.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -32,6 +38,17 @@ IRDEF(IRMODE) 0 }; +/* C call info for CALL* instructions. */ +LJ_DATADEF const CCallInfo lj_ir_callinfo[] = { +#define IRCALLCI(name, nargs, kind, type, flags) \ + { (ASMFunction)name, \ + (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) }, +IRCALLDEF(IRCALLCI) +#undef IRCALLCI + { NULL, 0 } +}; + + /* -- IR emitter ---------------------------------------------------------- */ /* Grow IR buffer at the top. */ @@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J) return TREF(ref, irt_t((ir->t = fins->t))); } +/* Emit call to a C function. */ +TRef lj_ir_call(jit_State *J, IRCallID id, ...) +{ + const CCallInfo *ci = &lj_ir_callinfo[id]; + uint32_t n = CCI_NARGS(ci); + TRef tr = TREF_NIL; + va_list argp; + va_start(argp, id); + if ((ci->flags & CCI_L)) n--; + if (n > 0) + tr = va_arg(argp, IRRef); + while (n-- > 1) + tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef)); + va_end(argp); + if (CCI_OP(ci) == IR_CALLS) + J->needsnap = 1; /* Need snapshot after call with side effect. */ + return emitir(CCI_OPTYPE(ci), tr, id); +} + /* -- Interning of constants ---------------------------------------------- */ /* diff --git a/src/lj_ir.h b/src/lj_ir.h index a6973a81..9a7e711d 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -8,6 +8,8 @@ #include "lj_obj.h" +/* -- IR instructions ----------------------------------------------------- */ + /* IR instruction definition. Order matters, see below. */ #define IRDEF(_) \ /* Miscellaneous ops. */ \ @@ -101,13 +103,12 @@ _(USTORE, S , ref, ref) \ _(FSTORE, S , ref, ref) \ \ - /* String ops. */ \ - _(SNEW, N , ref, ref) \ - \ - /* Table ops. */ \ + /* Allocations. */ \ + _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \ _(TNEW, A , lit, lit) \ _(TDUP, A , ref, ___) \ - _(TLEN, L , ref, ___) \ + \ + /* Write barriers. */ \ _(TBAR, S , ref, ___) \ _(OBAR, S , ref, ref) \ \ @@ -118,6 +119,12 @@ _(TOSTR, N , ref, ___) \ _(STRTO, G , ref, ___) \ \ + /* Calls. */ \ + _(CALLN, N , ref, lit) \ + _(CALLL, L , ref, lit) \ + _(CALLS, S , ref, lit) \ + _(CARG, N , ref, ref) \ + \ /* End of list. */ /* IR opcodes (max. 256). */ @@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); +/* -- Named IR literals --------------------------------------------------- */ + /* FPMATH sub-functions. ORDER FPM. */ #define IRFPMDEF(_) \ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ @@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM) IRFPM__MAX } IRFPMathOp; -/* FLOAD field IDs. */ +/* FLOAD fields. */ #define IRFLDEF(_) \ - _(STR_LEN, GCstr, len) \ - _(FUNC_ENV, GCfunc, l.env) \ - _(TAB_META, GCtab, metatable) \ - _(TAB_ARRAY, GCtab, array) \ - _(TAB_NODE, GCtab, node) \ - _(TAB_ASIZE, GCtab, asize) \ - _(TAB_HMASK, GCtab, hmask) \ - _(TAB_NOMM, GCtab, nomm) \ - _(UDATA_META, GCudata, metatable) + _(STR_LEN, offsetof(GCstr, len)) \ + _(FUNC_ENV, offsetof(GCfunc, l.env)) \ + _(TAB_META, offsetof(GCtab, metatable)) \ + _(TAB_ARRAY, offsetof(GCtab, array)) \ + _(TAB_NODE, offsetof(GCtab, node)) \ + _(TAB_ASIZE, offsetof(GCtab, asize)) \ + _(TAB_HMASK, offsetof(GCtab, hmask)) \ + _(TAB_NOMM, offsetof(GCtab, nomm)) \ + _(UDATA_META, offsetof(GCudata, metatable)) \ + _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ + _(UDATA_FILE, sizeof(GCudata)) typedef enum { -#define FLENUM(name, type, field) IRFL_##name, +#define FLENUM(name, ofs) IRFL_##name, IRFLDEF(FLENUM) #undef FLENUM IRFL__MAX @@ -183,7 +194,8 @@ IRFLDEF(FLENUM) #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ /* XLOAD mode, stored in op2. */ -#define IRXLOAD_UNALIGNED 1 +#define IRXLOAD_READONLY 1 /* Load from read-only data. */ +#define IRXLOAD_UNALIGNED 2 /* Unaligned load. */ /* TOINT mode, stored in op2. Ordered by strength of the checks. */ #define IRTOINT_CHECK 0 /* Number checked for integerness. */ @@ -191,6 +203,67 @@ IRFLDEF(FLENUM) #define IRTOINT_ANY 2 /* Any FP number is ok. */ #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ +/* C call info for CALL* instructions. */ +typedef struct CCallInfo { + ASMFunction func; /* Function pointer. */ + uint32_t flags; /* Number of arguments and flags. */ +} CCallInfo; + +#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ +#define CCI_NARGS_MAX 16 /* Max. # of args. */ + +#define CCI_OTSHIFT 16 +#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ +#define CCI_OPSHIFT 24 +#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ + +#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) +#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) +#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) +#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL) +#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL) +#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL) + +/* C call info flags. */ +#define CCI_L 0x0100 /* Implicit L arg. */ +#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ +#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ +#define CCI_FASTCALL 0x0800 /* Fastcall convention. */ + +/* Function definitions for CALL* instructions. */ +#define IRCALLDEF(_) \ + _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ + _(lj_str_new, 3, S, STR, CCI_L) \ + _(lj_str_tonum, 2, FN, INT, 0) \ + _(lj_str_fromint, 2, FN, STR, CCI_L) \ + _(lj_str_fromnum, 2, FN, STR, CCI_L) \ + _(lj_tab_new1, 2, FS, TAB, CCI_L) \ + _(lj_tab_dup, 2, FS, TAB, CCI_L) \ + _(lj_tab_newkey, 3, S, PTR, CCI_L) \ + _(lj_tab_len, 1, FL, INT, 0) \ + _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \ + _(lj_gc_barrieruv, 2, FS, NIL, 0) \ + _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ + _(sinh, 1, N, NUM, 0) \ + _(cosh, 1, N, NUM, 0) \ + _(tanh, 1, N, NUM, 0) \ + _(fputc, 2, S, INT, 0) \ + _(fwrite, 4, S, INT, 0) \ + _(fflush, 1, S, INT, 0) \ + \ + /* End of list. */ + +typedef enum { +#define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name, +IRCALLDEF(IRCALLENUM) +#undef IRCALLENUM + IRCALL__MAX +} IRCallID; + +LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; + +/* -- IR operands --------------------------------------------------------- */ + /* IR operand mode (2 bit). */ typedef enum { IRMref, /* IR reference. */ @@ -227,6 +300,8 @@ typedef enum { LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; +/* -- IR instruction types ------------------------------------------------ */ + /* IR result type and flags (8 bit). */ typedef enum { /* Map of itypes to non-negative numbers. ORDER LJ_T */ @@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; /* Stored combined IR opcode and type. */ typedef uint16_t IROpT; +/* -- IR references ------------------------------------------------------- */ + /* IR references. */ typedef uint16_t IRRef1; /* One stored reference. */ typedef uint32_t IRRef2; /* Two stored references. */ @@ -382,6 +459,8 @@ typedef uint32_t TRef; #define TREF_FALSE (TREF_PRI(IRT_FALSE)) #define TREF_TRUE (TREF_PRI(IRT_TRUE)) +/* -- IR format ----------------------------------------------------------- */ + /* IR instruction format (64 bit). ** ** 16 16 8 8 8 8 @@ -425,5 +504,6 @@ typedef union IRIns { #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_knum(ir) (mref((ir)->ptr, cTValue)) +#define ir_kptr(ir) (mref((ir)->ptr, void)) #endif diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 69b0a955..52077ad5 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -6,6 +6,8 @@ #ifndef _LJ_IROPT_H #define _LJ_IROPT_H +#include <stdarg.h> + #include "lj_obj.h" #include "lj_jit.h" @@ -13,6 +15,7 @@ /* IR emitter. */ LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); +LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); /* Save current IR in J->fold.ins, but do not emit it (yet). */ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) @@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); /* Emit IR instructions with on-the-fly optimizations. */ LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim); /* Special return values for the fold functions. */ enum { @@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); /* Dead-store elimination. */ diff --git a/src/lj_lib.c b/src/lj_lib.c index 683c66d6..d8254093 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) { TValue *o = L->base + narg-1; if (!(o < L->top && - (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) + (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))))) lj_err_argt(L, narg, LUA_TNUMBER); return numV(o); } diff --git a/src/lj_lib.h b/src/lj_lib.h index 59a0f2be..a7a6317e 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, #define LIBINIT_FFID 0xfe #define LIBINIT_END 0xff +/* Exported library functions. */ + +typedef struct RandomState RandomState; +LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); + #endif diff --git a/src/lj_meta.c b/src/lj_meta.c index dff01f85..1182d908 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) else if (tvisudata(o)) mt = tabref(udataV(o)->metatable); else - mt = tabref(G(L)->basemt[itypemap(o)]); + mt = tabref(basemt_obj(G(L), o)); if (mt) { cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); if (mo) @@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n) { if (tvisnum(o)) return o; - else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) + else if (tvisstr(o) && lj_str_tonum(strV(o), n)) return n; else return NULL; @@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) top = curr_top(L); setcont(top, ne ? lj_cont_condf : lj_cont_condt); copyTV(L, top+1, mo); - it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; + it = ~o1->gch.gct; setgcV(L, top+2, &o1->gch, it); setgcV(L, top+3, &o2->gch, it); return top+2; /* Trigger metamethod call. */ diff --git a/src/lj_obj.h b/src/lj_obj.h index 9101f053..cebeda9b 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -315,7 +315,7 @@ typedef struct GCstr { /* Userdata object. Payload follows. */ typedef struct GCudata { GCHeader; - uint8_t unused1; + uint8_t udtype; /* Userdata type. */ uint8_t unused2; GCRef env; /* Should be at same offset in GCfunc. */ MSize len; /* Size of payload. */ @@ -323,6 +323,13 @@ typedef struct GCudata { uint32_t align1; /* To force 8 byte alignment of the payload. */ } GCudata; +/* Userdata types. */ +enum { + UDTYPE_USERDATA, /* Regular userdata. */ + UDTYPE_IO_FILE, /* I/O library FILE. */ + UDTYPE__MAX +}; + #define uddata(u) ((void *)((u)+1)) #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) @@ -496,7 +503,17 @@ MMDEF(MMENUM) MM_FAST = MM_eq } MMS; -#define BASEMT_MAX ((~LJ_TNUMX)+1) +/* GC root IDs. */ +typedef enum { + GCROOT_BASEMT, /* Metatables for base types. */ + GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */ + GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ + GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ + GCROOT__MAX +} GCRootID; + +#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)]) +#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) typedef struct GCState { MSize total; /* Memory currently allocated. */ @@ -544,7 +561,7 @@ typedef struct global_State { volatile int32_t vmstate; /* VM state or current JIT code trace number. */ GCRef jit_L; /* Current JIT code lua_State or NULL. */ MRef jit_base; /* Current JIT code L->base. */ - GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */ + GCRef gcroot[GCROOT__MAX]; /* GC roots. */ GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ } global_State; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 2102561d..98266d21 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -282,21 +282,50 @@ LJFOLD(STRTO KGC) LJFOLDF(kfold_strto) { TValue n; - if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) + if (lj_str_tonum(ir_kstr(fleft), &n)) return lj_ir_knum(J, numV(&n)); return FAILFOLD; } -LJFOLD(SNEW STRREF KINT) -LJFOLDF(kfold_snew) +LJFOLD(SNEW KPTR KINT) +LJFOLDF(kfold_snew_kptr) +{ + GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i); + return lj_ir_kstr(J, s); +} + +LJFOLD(SNEW any KINT) +LJFOLDF(kfold_snew_empty) { if (fright->i == 0) return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); + return NEXTFOLD; +} + +LJFOLD(STRREF KGC KINT) +LJFOLDF(kfold_strref) +{ + GCstr *str = ir_kstr(fleft); + lua_assert((MSize)fright->i < str->len); + return lj_ir_kptr(J, (char *)strdata(str) + fright->i); +} + +LJFOLD(STRREF SNEW any) +LJFOLDF(kfold_strref_snew) +{ PHIBARRIER(fleft); - if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { - const char *s = strdata(ir_kstr(IR(fleft->op1))); - int32_t ofs = IR(fleft->op2)->i; - return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); + if (irref_isk(fins->op2) && fright->i == 0) { + return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */ + } else { + /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */ + IRIns *ir = IR(fleft->op1); + IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */ + lua_assert(ir->o == IR_STRREF); + PHIBARRIER(ir); + fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ + fins->op1 = str; + fins->ot = IRT(IR_STRREF, IRT_PTR); + return RETRYFOLD; } return NEXTFOLD; } @@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp) } } -LJFOLD(LT KGC KGC) -LJFOLD(GE KGC KGC) -LJFOLD(LE KGC KGC) -LJFOLD(GT KGC KGC) -LJFOLDF(kfold_strcomp) +LJFOLD(CALLN CARG IRCALL_lj_str_cmp) +LJFOLDF(kfold_strcmp) { - if (irt_isstr(fins->t)) { - GCstr *a = ir_kstr(fleft); - GCstr *b = ir_kstr(fright); - return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + GCstr *a = ir_kstr(IR(fleft->op1)); + GCstr *b = ir_kstr(IR(fleft->op2)); + return INTFOLD(lj_str_cmp(a, b)); } return NEXTFOLD; } @@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc) uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : len == 2 ? IRT(IR_XLOAD, IRT_U16) : IRTI(IR_XLOAD)); - TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); + TRef tmp = emitir(ot, strref, + IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0)); TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); if (len == 3) tmp = emitir(IRTI(IR_BAND), tmp, @@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload) LJFOLD(ULOAD any) LJFOLDX(lj_opt_fwd_uload) -LJFOLD(TLEN any) -LJFOLDX(lj_opt_fwd_tlen) +LJFOLD(CALLL any IRCALL_lj_tab_len) +LJFOLDX(lj_opt_fwd_tab_len) /* Upvalue refs are really loads, but there are no corresponding stores. ** So CSE is ok for them, except for UREFO across a GC step (see below). @@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah) /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ LJFOLD(FLOAD KGC IRFL_STR_LEN) -LJFOLDF(fload_str_len) +LJFOLDF(fload_str_len_kgc) { if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) return INTFOLD((int32_t)ir_kstr(fleft)->len); return NEXTFOLD; } +LJFOLD(FLOAD SNEW IRFL_STR_LEN) +LJFOLDF(fload_str_len_snew) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { + PHIBARRIER(fleft); + return fleft->op2; + } + return NEXTFOLD; +} + LJFOLD(FLOAD any IRFL_STR_LEN) LJFOLDX(lj_opt_cse) @@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload) return J->slot[fins->op1]; } -/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ -LJFOLD(XLOAD STRREF any) -LJFOLDF(xload_str) +LJFOLD(XLOAD KPTR any) +LJFOLDF(xload_kptr) { - if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { - GCstr *str = ir_kstr(IR(fleft->op1)); - int32_t ofs = IR(fleft->op2)->i; - lua_assert((MSize)ofs < str->len); - lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); - return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); + /* Only fold read-only integer loads for now. */ + if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t)) + return INTFOLD(kfold_xload(fins, ir_kptr(fleft))); + return NEXTFOLD; +} + +/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ +LJFOLD(XLOAD any any) +LJFOLDF(fwd_xload) +{ + IRRef ref = J->chain[IR_XLOAD]; + IRRef op1 = fins->op1; + while (ref > op1) { + if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t)) + return ref; + ref = IR(ref)->prev; } - return CSEFOLD; + return EMITFOLD; } -/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */ /* -- Write barriers ------------------------------------------------------ */ @@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any) LJFOLDX(lj_opt_dse_fstore) LJFOLD(NEWREF any any) /* Treated like a store. */ +LJFOLD(CALLS any any) +LJFOLD(CALLL any any) /* Safeguard fallback. */ LJFOLD(TNEW any any) LJFOLD(TDUP any) -LJFOLDF(store_raw) -{ - return EMITFOLD; -} +LJFOLDX(lj_ir_emit) /* ------------------------------------------------------------------------ */ @@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J) } } +/* CSE with explicit search limit. */ +TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) +{ + IRRef ref = J->chain[fins->o]; + IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); + while (ref > lim) { + if (IR(ref)->op12 == op12) + return ref; + ref = IR(ref)->prev; + } + return lj_ir_emit(J); +} + /* ------------------------------------------------------------------------ */ #undef IR diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f9a2a808..90ab1b6f 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J) /* Undo any partial changes made by the loop optimization. */ static void loop_undo(jit_State *J, IRRef ins) { + ptrdiff_t i; lj_ir_rollback(J, ins); + for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ + BPropEntry *bp = &J->bpropcache[i]; + if (bp->val >= ins) + bp->key = 0; + } for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ IRIns *ir = IR(ins); irt_clearphi(ir->t); diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 94fc4ad8..882ba6c5 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) conflict: /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_ULOAD]; - while (ref > lim) { - IRIns *load = IR(ref); - if (load->op1 == uref) - return ref; /* Load forwarding. */ - ref = load->prev; - } - return EMITFOLD; /* Conflict or no match. */ + return lj_opt_cselim(J, lim); } /* USTORE elimination. */ @@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) conflict: /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_FLOAD]; - while (ref > lim) { - IRIns *load = IR(ref); - if (load->op1 == oref && load->op2 == fid) - return ref; /* Load forwarding. */ - ref = load->prev; - } - return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ + return lj_opt_cselim(J, lim); } /* FSTORE elimination. */ @@ -458,10 +444,10 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } -/* -- TLEN forwarding ----------------------------------------------------- */ +/* -- Forwarding of lj_tab_len -------------------------------------------- */ /* This is rather simplistic right now, but better than nothing. */ -TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) +TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) { IRRef tab = fins->op1; /* Table reference. */ IRRef lim = tab; /* Search limit. */ @@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) } /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_TLEN]; - while (ref > lim) { - IRIns *tlen = IR(ref); - if (tlen->op1 == tab) - return ref; /* Load forwarding. */ - ref = tlen->prev; - } - return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ + return lj_opt_cselim(J, lim); } /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 60a6afb8..b9107c5e 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) { lua_Number n; - if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) + if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) lj_trace_err(J, LJ_TRERR_BADTYPE); n = numV(vc); /* Limit narrowing for pow to small exponents (or for two constants). */ diff --git a/src/lj_parse.c b/src/lj_parse.c index 000772fe..1de07e92 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) GCstr *s = lj_str_new(L, str, len); TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ + lj_gc_check(L); return s; } diff --git a/src/lj_record.c b/src/lj_record.c index 68a233b9..9b223ff6 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); } else { /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ - mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); + mt = tabref(basemt_obj(J2G(J), &ix->tabv)); if (mt == NULL) return 0; /* No metamethod. */ mix.tab = lj_ir_ktab(J, mt); @@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd); /* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { - if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) + if (tvisstr(o) && !lj_str_tonum(strV(o), o)) lj_trace_err(J, LJ_TRERR_BADTYPE); return lj_num2bit(numV(o)); } @@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) /* Otherwise res[0] already contains the result. */ } else if (tref_isnumber(tr)) { res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); + } else if (tref_ispri(tr)) { + res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)])); } else { recff_err_nyi(J, rd); } @@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd) res[0] = emitir(IRTN(IR_ATAN2), y, x); } +static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef tr = lj_ir_tonum(J, arg[0]); + res[0] = lj_ir_call(J, rd->data, tr); +} + static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) { TRef tr = arg[0]; - if (tref_isinteger(arg[0])) { + if (tref_isinteger(tr)) { res[0] = tr; res[1] = lj_ir_kint(J, 0); } else { @@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd) static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) { + TRef tr = lj_ir_tonum(J, arg[0]); if (!tref_isnumber_str(arg[1])) lj_trace_err(J, LJ_TRERR_BADTYPE); - res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); + res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]); UNUSED(rd); } @@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd) res[0] = tr; } +static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd) +{ + GCudata *ud = udataV(&rd->fn->c.upvalue[0]); + TRef tr, one; + lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ + tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); + one = lj_ir_knum_one(J); + tr = emitir(IRTN(IR_SUB), tr, one); + if (arg[0]) { + TRef tr1 = lj_ir_tonum(J, arg[0]); + if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */ + TRef tr2 = lj_ir_tonum(J, arg[1]); + tr2 = emitir(IRTN(IR_SUB), tr2, tr1); + tr2 = emitir(IRTN(IR_ADD), tr2, one); + tr = emitir(IRTN(IR_MUL), tr, tr2); + tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); + tr = emitir(IRTN(IR_ADD), tr, tr1); + } else { /* d = floor(d*r1) + 1.0 */ + tr = emitir(IRTN(IR_MUL), tr, tr1); + tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); + tr = emitir(IRTN(IR_ADD), tr, one); + } + } + res[0] = tr; +} + /* -- Bit library fast functions ------------------------------------------ */ /* Record unary bit.tobit, bit.bnot, bit.bswap. */ @@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) for (i = 0; i < len; i++) { TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); - res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); + res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); } } else { /* Empty range or range underflow: return no results. */ emitir(IRTGI(IR_LE), trend, trstart); @@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) { if (tref_istab(arg[0])) { - res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); + res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); } /* else: Interpreter will throw. */ UNUSED(rd); } @@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd) { if (tref_istab(arg[0])) { if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ - TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); + TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); GCtab *t = tabV(&rd->argv[0]); MSize len = lj_tab_len(t); emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); @@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) rd->nres = 0; if (tref_istab(arg[0]) && arg[1]) { if (!arg[2]) { /* Simple push: t[#t+1] = v */ - TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); + TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); GCtab *t = tabV(&rd->argv[0]); RecordIndex ix; ix.tab = arg[0]; @@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) } /* else: Interpreter will throw. */ } +/* -- I/O library fast functions ------------------------------------------ */ + +/* Get FILE* for I/O function. Any I/O error aborts recording, so there's +** no need to encode the alternate cases for any of the guards. +*/ +static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id) +{ + TRef tr, ud, fp; + if (id) { /* io.func() */ + tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); + ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); + } else { /* fp:method() */ + ud = arg[0]; + if (!tref_isudata(ud)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); + emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); + } + fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE); + emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD)); + return fp; +} + +static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef fp = recff_io_fp(J, res, rd->data); + TRef zero = lj_ir_kint(J, 0); + TRef one = lj_ir_kint(J, 1); + ptrdiff_t i = rd->data == 0 ? 1 : 0; + for (; arg[i]; i++) { + TRef str = lj_ir_tostr(J, arg[i]); + TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero); + TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); + if (tref_isk(len) && IR(tref_ref(len))->i == 1) { + TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); + tr = lj_ir_call(J, IRCALL_fputc, tr, fp); + if (rd->cres != 0) /* Check result only if requested. */ + emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); + } else { + TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp); + if (rd->cres != 0) /* Check result only if requested. */ + emitir(IRTGI(IR_EQ), tr, len); + } + } + res[0] = TREF_TRUE; +} + +static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd) +{ + TRef fp = recff_io_fp(J, res, rd->data); + TRef tr = lj_ir_call(J, IRCALL_fflush, fp); + if (rd->cres != 0) /* Check result only if requested. */ + emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0)); + res[0] = TREF_TRUE; +} + /* -- Record calls and returns -------------------------------------------- */ #undef arg @@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J) if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; } else if (ta == IRT_STR) { if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; + ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); + rc = lj_ir_kint(J, 0); + ta = IRT_INT; } else { rec_mm_comp(J, &ix, (int)op); break; @@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J) if (tref_isstr(rc)) { rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); } else if (tref_istab(rc)) { - rc = emitir(IRTI(IR_TLEN), rc, 0); + rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); } else { ix.tab = rc; copyTV(J->L, &ix.tabv, &ix.keyv); @@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J) /* fallthrough */ case BC_CALL: callop: - if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */ - } rec_call(J, ra, (int)(rb-1), (int)(rc-1)); break; @@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T) BCReg j; for (j = 0; j < s; j++) if (snap_ref(map[j]) == ref) { - if (ir->o == IR_FRAME && irt_isfunc(ir->t)) + if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { + lua_assert(s != 0); J->baseslot = s+1; + J->framedepth++; + } tr = J->slot[j]; goto dupslot; } @@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T) case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ if (irt_isfunc(ir->t)) { - J->baseslot = s+1; - J->framedepth++; + if (s != 0) { + J->baseslot = s+1; + J->framedepth++; + } tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); } else { diff --git a/src/lj_snap.c b/src/lj_snap.c index 09cd095c..d27404f2 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr) GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); if (isluafunc(fn)) { TValue *fs; - newbase = o+1; - fs = newbase + funcproto(fn)->framesize; + fs = o+1 + funcproto(fn)->framesize; if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ + if (s != 0) newbase = o+1; } } } @@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr) setnilV(o); /* Clear unreferenced slots of newly added frames. */ } } - if (newbase) { /* Clear remainder of newly added frames. */ - L->base = newbase; - if (ntop >= L->maxstack) { /* Need to grow the stack again. */ - MSize need = (MSize)(ntop - o); - L->top = o; - lj_state_growstack(L, need); - o = L->top; - ntop = o + need; - } - L->top = curr_topL(L); - for (; o < ntop; o++) - setnilV(o); - } else { /* Must not clear slots of existing frame. */ - L->top = curr_topL(L); + if (newbase) L->base = newbase; + if (ntop >= L->maxstack) { /* Need to grow the stack again. */ + MSize need = (MSize)(ntop - o); + L->top = o; + lj_state_growstack(L, need); + o = L->top; + ntop = o + need; } + L->top = curr_topL(L); + for (; o < ntop; o++) /* Clear remainder of newly added frames. */ + setnilV(o); lua_assert(map + nslots == flinks-1); J->pc = (const BCIns *)(uintptr_t)(*--flinks); } diff --git a/src/lj_state.h b/src/lj_state.h index 54e85405..4e4185c0 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -17,7 +17,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L); LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); -LJ_FUNCA void lj_state_growstack1(lua_State *L); +LJ_FUNC void lj_state_growstack1(lua_State *L); static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) { diff --git a/src/lj_str.c b/src/lj_str.c index 26f91cba..62322b59 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -21,7 +21,7 @@ /* -- String interning ---------------------------------------------------- */ /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ -int32_t lj_str_cmp(GCstr *a, GCstr *b) +int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) { MSize i, n = a->len > b->len ? b->len : a->len; for (i = 0; i < n; i += 4) { @@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) /* -- Type conversions ---------------------------------------------------- */ +/* Convert string object to number. */ +int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n) +{ + return lj_str_numconv(strdata(str), n); +} + /* Convert string to number. */ -int lj_str_numconv(const char *s, TValue *n) +int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n) { lua_Number sign = 1; const uint8_t *p = (const uint8_t *)s; @@ -167,7 +173,7 @@ parsedbl: } /* Convert number to string. */ -GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) +GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) { char s[LUAI_MAXNUMBER2STR]; lua_Number n = *np; @@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) } /* Convert integer to string. */ -GCstr *lj_str_fromint(lua_State *L, int32_t k) +GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) { char s[1+10]; char *p = s+sizeof(s); diff --git a/src/lj_str.h b/src/lj_str.h index f7e56d16..e8b242c0 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -11,7 +11,7 @@ #include "lj_obj.h" /* String interning. */ -LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); +LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); @@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) /* Type conversions. */ -LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); -LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); -LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); +LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n); +LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n); +LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); +LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); /* String formatting. */ LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); diff --git a/src/lj_tab.c b/src/lj_tab.c index 9af51027..ceafb770 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) return t; } +GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) +{ + GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); + clearapart(t); + if (t->hmask > 0) clearhpart(t); + return t; +} + /* Duplicate a table. */ -GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) +GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) { GCtab *t; uint32_t asize, hmask; @@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray) static uint32_t bestasize(uint32_t bins[], uint32_t *narray) { uint32_t b, sum, na = 0, sz = 0, nn = *narray; - for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++) - if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) { + for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++) + if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) { sz = (2u<<b)+1; na = sum; } @@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j) ** Try to find a boundary in table `t'. A `boundary' is an integer index ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). */ -MSize lj_tab_len(GCtab *t) +MSize LJ_FASTCALL lj_tab_len(GCtab *t) { MSize j = (MSize)t->asize; if (j > 1 && tvisnil(arrayslot(t, j-1))) { diff --git a/src/lj_tab.h b/src/lj_tab.h index e9e8bcd1..b2a8c3aa 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -11,7 +11,8 @@ #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); -LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); +LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); +LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); @@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); -LJ_FUNCA MSize lj_tab_len(GCtab *t); +LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); #endif diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 3ee4fa00..2fb3c4b8 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -32,6 +32,11 @@ enum { /* Calling conventions. */ RID_RET = RID_EAX, +#if LJ_64 + RID_FPRET = RID_XMM0, +#else + RID_RETHI = RID_EDX, +#endif /* These definitions must match with the *.dasc file(s): */ RID_BASE = RID_EDX, /* Interpreter BASE. */ @@ -98,8 +103,8 @@ enum { }; /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ -#define sps_scale(slot) (4 * (int32_t)(slot)) -#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3)) +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3)) /* -- Exit state ---------------------------------------------------------- */ @@ -185,6 +190,7 @@ typedef enum { XO_ARITHib = XO_(80), XO_ARITHi = XO_(81), XO_ARITHi8 = XO_(83), + XO_ARITHiw8 = XO_66(83), XO_SHIFTi = XO_(c1), XO_SHIFT1 = XO_(d1), XO_SHIFTcl = XO_(d3), @@ -216,6 +222,7 @@ typedef enum { XO_CVTSI2SD = XO_f20f(2a), XO_CVTSD2SI = XO_f20f(2d), XO_CVTTSD2SI= XO_f20f(2c), + XO_MOVD = XO_660f(6e), XO_MOVDto = XO_660f(7e), XO_FLDq = XO_(dd), XOg_FLDq = 0, diff --git a/src/lj_udata.c b/src/lj_udata.c index 863889c9..717d483b 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c @@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) global_State *g = G(L); newwhite(g, ud); /* Not finalized. */ ud->gct = ~LJ_TUDATA; + ud->udtype = UDTYPE_USERDATA; ud->len = sz; /* NOBARRIER: The GCudata is new (marked white). */ setgcrefnull(ud->metatable); |