summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Mike Pall <mike> 2009-12-08 20:35:29 +0100
committer: Mike Pall <mike> 2009-12-08 20:35:29 +0100
commit: 3f1f9e11f4f699ae94182d4cba158092f434a7f6 (patch)
tree: 88fbb674a21a1d554d4b1ee9d4ef2c5fed6a1d88 /src
parent: 5287b9326479ea2b7dddd6f642673e58e5a7f354 (diff)
download: luajit2-3f1f9e11f4f699ae94182d4cba158092f434a7f6.tar.gz
Fast forward to sync public repo.
Compile math.sinh(), math.cosh(), math.tanh() and math.random(). Compile various io.*() functions. Drive the GC forward on string allocations in the parser. Improve KNUM fuse vs. load heuristics. Add abstract C call handling to IR.
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.dep 15
-rw-r--r-- src/buildvm.c 14
-rw-r--r-- src/buildvm.h 1
-rw-r--r-- src/buildvm_asm.c 8
-rw-r--r-- src/buildvm_fold.c 4
-rw-r--r-- src/buildvm_peobj.c 14
-rw-r--r-- src/buildvm_x86.dasc 117
-rw-r--r-- src/lib_base.c 13
-rw-r--r-- src/lib_io.c 405
-rw-r--r-- src/lib_math.c 52
-rw-r--r-- src/lib_string.c 10
-rw-r--r-- src/lj_alloc.c 6
-rw-r--r-- src/lj_api.c 26
-rw-r--r-- src/lj_asm.c 598
-rw-r--r-- src/lj_def.h 1
-rw-r--r-- src/lj_gc.c 36
-rw-r--r-- src/lj_gc.h 6
-rw-r--r-- src/lj_ir.c 36
-rw-r--r-- src/lj_ir.h 114
-rw-r--r-- src/lj_iropt.h 6
-rw-r--r-- src/lj_lib.c 2
-rw-r--r-- src/lj_lib.h 5
-rw-r--r-- src/lj_meta.c 6
-rw-r--r-- src/lj_obj.h 23
-rw-r--r-- src/lj_opt_fold.c 127
-rw-r--r-- src/lj_opt_loop.c 6
-rw-r--r-- src/lj_opt_mem.c 31
-rw-r--r-- src/lj_opt_narrow.c 2
-rw-r--r-- src/lj_parse.c 1
-rw-r--r-- src/lj_record.c 125
-rw-r--r-- src/lj_snap.c 28
-rw-r--r-- src/lj_state.h 2
-rw-r--r-- src/lj_str.c 14
-rw-r--r-- src/lj_str.h 9
-rw-r--r-- src/lj_tab.c 16
-rw-r--r-- src/lj_tab.h 5
-rw-r--r-- src/lj_target_x86.h 11
-rw-r--r-- src/lj_udata.c 1
38 files changed, 1153 insertions, 743 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 1fb81e27..779ee545 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
+ lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \
+ lj_lib.h lj_libdef.h
lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
@@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \
- lj_target.h lj_target_x86.h
+ lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \
+ lj_vm.h lj_target.h lj_target_x86.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
lj_ir.h lj_dispatch.h
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
- lj_traceerr.h
+ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
+ lj_bc.h lj_traceerr.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
diff --git a/src/buildvm.c b/src/buildvm.c
index b3738db4..4aba39d4 100644
--- a/src/buildvm.c
+++ b/src/buildvm.c
@@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME)
};
const char *const irfield_names[] = {
-#define FLNAME(name, type, field) #name,
+#define FLNAME(name, ofs) #name,
IRFLDEF(FLNAME)
#undef FLNAME
NULL
};
+const char *const ircall_names[] = {
+#define IRCALLNAME(name, nargs, kind, type, flags) #name,
+IRCALLDEF(IRCALLNAME)
+#undef IRCALLNAME
+ NULL
+};
+
static const char *const trace_errors[] = {
#define TREDEF(name, msg) msg,
#include "lj_traceerr.h"
@@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx)
}
fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "ircall = {\n[0]=");
+ for (i = 0; ircall_names[i]; i++)
+ fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
+ fprintf(ctx->fp, "}\n\n");
+
fprintf(ctx->fp, "traceerr = {\n[0]=");
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
diff --git a/src/buildvm.h b/src/buildvm.h
index e55527fd..53c820ad 100644
--- a/src/buildvm.h
+++ b/src/buildvm.h
@@ -102,5 +102,6 @@ extern const char *const bc_names[];
extern const char *const ir_names[];
extern const char *const irfpm_names[];
extern const char *const irfield_names[];
+extern const char *const ircall_names[];
#endif
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c
index 5daab13b..31b6f61e 100644
--- a/src/buildvm_asm.c
+++ b/src/buildvm_asm.c
@@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
{
const char *sym = ctx->extnames[r->sym];
+ const char *p = strchr(sym, '@');
+ char buf[80];
+ if (p) {
+ /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */
+ strncpy(buf, sym, p-sym);
+ buf[p-sym] = '\0';
+ sym = buf;
+ }
switch (ctx->mode) {
case BUILD_elfasm:
if (r->type)
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
index 271118e0..77af3dc5 100644
--- a/src/buildvm_fold.c
+++ b/src/buildvm_fold.c
@@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
for (i = 0; irfield_names[i]; i++)
if (!strcmp(irfield_names[i], p+5))
return i;
+ } else if (allowlit && !strncmp(p, "IRCALL_", 7)) {
+ for (i = 0; ircall_names[i]; i++)
+ if (!strcmp(ircall_names[i], p+7))
+ return i;
} else if (allowany && !strcmp("any", p)) {
return 0xff;
} else {
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c
index 1a8661bf..a24ae727 100644
--- a/src/buildvm_peobj.c
+++ b/src/buildvm_peobj.c
@@ -85,6 +85,7 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
#define PEOBJ_RELOC_DIR32 0x06
#define PEOBJ_SYM_PREFIX "_"
+#define PEOBJ_SYMF_PREFIX "@"
#elif LJ_TARGET_X64
#define PEOBJ_ARCH_TARGET 0x8664
#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
@@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
for (i = 0; ctx->extnames[i]; i++) {
- sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]);
+ const char *sym = ctx->extnames[i];
+ const char *p = strchr(sym, '@');
+ if (p) {
+#ifdef PEOBJ_SYMF_PREFIX
+ sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym);
+#else
+ strncpy(name, sym, p-sym);
+ name[p-sym] = '\0';
+#endif
+ } else {
+ sprintf(name, PEOBJ_SYM_PREFIX "%s", sym);
+ }
emit_peobj_sym(ctx, name, 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
}
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 09cfa6dc..e857a6be 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -30,6 +30,9 @@
|.define RD, RC
|.define RDL, RCL
|
+|.define FCARG1, ecx // Fastcall arguments.
+|.define FCARG2, edx
+|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State
|.type GL, global_State
@@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| mov RB, LJ_TNUMX
|7:
| not RB
- | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)]
+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
| jmp <2
|
|.ffunc_2 setmetatable
@@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
| jmp ->fff_res1
|3: // Handle numbers inline, unless a number base metatable is present.
| cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
- | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
+ | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
| jne ->fff_fallback
| ffgccheck // Caveat: uses label 1.
| mov L:RB, SAVE_L
- | mov ARG1, L:RB
- | mov ARG2, RA
| mov L:RB->base, RA // Add frame since C call can throw.
| mov [RA-4], PC
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov ARG3, BASE // Save BASE.
- | call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
+ | mov FCARG2, RA // Caveat: FCARG2 == BASE
+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+ | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
| // GCstr returned in eax (RC).
| mov RA, L:RB->base
| mov BASE, ARG3
@@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
|
|.ffunc_1 table_getn
| cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
- | mov TAB:RB, [RA]
- | mov ARG1, TAB:RB
- | mov RB, RA // Save RA and BASE.
- | mov ARG2, BASE
- | call extern lj_tab_len // (GCtab *t)
+ | mov ARG2, BASE // Save RA and BASE.
+ | mov RB, RA
+ | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
+ | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov RA, RB // Restore RA and BASE.
@@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
|2:
| checktab RD, ->vmeta_len
- | mov TAB:RD, [BASE+RD*8]
- | mov ARG1, TAB:RD
+ | mov TAB:FCARG1, [BASE+RD*8]
| mov RB, BASE // Save BASE.
- | call extern lj_tab_len // (GCtab *t)
+ | call extern lj_tab_len@4 // (GCtab *t)
| // Length of table returned in eax (RC).
| mov ARG1, RC
| mov BASE, RB // Restore BASE.
@@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| ins_next
break;
case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
| ins_AD // RA = upvalue #, RD = src
- | // Really ugly code due to the lack of a 4th free register.
| mov LFUNC:RB, [BASE-8]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
- | jnz >4
- |1:
- | mov RA, [BASE+RD*8]
- |2:
+ | cmp byte UPVAL:RB->closed, 0
| mov RB, UPVAL:RB->v
+ | mov RA, [BASE+RD*8]
| mov RD, [BASE+RD*8+4]
| mov [RB], RA
| mov [RB+4], RD
- |3:
+ | jz >1
+ | // Check barrier for closed upvalue.
+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
| ins_next
|
- |4: // Upvalue is black. Check if new value is collectable and white.
- | mov RA, [BASE+RD*8+4]
- | sub RA, LJ_TISGCV
- | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | sub RD, LJ_TISGCV
+ | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
| jbe <1
- | mov GCOBJ:RA, [BASE+RD*8]
| test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
- | jz <2
- | // Crossed a write barrier. So move the barrier forward.
- | mov ARG2, UPVAL:RB
- | mov ARG3, GCOBJ:RA
- | mov RB, UPVAL:RB->v
- | mov RD, [BASE+RD*8+4]
- | mov [RB], GCOBJ:RA
- | mov [RB+4], RD
- |->BC_USETV_Z:
- | mov L:RB, SAVE_L
- | lea GL:RA, [DISPATCH+GG_DISP2G]
- | mov L:RB->base, BASE
- | mov ARG1, GL:RA
- | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
- | mov BASE, L:RB->base
- | jmp <3
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
break;
+#undef TV2MARKOFS
case BC_USETS:
| ins_AND // RA = upvalue #, RD = str const (~)
| mov LFUNC:RB, [BASE-8]
- | mov GCOBJ:RD, [KBASE+RD*4]
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
- | mov RA, UPVAL:RB->v
- | mov dword [RA+4], LJ_TSTR
- | mov [RA], GCOBJ:RD
+ | mov GCOBJ:RA, [KBASE+RD*4]
+ | mov RD, UPVAL:RB->v
+ | mov [RD], GCOBJ:RA
+ | mov dword [RD+4], LJ_TSTR
| test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
| jnz >2
|1:
| ins_next
|
- |2: // Upvalue is black. Check if string is white.
- | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
+ |2: // Check if string is white and ensure upvalue is closed.
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
| jz <1
- | // Crossed a write barrier. So move the barrier forward.
- | mov ARG3, GCOBJ:RD
- | mov ARG2, UPVAL:RB
- | jmp ->BC_USETV_Z
+ | cmp byte UPVAL:RB->closed, 0
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov RB, BASE // Save BASE (FCARG2 == BASE).
+ | mov FCARG2, RD
+ | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
break;
case BC_USETN:
| ins_AD // RA = upvalue #, RD = num const
@@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|2:
- | call extern lj_gc_step_fixtop // (lua_State *L)
- | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | mov L:FCARG1, L:RB
+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
| jmp <1
break;
case BC_TDUP:
| ins_AND // RA = dst, RD = table const (~) (holding template table)
- | mov TAB:RD, [KBASE+RD*4]
| mov L:RB, SAVE_L
- | mov ARG2, TAB:RD
- | mov ARG1, L:RB
| mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
| mov SAVE_PC, PC
| cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
| mov L:RB->base, BASE
| jae >3
|2:
- | call extern lj_tab_dup // (lua_State *L, Table *kt)
+ | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
+ | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
+ | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
| // Table * returned in eax (RC).
| mov BASE, L:RB->base
| movzx RA, PC_RA
@@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
| mov dword [BASE+RA*8+4], LJ_TTAB
| ins_next
|3:
- | call extern lj_gc_step_fixtop // (lua_State *L)
- | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | mov L:FCARG1, L:RB
+ | call extern lj_gc_step_fixtop@4 // (lua_State *L)
+ | movzx RD, PC_RD // Need to reload RD.
+ | not RD
| jmp <2
break;
diff --git a/src/lib_base.c b/src/lib_base.c
index 6b9e8eef..821c81b4 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
int32_t base = lj_lib_optint(L, 2, 10);
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
- if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) {
+ if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) {
setnumV(L->base-1, numV(o));
return FFH_RES(1);
}
@@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
return FFH_RES(1);
}
+LJLIB_PUSH("nil")
+LJLIB_PUSH("false")
+LJLIB_PUSH("true")
LJLIB_ASM(tostring) LJLIB_REC(.)
{
TValue *o = lj_lib_checkany(L, 1);
@@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
GCstr *s;
if (tvisnum(o)) {
s = lj_str_fromnum(L, &o->n);
- } else if (tvisnil(o)) {
- s = lj_str_newlit(L, "nil");
- } else if (tvisfalse(o)) {
- s = lj_str_newlit(L, "false");
- } else if (tvistrue(o)) {
- s = lj_str_newlit(L, "true");
+ } else if (tvispri(o)) {
+ s = strV(lj_lib_upvalue(L, -itype(o)));
} else {
if (tvisfunc(o) && isffunc(funcV(o)))
lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);
diff --git a/src/lib_io.c b/src/lib_io.c
index aefe4213..d69b99a4 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -17,14 +17,28 @@
#include "lualib.h"
#include "lj_obj.h"
-#include "lj_err.h"
#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
#include "lj_ff.h"
+#include "lj_trace.h"
#include "lj_lib.h"
-/* Index of standard handles in function environment. */
-#define IO_INPUT 1
-#define IO_OUTPUT 2
+/* Userdata payload for I/O file. */
+typedef struct IOFileUD {
+ FILE *fp; /* File handle. */
+ uint32_t type; /* File type. */
+} IOFileUD;
+
+#define IOFILE_TYPE_FILE 0 /* Regular file. */
+#define IOFILE_TYPE_PIPE 1 /* Pipe. */
+#define IOFILE_TYPE_STDF 2 /* Standard file handle. */
+#define IOFILE_TYPE_MASK 3
+
+#define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */
+
+#define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud)
+#define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id))))
/* -- Error handling ------------------------------------------------------ */
@@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname)
return 1;
} else {
int en = errno; /* Lua API calls may change this value. */
- lua_pushnil(L);
+ setnilV(L->top++);
if (fname)
lua_pushfstring(L, "%s: %s", fname, strerror(en));
else
lua_pushfstring(L, "%s", strerror(en));
- lua_pushinteger(L, en);
+ setintV(L->top++, en);
+ lj_trace_abort(G(L));
return 3;
}
}
-static void io_file_error(lua_State *L, int arg, const char *fname)
+/* -- Open/close helpers -------------------------------------------------- */
+
+static IOFileUD *io_tofilep(lua_State *L)
{
- lua_pushfstring(L, "%s: %s", fname, strerror(errno));
- luaL_argerror(L, arg, lua_tostring(L, -1));
+ if (!(L->base < L->top && tvisudata(L->base) &&
+ udataV(L->base)->udtype == UDTYPE_IO_FILE))
+ lj_err_argtype(L, 1, "FILE*");
+ return (IOFileUD *)uddata(udataV(L->base));
}
-/* -- Open helpers -------------------------------------------------------- */
-
-#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
-
-static FILE *io_tofile(lua_State *L)
+static IOFileUD *io_tofile(lua_State *L)
{
- FILE **f = io_tofilep(L);
- if (*f == NULL)
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp == NULL)
lj_err_caller(L, LJ_ERR_IOCLFL);
- return *f;
+ return iof;
}
-static FILE **io_file_new(lua_State *L)
+static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
{
- FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *));
- *pf = NULL;
- luaL_getmetatable(L, LUA_FILEHANDLE);
- lua_setmetatable(L, -2);
- return pf;
+ IOFileUD *iof = IOSTDF_IOF(L, id);
+ if (iof->fp == NULL)
+ lj_err_caller(L, LJ_ERR_IOSTDCL);
+ return iof->fp;
}
-/* -- Close helpers ------------------------------------------------------- */
+static IOFileUD *io_file_new(lua_State *L)
+{
+ IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
+ GCudata *ud = udataV(L->top-1);
+ ud->udtype = UDTYPE_IO_FILE;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcrefr(ud->metatable, curr_func(L)->c.env);
+ iof->fp = NULL;
+ iof->type = IOFILE_TYPE_FILE;
+ return iof;
+}
-static int lj_cf_io_std_close(lua_State *L)
+static IOFileUD *io_file_open(lua_State *L, const char *mode)
{
- lua_pushnil(L);
- lua_pushliteral(L, "cannot close standard file");
- return 2;
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = fopen(fname, mode);
+ if (iof->fp == NULL)
+ luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+ return iof;
}
-static int lj_cf_io_pipe_close(lua_State *L)
+static int io_file_close(lua_State *L, IOFileUD *iof)
{
- FILE **p = io_tofilep(L);
+ int ok;
+ if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) {
+ ok = (fclose(iof->fp) == 0);
+ } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) {
#if defined(LUA_USE_POSIX)
- int ok = (pclose(*p) != -1);
+ ok = (pclose(iof->fp) != -1);
#elif defined(LUA_USE_WIN)
- int ok = (_pclose(*p) != -1);
+ ok = (_pclose(iof->fp) != -1);
#else
- int ok = 0;
+ ok = 0;
#endif
- *p = NULL;
- return io_pushresult(L, ok, NULL);
-}
-
-static int lj_cf_io_file_close(lua_State *L)
-{
- FILE **p = io_tofilep(L);
- int ok = (fclose(*p) == 0);
- *p = NULL;
+ } else {
+ lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
+ setnilV(L->top++);
+ lua_pushliteral(L, "cannot close standard file");
+ return 2;
+ }
+ iof->fp = NULL;
return io_pushresult(L, ok, NULL);
}
-static int io_file_close(lua_State *L)
-{
- lua_getfenv(L, 1);
- lua_getfield(L, -1, "__close");
- return (lua_tocfunction(L, -1))(L);
-}
-
/* -- Read/write helpers -------------------------------------------------- */
static int io_file_readnum(lua_State *L, FILE *fp)
{
lua_Number d;
if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
- lua_pushnumber(L, d);
+ setnumV(L->top++, d);
return 1;
} else {
- return 0; /* read fails */
+ return 0;
}
}
-static int test_eof(lua_State *L, FILE *fp)
+static int io_file_testeof(lua_State *L, FILE *fp)
{
int c = getc(fp);
ungetc(c, fp);
@@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
n -= nr; /* still have to read `n' chars */
} while (n > 0 && nr == rlen); /* until end of count or eof */
luaL_pushresult(&b); /* close buffer */
- return (n == 0 || lua_objlen(L, -1) > 0);
+ return (n == 0 || strV(L->top-1)->len > 0);
}
static int io_file_read(lua_State *L, FILE *fp, int start)
@@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
lj_err_arg(L, n+1, LJ_ERR_INVFMT);
} else if (tvisnum(L->base+n)) {
size_t len = (size_t)lj_lib_checkint(L, n+1);
- ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp);
+ ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp);
} else {
lj_err_arg(L, n+1, LJ_ERR_INVOPT);
}
@@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
LJLIB_CF(io_method_close)
{
- if (lua_isnone(L, 1))
- lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT);
- io_tofile(L);
- return io_file_close(L);
+ IOFileUD *iof = L->base < L->top ? io_tofile(L) :
+ IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+ return io_file_close(L, iof);
}
LJLIB_CF(io_method_read)
{
- return io_file_read(L, io_tofile(L), 1);
+ return io_file_read(L, io_tofile(L)->fp, 1);
}
-LJLIB_CF(io_method_write)
+LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
{
- return io_file_write(L, io_tofile(L), 1);
+ return io_file_write(L, io_tofile(L)->fp, 1);
}
-LJLIB_CF(io_method_flush)
+LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
{
- return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL);
+ return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
}
LJLIB_CF(io_method_seek)
{
- FILE *fp = io_tofile(L);
+ FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
lua_Number ofs;
int res;
@@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek)
LJLIB_CF(io_method_setvbuf)
{
- FILE *fp = io_tofile(L);
+ FILE *fp = io_tofile(L)->fp;
int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
if (opt == 0) opt = _IOFBF;
else if (opt == 1) opt = _IOLBF;
else if (opt == 2) opt = _IONBF;
- return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL);
+ return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL);
}
-/* Forward declaration. */
-static void io_file_lines(lua_State *L, int idx, int toclose);
-
+LJLIB_PUSH(top-2) /* io_lines_iter */
LJLIB_CF(io_method_lines)
{
io_tofile(L);
- io_file_lines(L, 1, 0);
- return 1;
+ setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
+ setudataV(L, L->top+1, udataV(L->base));
+ L->top += 2;
+ return 2;
}
LJLIB_CF(io_method___gc)
{
- FILE *fp = *io_tofilep(L);
- if (fp != NULL) io_file_close(L);
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp != NULL)
+ io_file_close(L, iof);
return 0;
}
LJLIB_CF(io_method___tostring)
{
- FILE *fp = *io_tofilep(L);
- if (fp == NULL)
- lua_pushliteral(L, "file (closed)");
+ IOFileUD *iof = io_tofilep(L);
+ if (iof->fp != NULL)
+ lua_pushfstring(L, "file (%p)", iof->fp);
else
- lua_pushfstring(L, "file (%p)", fp);
+ lua_pushliteral(L, "file (closed)");
return 1;
}
@@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index)
LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
-static FILE *io_file_get(lua_State *L, int findex)
+LJLIB_CF(io_open)
{
- GCtab *fenv = tabref(curr_func(L)->c.env);
- GCudata *ud = udataV(&tvref(fenv->array)[findex]);
- FILE *fp = *(FILE **)uddata(ud);
- if (fp == NULL)
- lj_err_caller(L, LJ_ERR_IOSTDCL);
- return fp;
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = fopen(fname, mode);
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
}
-LJLIB_CF(io_open)
+LJLIB_CF(io_popen)
{
- const char *fname = luaL_checkstring(L, 1);
- const char *mode = luaL_optstring(L, 2, "r");
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, mode);
- return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
+#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
+ const char *fname = strdata(lj_lib_checkstr(L, 1));
+ GCstr *s = lj_lib_optstr(L, 2);
+ const char *mode = s ? strdata(s) : "r";
+ IOFileUD *iof = io_file_new(L);
+ iof->type = IOFILE_TYPE_PIPE;
+#ifdef LUA_USE_POSIX
+ fflush(NULL);
+ iof->fp = popen(fname, mode);
+#else
+ iof->fp = _popen(fname, mode);
+#endif
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
+#else
+ luaL_error(L, LUA_QL("popen") " not supported");
+#endif
}
LJLIB_CF(io_tmpfile)
{
- FILE **pf = io_file_new(L);
- *pf = tmpfile();
- return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1;
+ IOFileUD *iof = io_file_new(L);
+ iof->fp = tmpfile();
+ return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL);
}
LJLIB_CF(io_close)
@@ -373,169 +405,112 @@ LJLIB_CF(io_close)
LJLIB_CF(io_read)
{
- return io_file_read(L, io_file_get(L, IO_INPUT), 0);
-}
-
-LJLIB_CF(io_write)
-{
- return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
-}
-
-LJLIB_CF(io_flush)
-{
- return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
+ return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0);
}
-LJLIB_NOREG LJLIB_CF(io_lines_iter)
-{
- FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
- int ok;
- if (fp == NULL)
- lj_err_caller(L, LJ_ERR_IOCLFL);
- ok = io_file_readline(L, fp);
- if (ferror(fp))
- return luaL_error(L, "%s", strerror(errno));
- if (ok)
- return 1;
- if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */
- L->top = L->base+1;
- setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
- io_file_close(L);
- }
- return 0;
-}
-
-static void io_file_lines(lua_State *L, int idx, int toclose)
+LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
{
- lua_pushvalue(L, idx);
- lua_pushboolean(L, toclose);
- lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
- funcV(L->top-1)->c.ffid = FF_io_lines_iter;
+ return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0);
}
-LJLIB_CF(io_lines)
+LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
{
- if (lua_isnoneornil(L, 1)) { /* no arguments? */
- /* will iterate over default input */
- lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
- return lj_cf_io_method_lines(L);
- } else {
- const char *fname = luaL_checkstring(L, 1);
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, "r");
- if (*pf == NULL)
- io_file_error(L, 1, fname);
- io_file_lines(L, lua_gettop(L), 1);
- return 1;
- }
+ return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
}
-static int io_std_get(lua_State *L, int fp, const char *mode)
+static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
{
- if (!lua_isnoneornil(L, 1)) {
- const char *fname = lua_tostring(L, 1);
- if (fname) {
- FILE **pf = io_file_new(L);
- *pf = fopen(fname, mode);
- if (*pf == NULL)
- io_file_error(L, 1, fname);
+ if (L->base < L->top && !tvisnil(L->base)) {
+ if (tvisudata(L->base)) {
+ io_tofile(L);
+ L->top = L->base+1;
} else {
- io_tofile(L); /* check that it's a valid file handle */
- lua_pushvalue(L, 1);
+ io_file_open(L, mode);
}
- lua_rawseti(L, LUA_ENVIRONINDEX, fp);
+ /* NOBARRIER: The standard I/O handles are GC roots. */
+ setgcref(G(L)->gcroot[id], gcV(L->top-1));
+ } else {
+ setudataV(L, L->top++, IOSTDF_UD(L, id));
}
- /* return current value */
- lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
return 1;
}
LJLIB_CF(io_input)
{
- return io_std_get(L, IO_INPUT, "r");
+ return io_std_getset(L, GCROOT_IO_INPUT, "r");
}
LJLIB_CF(io_output)
{
- return io_std_get(L, IO_OUTPUT, "w");
+ return io_std_getset(L, GCROOT_IO_OUTPUT, "w");
}
-LJLIB_CF(io_type)
+LJLIB_NOREG LJLIB_CF(io_lines_iter)
{
- void *ud;
- luaL_checkany(L, 1);
- ud = lua_touserdata(L, 1);
- lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1))
- lua_pushnil(L); /* not a file */
- else if (*((FILE **)ud) == NULL)
- lua_pushliteral(L, "closed file");
- else
- lua_pushliteral(L, "file");
- return 1;
+ IOFileUD *iof = io_tofile(L);
+ int ok = io_file_readline(L, iof->fp);
+ if (ferror(iof->fp))
+ lj_err_callermsg(L, strerror(errno));
+ if (!ok && (iof->type & IOFILE_FLAG_CLOSE))
+ io_file_close(L, iof); /* Return values are ignored (ok is 0). */
+ return ok;
}
-LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */
+LJLIB_PUSH(top-3) /* io_lines_iter */
+LJLIB_CF(io_lines)
+{
+ if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */
+ IOFileUD *iof = io_file_open(L, "r");
+ iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE;
+ setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1)));
+ } else { /* io.lines() iterates over stdin. */
+ setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
+ setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT));
+ L->top += 2;
+ }
+ return 2;
+}
-LJLIB_CF(io_popen)
+LJLIB_CF(io_type)
{
-#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
- const char *fname = luaL_checkstring(L, 1);
- const char *mode = luaL_optstring(L, 2, "r");
- FILE **pf = io_file_new(L);
-#ifdef LUA_USE_POSIX
- fflush(NULL);
- *pf = popen(fname, mode);
-#else
- *pf = _popen(fname, mode);
-#endif
- return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
-#else
- luaL_error(L, LUA_QL("popen") " not supported");
-#endif
+ cTValue *o = lj_lib_checkany(L, 1);
+ if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE))
+ setnilV(L->top++);
+ else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL)
+ lua_pushliteral(L, "file");
+ else
+ lua_pushliteral(L, "closed file");
+ return 1;
}
#include "lj_libdef.h"
/* ------------------------------------------------------------------------ */
-static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname)
+static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
{
- FILE **pf = io_file_new(L);
+ IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
GCudata *ud = udataV(L->top-1);
- GCtab *envt = tabV(L->top-2);
- *pf = fp;
- setgcref(ud->env, obj2gco(envt));
- lj_gc_objbarrier(L, obj2gco(ud), envt);
- if (k > 0) {
- lua_pushvalue(L, -1);
- lua_rawseti(L, -5, k);
- }
- lua_setfield(L, -3, fname);
-}
-
-static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
-{
- lua_createtable(L, narr, 1);
- lua_pushcfunction(L, cls);
- lua_setfield(L, -2, "__close");
+ ud->udtype = UDTYPE_IO_FILE;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcref(ud->metatable, gcV(L->top-3));
+ iof->fp = fp;
+ iof->type = IOFILE_TYPE_STDF;
+ lua_setfield(L, -2, name);
+ return obj2gco(ud);
}
LUALIB_API int luaopen_io(lua_State *L)
{
- lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- if (tvisnil(L->top-1)) {
- LJ_LIB_REG_(L, NULL, io_method);
- lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
- }
- io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */
- io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */
+ lua_pushcfunction(L, lj_cf_io_lines_iter);
+ funcV(L->top-1)->c.ffid = FF_io_lines_iter;
+ LJ_LIB_REG_(L, NULL, io_method);
+ copyTV(L, L->top, L->top-1); L->top++;
+ lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
LJ_LIB_REG(L, io);
- io_fenv_new(L, 0, lj_cf_io_std_close);
- io_std_new(L, stdin, IO_INPUT, "stdin");
- io_std_new(L, stdout, IO_OUTPUT, "stdout");
- io_std_new(L, stderr, 0, "stderr");
- L->top--;
+ setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin"));
+ setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout"));
+ io_std_new(L, stderr, "stderr");
return 1;
}
diff --git a/src/lib_math.c b/src/lib_math.c
index adc77c9d..f3803e8f 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
-LJLIB_ASM_(math_sinh)
-LJLIB_ASM_(math_cosh)
-LJLIB_ASM_(math_tanh)
+LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
+LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
+LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
LJLIB_ASM_(math_frexp)
LJLIB_ASM_(math_modf) LJLIB_REC(.)
@@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
*/
/* PRNG state. */
-typedef struct TW223State {
+struct RandomState {
uint64_t gen[4]; /* State of the 4 LFSR generators. */
int valid; /* State is valid. */
-} TW223State;
+};
/* Union needed for bit-pattern conversion between uint64_t and double. */
typedef union { uint64_t u64; double d; } U64double;
/* Update generator i and compute a running xor of all states. */
#define TW223_GEN(i, k, q, s) \
- z = tw->gen[i]; \
+ z = rs->gen[i]; \
z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
- r ^= z; tw->gen[i] = z;
+ r ^= z; rs->gen[i] = z;
/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
-static LJ_NOINLINE double tw223_step(TW223State *tw)
+LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
{
uint64_t z, r = 0;
- U64double u;
TW223_GEN(0, 63, 31, 18)
TW223_GEN(1, 58, 19, 28)
TW223_GEN(2, 55, 24, 7)
TW223_GEN(3, 47, 21, 8)
- u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
- return u.d;
+ return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
}
/* PRNG initialization function. */
-static void tw223_init(TW223State *tw, double d)
+static void random_init(RandomState *rs, double d)
{
uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
int i;
@@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d)
r >>= 8;
u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
- tw->gen[i] = u.u64;
+ rs->gen[i] = u.u64;
}
- tw->valid = 1;
+ rs->valid = 1;
for (i = 0; i < 10; i++)
- tw223_step(tw);
+ lj_math_random_step(rs);
}
/* PRNG extract function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
-LJLIB_CF(math_random)
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
+LJLIB_CF(math_random) LJLIB_REC(.)
{
int n = cast_int(L->top - L->base);
- TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ U64double u;
double d;
- if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0);
- d = tw223_step(tw) - 1.0;
+ if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
+ u.u64 = lj_math_random_step(rs);
+ d = u.d - 1.0;
if (n > 0) {
double r1 = lj_lib_checknum(L, 1);
if (n == 1) {
@@ -150,11 +150,11 @@ LJLIB_CF(math_random)
}
/* PRNG seed function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
LJLIB_CF(math_randomseed)
{
- TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
- tw223_init(tw, lj_lib_checknum(L, 1));
+ RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ random_init(rs, lj_lib_checknum(L, 1));
return 0;
}
@@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed)
LUALIB_API int luaopen_math(lua_State *L)
{
- TW223State *tw;
- tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State));
- tw->valid = 0; /* Use lazy initialization to save some time on startup. */
+ RandomState *rs;
+ rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
+ rs->valid = 0; /* Use lazy initialization to save some time on startup. */
LJ_LIB_REG(L, math);
#if defined(LUA_COMPAT_MOD)
lua_getfield(L, -1, "fmod");
diff --git a/src/lib_string.c b/src/lib_string.c
index 6c857328..e7ad12df 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L)
{
GCtab *mt;
GCstr *mmstr;
+ global_State *g;
LJ_LIB_REG(L, string);
#if defined(LUA_COMPAT_GFIND)
lua_getfield(L, -1, "gmatch");
lua_setfield(L, -2, "gfind");
#endif
mt = lj_tab_new(L, 0, 1);
- /* NOBARRIER: G(L)->mmname[] is a GC root. */
- setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt));
- mmstr = strref(G(L)->mmname[MM_index]);
- if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
+ /* NOBARRIER: basemt is a GC root. */
+ g = G(L);
+ setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
+ mmstr = strref(g->mmname[MM_index]);
+ if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1));
mt->nomm = cast_byte(~(1u<<MM_index));
return 1;
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 8ad4f8fb..6d8b4ccb 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
size_t rsize = oldsize - nb;
newp = oldp;
if (rsize >= MIN_CHUNK_SIZE) {
- mchunkptr remainder = chunk_plus_offset(newp, nb);
+ mchunkptr rem = chunk_plus_offset(newp, nb);
set_inuse(m, newp, nb);
- set_inuse(m, remainder, rsize);
- lj_alloc_free(m, chunk2mem(remainder));
+ set_inuse(m, rem, rsize);
+ lj_alloc_free(m, chunk2mem(rem));
}
} else if (next == m->top && oldsize + m->topsize > nb) {
/* Expand into top */
diff --git a/src/lj_api.c b/src/lj_api.c
index 7a759e5f..4bac5024 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx)
{
cTValue *o = index2adr(L, idx);
TValue tmp;
- return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)));
+ return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp)));
}
LUA_API int lua_isstring(lua_State *L, int idx)
@@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
TValue tmp;
if (LJ_LIKELY(tvisnum(o)))
return numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
return numV(&tmp);
else
return 0;
@@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
TValue tmp;
if (tvisnum(o))
return numV(o);
- else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
+ else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
return numV(o);
else if (tvisnil(o))
return def;
- else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)))
+ else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
return 0;
@@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
lua_Number n;
if (LJ_LIKELY(tvisnum(o)))
n = numV(o);
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
n = numV(o);
else if (tvisnil(o))
return def;
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
n = numV(&tmp);
else
lj_err_argt(L, idx, LUA_TNUMBER);
@@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
- mt = tabref(G(L)->basemt[itypemap(o)]);
+ mt = tabref(basemt_obj(G(L), o));
if (mt == NULL)
return 0;
settabV(L, L->top, mt);
@@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
if (lj_trace_flushall(L))
lj_err_caller(L, LJ_ERR_NOGCMM);
if (tvisbool(o)) {
- /* NOBARRIER: g->basemt[] is a GC root. */
- setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt));
- setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt));
+ /* NOBARRIER: basemt is a GC root. */
+ setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));
+ setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt));
} else {
- /* NOBARRIER: g->basemt[] is a GC root. */
- setgcref(g->basemt[itypemap(o)], obj2gco(mt));
+ /* NOBARRIER: basemt is a GC root. */
+ setgcref(basemt_obj(g, o), obj2gco(mt));
}
}
L->top--;
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a4d0c606..f26a40a5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -13,6 +13,7 @@
#include "lj_gc.h"
#include "lj_str.h"
#include "lj_tab.h"
+#include "lj_frame.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
@@ -81,6 +82,10 @@ typedef struct ASMState {
#define IR(ref) (&as->ir[(ref)])
+#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
+#define ASMREF_TMP2 REF_FALSE /* Temp. register. */
+#define ASMREF_L REF_NIL /* Stores register for L. */
+
/* Check for variant to invariant references. */
#define iscrossref(as, ref) ((ref) < as->sectref)
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
{ MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
+#define REX_64 (FORCE_REX|0x080000)
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
+#define REX_64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
{
uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
if (rex != 0x40) {
+ rex |= (rr >> 16);
if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
*--p = (MCode)rex;
}
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
-/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
-#define emit_setargr(as, narg, r) \
- emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
-#define emit_setargi(as, narg, imm) \
- emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
-#define emit_setargp(as, narg, ptr) \
- emit_setargi(as, (narg), ptr2addr((ptr)))
-
/* -- Register allocator debugging ---------------------------------------- */
/* #define LUAJIT_DEBUG_RA */
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
memset(as->phireg, 0, sizeof(as->phireg));
memset(as->cost, 0, sizeof(as->cost));
as->cost[RID_ESP] = REGCOST(~0u, 0u);
-
- /* Start slots for spill slot allocation. */
- as->evenspill = (SPS_FIRST+1)&~1;
- as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
}
/* Rematerialize constants. */
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
} else if (ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base);
+ } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
+ lua_assert(irt_isnil(ir->t));
+ emit_getgl(as, r, jit_L);
} else {
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
return sps_scale(slot);
}
+/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
+static Reg ra_releasetmp(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ Reg r = ir->r;
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT;
+ return r;
+}
+
/* Restore a register (marked as free). Rematerialize or force a spill. */
static Reg ra_restore(ASMState *as, IRRef ref)
{
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
/* Arch-specific field offsets. */
static const uint8_t field_ofs[IRFL__MAX+1] = {
-#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
+#define FLOFS(name, ofs) (uint8_t)(ofs),
IRFLDEF(FLOFS)
#undef FLOFS
0
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
{
IRIns *irr;
lua_assert(ir->o == IR_STRREF);
- as->mrm.idx = as->mrm.base = RID_NONE;
+ as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
if (irref_isk(ir->op1)) {
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
}
}
+static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
+{
+ if (ir->o == IR_KPTR) {
+ as->mrm.ofs = ir->i;
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ } else {
+ lua_assert(ir->o == IR_STRREF);
+ asm_fusestrref(as, ir, allow);
+ }
+}
+
/* Fuse load into memory operand. */
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
{
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
if (ir->o == IR_KNUM) {
+ RegSet avail = as->freeset & ~as->modset & RSET_FPR;
lua_assert(allow != RSET_EMPTY);
- if (!(as->freeset & ~as->modset & RSET_FPR)) {
+ if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
as->mrm.ofs = ptr2addr(ir_knum(ir));
as->mrm.base = as->mrm.idx = RID_NONE;
return RID_MRM;
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_FLOAD) {
- /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
- if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
+ /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
+ if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+ noconflict(as, ref, IR_FSTORE)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_XLOAD) {
- /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
+ /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
- if (irt_isint(ir->t)) {
- asm_fusestrref(as, IR(ir->op1), xallow);
+ if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+ asm_fusexref(as, IR(ir->op1), xallow);
return RID_MRM;
}
}
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return ra_allocref(as, ref, allow);
}
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+ RegSet allow = RSET_ALL;
+ uint32_t n, nargs = CCI_NARGS(ci);
+ int32_t ofs = 0;
+ lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
+ emit_call(as, ci->func);
+ for (n = 0; n < nargs; n++) { /* Setup args. */
+#if LJ_64
+#error "NYI: 64 bit mode call argument setup"
+#endif
+ IRIns *ir = IR(args[n]);
+ if (irt_isnum(ir->t)) {
+ if ((ofs & 4) && irref_isk(args[n])) {
+ /* Split stores for unaligned FP consts. */
+ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
+ emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
+ } else {
+ Reg r;
+ if ((allow & RSET_FPR) == RSET_EMPTY)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ r = ra_alloc1(as, args[n], allow & RSET_FPR);
+ allow &= ~RID2RSET(r);
+ emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
+ }
+ ofs += 8;
+ } else {
+ if ((ci->flags & CCI_FASTCALL) && n < 2) {
+ Reg r = n == 0 ? RID_ECX : RID_EDX;
+ if (args[n] < ASMREF_TMP1) {
+ emit_loadi(as, r, ir->i);
+ } else {
+ lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
+ allow &= ~RID2RSET(r);
+ if (ra_hasreg(ir->r))
+ emit_movrr(as, r, ir->r);
+ else
+ ra_allocref(as, args[n], RID2RSET(r));
+ }
+ } else {
+ if (args[n] < ASMREF_TMP1) {
+ emit_movmroi(as, RID_ESP, ofs, ir->i);
+ } else {
+ Reg r;
+ if ((allow & RSET_GPR) == RSET_EMPTY)
+ lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+ r = ra_alloc1(as, args[n], allow & RSET_GPR);
+ allow &= ~RID2RSET(r);
+ emit_movtomro(as, r, RID_ESP, ofs);
+ }
+ ofs += 4;
+ }
+ }
+ }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ RegSet drop = RSET_SCRATCH;
+ if ((ci->flags & CCI_NOFPRCLOBBER))
+ drop &= ~RSET_FPR;
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+ if (irt_isnum(ir->t)) {
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+#if LJ_64
+ if ((ci->flags & CCI_CASTU64)) {
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
+ } else {
+ emit_movrmro(as, RID_RET, RID_ESP, ofs);
+ }
+ } else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+#else
+ /* Number result is in x87 st0 for x86 calling convention. */
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+ }
+ if ((ci->flags & CCI_CASTU64)) {
+ emit_movtomro(as, RID_RET, RID_ESP, ofs);
+ emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
+ } else {
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+ }
+#endif
+ } else {
+ lua_assert(!irt_ispri(ir->t));
+ ra_destreg(as, ir, RID_RET);
+ }
+ }
+}
+
+/* Collect arguments from CALL* and ARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+ const CCallInfo *ci, IRRef *args)
+{
+ uint32_t n = CCI_NARGS(ci);
+ lua_assert(n <= CCI_NARGS_MAX);
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+ while (n-- > 1) {
+ ir = IR(ir->op1);
+ lua_assert(ir->o == IR_CARG);
+ args[n] = ir->op2;
+ }
+ args[0] = ir->op1;
+ lua_assert(IR(ir->op1)->o != IR_CARG);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+ IRRef args[CCI_NARGS_MAX];
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ asm_collectargs(as, ir, ci, args);
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+}
+
/* -- Type conversions ---------------------------------------------------- */
static void asm_tonum(ASMState *as, IRIns *ir)
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
- Reg str;
- int32_t ofs;
- RegSet drop = RSET_SCRATCH;
/* Force a spill slot for the destination register (if any). */
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
+ IRRef args[2];
+ RegSet drop = RSET_SCRATCH;
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
ra_evictset(as, drop);
asm_guardcc(as, CC_E);
emit_rr(as, XO_TEST, RID_RET, RID_RET);
- /* int lj_str_numconv(const char *s, TValue *n) */
- emit_call(as, lj_str_numconv);
- ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
- if (ofs == 0) {
- emit_setargr(as, 2, RID_ESP);
- } else {
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
- }
- emit_setargr(as, 1, RID_RET);
- str = ra_alloc1(as, ir->op1, RSET_GPR);
- emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
+ args[0] = ir->op1;
+ args[1] = ASMREF_TMP1;
+ asm_gencall(as, ci, args);
+ /* Store the result to the spill slot or slots SPS_TEMP1/2. */
+ emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+ RID_ESP, sps_scale(ir->s));
}
static void asm_tostr(ASMState *as, IRIns *ir)
{
IRIns *irl = IR(ir->op1);
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ IRRef args[2];
+ args[0] = ASMREF_L;
as->gcsteps++;
if (irt_isnum(irl->t)) {
- /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
- emit_call(as, lj_str_fromnum);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
+ args[1] = ASMREF_TMP1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
+ RID_ESP, ra_spill(as, irl));
} else {
- /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
- emit_call(as, lj_str_fromint);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
+ args[1] = ir->op1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
}
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
lua_assert(!irt_isnil(ir->t));
return irt_type(ir->t)-IRT_FALSE;
} else {
- lua_assert(irt_isaddr(ir->t));
+ lua_assert(irt_isgcv(ir->t));
lo = u32ptr(ir_kgc(ir));
hi = lo - 0x04c11db7;
}
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_newref(ASMState *as, IRIns *ir)
{
- IRRef keyref = ir->op2;
- IRIns *irkey = IR(keyref);
- RegSet allow = RSET_GPR;
- Reg tab, tmp;
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- tab = ra_alloc1(as, ir->op1, allow);
- tmp = ra_scratch(as, rset_clear(allow, tab));
- /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
- emit_call(as, lj_tab_newkey);
- emit_setargr(as, 1, tmp);
- emit_setargr(as, 2, tab);
- emit_getgl(as, tmp, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+ IRRef args[3];
+ IRIns *irkey;
+ Reg tmp;
+ args[0] = ASMREF_L;
+ args[1] = ir->op1;
+ args[2] = ASMREF_TMP1;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ irkey = IR(ir->op2);
if (irt_isnum(irkey->t)) {
/* For numbers use the constant itself or a spill slot as a TValue. */
- if (irref_isk(keyref)) {
- emit_setargp(as, 3, ir_knum(irkey));
- } else {
- emit_setargr(as, 3, tmp);
+ if (irref_isk(ir->op2))
+ emit_loada(as, tmp, ir_knum(irkey));
+ else
emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
- }
} else {
/* Otherwise use g->tmptv to hold the TValue. */
- lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
- emit_setargr(as, 3, tmp);
- if (!irref_isk(keyref)) {
- Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
+ if (!irref_isk(ir->op2)) {
+ Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
emit_movtomro(as, src, tmp, 0);
} else if (!irt_ispri(irkey->t)) {
emit_movmroi(as, tmp, 0, irkey->i);
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
/* -- Loads and stores ---------------------------------------------------- */
-static void asm_fload(ASMState *as, IRIns *ir)
+static void asm_fxload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op xo;
- asm_fusefref(as, ir, RSET_GPR);
+ if (ir->o == IR_FLOAD)
+ asm_fusefref(as, ir, RSET_GPR);
+ else
+ asm_fusexref(as, IR(ir->op1), RSET_GPR);
+ /* ir->op2 is ignored -- unaligned loads are ok on x86. */
switch (irt_type(ir->t)) {
case IRT_I8: xo = XO_MOVSXb; break;
case IRT_U8: xo = XO_MOVZXb; break;
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
}
}
-static void asm_xload(ASMState *as, IRIns *ir)
-{
- Reg dest = ra_dest(as, ir, RSET_GPR);
- x86Op xo;
- asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
- /* ir->op2 is ignored -- unaligned loads are ok on x86. */
- switch (irt_type(ir->t)) {
- case IRT_I8: xo = XO_MOVSXb; break;
- case IRT_U8: xo = XO_MOVZXb; break;
- case IRT_I16: xo = XO_MOVSXw; break;
- case IRT_U16: xo = XO_MOVZXw; break;
- default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
- }
- emit_mrm(as, xo, dest, RID_MRM);
-}
-
-/* -- String ops ---------------------------------------------------------- */
+/* -- Allocations --------------------------------------------------------- */
static void asm_snew(ASMState *as, IRIns *ir)
{
- RegSet allow = RSET_GPR;
- Reg left, right;
- IRIns *irl;
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- irl = IR(ir->op1);
- left = irl->r;
- right = IR(ir->op2)->r;
- if (ra_noreg(left)) {
- lua_assert(irl->o == IR_STRREF);
- /* Get register only for non-const STRREF. */
- if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
- if (ra_hasreg(right)) rset_clear(allow, right);
- left = ra_allocref(as, ir->op1, allow);
- }
- }
- if (ra_noreg(right) && !irref_isk(ir->op2)) {
- if (ra_hasreg(left)) rset_clear(allow, left);
- right = ra_allocref(as, ir->op2, allow);
- }
- /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
- emit_call(as, lj_str_new);
- emit_setargr(as, 1, RID_RET);
- emit_getgl(as, RID_RET, jit_L);
- if (ra_noreg(left)) /* Use immediate for const STRREF. */
- emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
- (int32_t)sizeof(GCstr));
- else
- emit_setargr(as, 2, left);
- if (ra_noreg(right))
- emit_setargi(as, 3, IR(ir->op2)->i);
- else
- emit_setargr(as, 3, right);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
+ IRRef args[3];
+ args[0] = ASMREF_L;
+ args[1] = ir->op1;
+ args[2] = ir->op2;
as->gcsteps++;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
-/* -- Table ops ----------------------------------------------------------- */
-
static void asm_tnew(ASMState *as, IRIns *ir)
{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
- emit_call(as, lj_tab_new);
- emit_setargr(as, 1, RID_RET);
- emit_setargi(as, 2, ir->op1);
- emit_setargi(as, 3, ir->op2);
- emit_getgl(as, RID_RET, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
+ IRRef args[2];
+ args[0] = ASMREF_L;
+ args[1] = ASMREF_TMP1;
as->gcsteps++;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
}
static void asm_tdup(ASMState *as, IRIns *ir)
{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
- emit_call(as, lj_tab_dup);
- emit_setargr(as, 1, RID_RET);
- emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
- emit_getgl(as, RID_RET, jit_L);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
+ IRRef args[2];
+ args[0] = ASMREF_L;
+ args[1] = ir->op1;
as->gcsteps++;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
}
-static void asm_tlen(ASMState *as, IRIns *ir)
-{
- ra_destreg(as, ir, RID_RET);
- ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
- emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
- emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
-}
+/* -- Write barriers ------------------------------------------------------ */
static void asm_tbar(ASMState *as, IRIns *ir)
{
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
static void asm_obar(ASMState *as, IRIns *ir)
{
- RegSet allow = RSET_GPR;
- Reg obj, val;
- GCobj *valp;
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+ IRRef args[2];
MCLabel l_end;
- int32_t ofs;
- ra_evictset(as, RSET_SCRATCH);
- if (irref_isk(ir->op2)) {
- valp = ir_kgc(IR(ir->op2));
- val = RID_NONE;
- } else {
- valp = NULL;
- val = ra_alloc1(as, ir->op2, allow);
- rset_clear(allow, val);
- }
- obj = ra_alloc1(as, ir->op1, allow);
- l_end = emit_label(as);
+ Reg obj;
/* No need for other object barriers (yet). */
lua_assert(IR(ir->op1)->o == IR_UREFC);
- ofs = -(int32_t)offsetof(GCupval, tv);
- /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
- emit_call(as, lj_gc_barrieruv);
- if (ofs == 0) {
- emit_setargr(as, 2, obj);
- } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
- emit_setargr(as, 2, obj);
- emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
- } else {
- emit_setargr(as, 2, RID_RET);
- emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
- }
- emit_setargp(as, 1, J2G(as->J));
- if (valp)
- emit_setargp(as, 3, valp);
- else
- emit_setargr(as, 3, val);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1;
+ args[1] = ir->op1;
+ asm_gencall(as, ci, args);
+ emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
+ obj = IR(ir->op1)->r;
emit_sjcc(as, CC_Z, l_end);
emit_i8(as, LJ_GC_WHITES);
- if (valp)
- emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
- else
+ if (irref_isk(ir->op2)) {
+ GCobj *vp = ir_kgc(IR(ir->op2));
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
+ } else {
+ Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+ }
emit_sjcc(as, CC_Z, l_end);
emit_i8(as, LJ_GC_BLACK);
emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
- ofs + (int32_t)offsetof(GChead, marked));
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
}
/* -- FP/int arithmetic and logic operations ------------------------------ */
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
}
}
emit_mrm(as, XO_UCOMISD, left, right);
- } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+ } else {
IRRef lref = ir->op1, rref = ir->op2;
IROp leftop = (IROp)(IR(lref)->o);
- lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
/* Swap constants (only for ABC) and fusable loads to the right. */
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
} else {
Reg left;
if (opisfusableload((IROp)irl->o) &&
- ((irt_isi8(irl->t) && checki8(imm)) ||
- (irt_isu8(irl->t) && checku8(imm)))) {
- /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
- ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
- ** fused, since cmp word [mem], imm16 has a length-changing prefix.
+ ((irt_isu8(irl->t) && checku8(imm)) ||
+ ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
+ (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
+ /* Only the IRT_INT case is fused by asm_fuseload.
+ ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
+ ** are handled here.
+ ** Note that cmp word [mem], imm16 should not be generated,
+ ** since it has a length-changing prefix. Compares of a word
+ ** against a sign-extended imm8 are ok, however.
*/
IRType1 origt = irl->t; /* Temporarily flip types. */
irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
if (left == RID_MRM) { /* Fusion succeeded? */
asm_guardcc(as, cc);
emit_i8(as, imm);
- emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
+ emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
+ XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
return;
} /* Otherwise handle register case as usual. */
} else {
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
asm_guardcc(as, cc);
emit_mrm(as, XO_CMP, left, right);
}
- } else { /* Handle ordered string compares. */
- RegSet allow = RSET_GPR;
- /* This assumes lj_str_cmp never uses any SSE registers. */
- ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
- asm_guardcc(as, cc);
- emit_rr(as, XO_TEST, RID_RET, RID_RET);
- emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
- if (irref_isk(ir->op1)) {
- emit_setargi(as, 1, IR(ir->op1)->i);
- } else {
- Reg left = ra_alloc1(as, ir->op1, allow);
- rset_clear(allow, left);
- emit_setargr(as, 1, left);
- }
- if (irref_isk(ir->op2)) {
- emit_setargi(as, 2, IR(ir->op2)->i);
- } else {
- Reg right = ra_alloc1(as, ir->op2, allow);
- emit_setargr(as, 2, right);
- }
}
}
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
/* -- GC handling --------------------------------------------------------- */
/* Sync all live GC values to Lua stack slots. */
-static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
+static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
{
+ /* Some care must be taken when allocating registers here, since this is
+ ** not part of the fast path. All scratch registers are evicted in the
+ ** fast path, so it's easiest to force allocation from scratch registers
+ ** only. This avoids register allocation state unification.
+ */
+ RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
IRRef2 *map = &as->T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
for (s = 0; s < nslots; s++) {
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as, SnapShot *snap)
{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+ IRRef args[2];
MCLabel l_end;
- const BCIns *pc;
- Reg tmp, base;
+ Reg base, lstate, tmp;
RegSet drop = RSET_SCRATCH;
- /* Must evict BASE because the stack may be reallocated by the GC. */
- if (ra_hasreg(IR(REF_BASE)->r))
- drop |= RID2RSET(IR(REF_BASE)->r);
+ if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */
+ drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */
ra_evictset(as, drop);
- base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
l_end = emit_label(as);
- /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
- emit_call(as, lj_gc_step_jit);
- emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
- emit_setargr(as, 1, RID_RET);
- emit_setargi(as, 3, (int32_t)as->gcsteps);
- emit_getgl(as, RID_RET, jit_L);
- pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
- emit_setargp(as, 2, pc);
- asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
- if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
- ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
+ args[0] = ASMREF_L;
+ args[1] = ASMREF_TMP1;
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ emit_loadi(as, tmp, (int32_t)as->gcsteps);
+ /* We don't know spadj yet, so get the C frame from L->cframe. */
+ emit_movmroi(as, tmp, CFRAME_OFS_PC,
+ (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
+ emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
+ lstate = IR(ASMREF_L)->r;
+ emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
+ /* It's ok if lstate is already in a non-scratch reg. But all allocations
+ ** in the non-fast path must use a scratch reg. See comment above.
+ */
+ base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
+ emit_movtomro(as, base, lstate, offsetof(lua_State, base));
+ asm_gc_sync(as, snap, base);
+ /* BASE/L get restored anyway, better do it inside the slow path. */
+ if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
+ if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
+ ra_restore(as, ASMREF_L);
/* Jump around GC step if GC total < GC threshold. */
tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
emit_sjcc(as, CC_B, l_end);
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
{
int32_t spadj;
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
- spadj = sps_adjust(as);
+ spadj = sps_adjust(as->evenspill);
as->T->spadjust = (uint16_t)spadj;
emit_addptr(as, RID_ESP, -spadj);
}
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
- ra_free(as, r);
- if (r != RID_BASE) {
- ra_scratch(as, RID2RSET(RID_BASE));
- emit_rr(as, XO_MOV, r, RID_BASE);
+ lua_assert(!ra_hasspill(ir->s));
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ if (r != RID_BASE) {
+ ra_scratch(as, RID2RSET(RID_BASE));
+ emit_rr(as, XO_MOV, r, RID_BASE);
+ }
}
}
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
}
/* Calculate stack frame adjustment. */
- spadj = sps_adjust(as);
+ spadj = sps_adjust(as->evenspill);
spdelta = spadj - (int32_t)as->parent->spadjust;
if (spdelta < 0) { /* Don't shrink the stack frame. */
spadj = (int32_t)as->parent->spadjust;
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
GCfunc *fn = ir_kfunc(IR(ir->op2));
if (isluafunc(fn)) {
BCReg fs = s + funcproto(fn)->framesize;
- newbase = s;
- if (secondbase == ~(BCReg)0) secondbase = s;
if (fs > topslot) topslot = fs;
+ if (s != 0) {
+ newbase = s;
+ if (secondbase == ~(BCReg)0) secondbase = s;
+ }
}
}
}
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
/* Loads and stores. */
case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
- case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
case IR_SLOAD: asm_sload(as, ir); break;
- case IR_XLOAD: asm_xload(as, ir); break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
case IR_FSTORE: asm_fstore(as, ir); break;
- /* String ops. */
+ /* Allocations. */
case IR_SNEW: asm_snew(as, ir); break;
-
- /* Table ops. */
case IR_TNEW: asm_tnew(as, ir); break;
case IR_TDUP: asm_tdup(as, ir); break;
- case IR_TLEN: asm_tlen(as, ir); break;
+
+ /* Write barriers. */
case IR_TBAR: asm_tbar(as, ir); break;
case IR_OBAR: asm_obar(as, ir); break;
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
case IR_TOSTR: asm_tostr(as, ir); break;
case IR_STRTO: asm_strto(as, ir); break;
+ /* Calls. */
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+ case IR_CARG: break;
+
default:
setintV(&as->J->errinfo, ir->o);
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
IRRef i, nins;
int inloop;
+ ra_setup(as);
+
/* Clear reg/sp for constants. */
for (i = T->nk; i < REF_BIAS; i++)
IR(i)->prev = REGSP_INIT;
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
as->curins = nins;
inloop = 0;
+ as->evenspill = SPS_FIRST;
for (i = REF_FIRST; i < nins; i++) {
IRIns *ir = IR(i);
switch (ir->o) {
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (i == as->stopins+1 && ir->op1 == ir->op2)
as->stopins++;
break;
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ /* NYI: not fastcall-aware, but doesn't matter (yet). */
+ if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
+ as->evenspill = (int32_t)CCI_NARGS(ci);
+#if LJ_64
+ ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
+#else
+ ir->prev = REGSP_HINT(RID_RET);
+#endif
+ if (inloop)
+ as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
+ (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
+ continue;
+ }
/* C calls evict all scratch regs and return results in RID_RET. */
- case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+ case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
case IR_NEWREF:
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
if (inloop)
as->modset = RSET_SCRATCH;
break;
- /* Ordered string compares evict all integer scratch registers. */
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- if (irt_isstr(ir->t) && inloop)
- as->modset |= (RSET_SCRATCH & RSET_GPR);
- break;
/* Non-constant shift counts need to be in RID_ECX. */
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
}
ir->prev = REGSP_INIT;
}
+ if ((as->evenspill & 1))
+ as->oddspill = as->evenspill++;
+ else
+ as->oddspill = 0;
}
/* -- Assembler core ------------------------------------------------------ */
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
/* Setup register allocation. */
- ra_setup(as);
asm_setup_regsp(as, T);
if (!as->loopref) {
diff --git a/src/lj_def.h b/src/lj_def.h
index dbfd5bf5..3d6ba417 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t;
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
#define checki16(x) ((x) == (int32_t)(int16_t)(x))
+#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 0d8a03ec..5c9d2bcb 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o)
}
}
-/* Mark the base metatables. */
-static void gc_mark_basemt(global_State *g)
+/* Mark GC roots. */
+static void gc_mark_gcroot(global_State *g)
{
- int i;
- for (i = 0; i < BASEMT_MAX; i++)
- if (tabref(g->basemt[i]) != NULL)
- gc_markobj(g, tabref(g->basemt[i]));
+ ptrdiff_t i;
+ for (i = 0; i < GCROOT__MAX; i++)
+ if (gcref(g->gcroot[i]) != NULL)
+ gc_markobj(g, gcref(g->gcroot[i]));
}
/* Start a GC cycle and mark the root set. */
@@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g)
gc_markobj(g, mainthread(g));
gc_markobj(g, tabref(mainthread(g)->env));
gc_marktv(g, &g->registrytv);
- gc_mark_basemt(g);
+ gc_mark_gcroot(g);
g->gc.state = GCSpropagate;
}
@@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L)
lua_assert(!iswhite(obj2gco(mainthread(g))));
gc_markobj(g, L); /* Mark running thread. */
gc_mark_curtrace(g); /* Mark current trace. */
- gc_mark_basemt(g); /* Mark base metatables (again). */
+ gc_mark_gcroot(g); /* Mark GC roots (again). */
gc_propagate_gray(g); /* Propagate all of the above. */
setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
@@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L)
}
/* Ditto, but fix the stack top first. */
-void lj_gc_step_fixtop(lua_State *L)
+void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
{
if (curr_funcisL(L)) L->top = curr_topL(L);
lj_gc_step(L);
}
/* Perform multiple GC steps. Called from JIT-compiled code. */
-void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps)
+void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
{
- cframe_pc(cframe_raw(L->cframe)) = pc;
L->top = curr_topL(L);
while (steps-- > 0 && lj_gc_step(L) == 0)
;
@@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
makewhite(g, o); /* Make it white to avoid the following barrier. */
}
-/* The reason for duplicating this is that it needs to be visible from ASM. */
-void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v)
+/* Specialized barrier for closed upvalue. Pass &uv->tv. */
+void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv)
{
- lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
- lua_assert(o->gch.gct == ~LJ_TUPVAL);
- /* Preserve invariant during propagation. Otherwise it doesn't matter. */
+#define TV2MARKED(x) \
+ (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked)))
if (g->gc.state == GCSpropagate)
- gc_mark(g, v); /* Move frontier forward. */
+ gc_mark(g, gcV(tv));
else
- makewhite(g, o); /* Make it white to avoid the following barrier. */
+ TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g);
+#undef TV2MARKED
}
/* Close upvalue. Also needs a write barrier. */
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 192066d3..0dbb9b82 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
LJ_FUNC void lj_gc_freeall(global_State *g);
LJ_FUNCA int lj_gc_step(lua_State *L);
-LJ_FUNCA void lj_gc_step_fixtop(lua_State *L);
-LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps);
+LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
+LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* GC check: drive collector forward if the GC threshold has been reached. */
@@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L);
/* Write barriers. */
LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
-LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v);
+LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv);
LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 1efb12f0..cf0b6b55 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -6,16 +6,22 @@
#define lj_ir_c
#define LUA_CORE
+/* For pointers to libc/libm functions. */
+#include <stdio.h>
+#include <math.h>
+
#include "lj_obj.h"
#if LJ_HASJIT
#include "lj_gc.h"
#include "lj_str.h"
+#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
+#include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@@ -32,6 +38,17 @@ IRDEF(IRMODE)
0
};
+/* C call info for CALL* instructions. */
+LJ_DATADEF const CCallInfo lj_ir_callinfo[] = {
+#define IRCALLCI(name, nargs, kind, type, flags) \
+ { (ASMFunction)name, \
+ (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) },
+IRCALLDEF(IRCALLCI)
+#undef IRCALLCI
+ { NULL, 0 }
+};
+
+
/* -- IR emitter ---------------------------------------------------------- */
/* Grow IR buffer at the top. */
@@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
return TREF(ref, irt_t((ir->t = fins->t)));
}
+/* Emit call to a C function. */
+TRef lj_ir_call(jit_State *J, IRCallID id, ...)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[id];
+ uint32_t n = CCI_NARGS(ci);
+ TRef tr = TREF_NIL;
+ va_list argp;
+ va_start(argp, id);
+ if ((ci->flags & CCI_L)) n--;
+ if (n > 0)
+ tr = va_arg(argp, IRRef);
+ while (n-- > 1)
+ tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef));
+ va_end(argp);
+ if (CCI_OP(ci) == IR_CALLS)
+ J->needsnap = 1; /* Need snapshot after call with side effect. */
+ return emitir(CCI_OPTYPE(ci), tr, id);
+}
+
/* -- Interning of constants ---------------------------------------------- */
/*
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a6973a81..9a7e711d 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -8,6 +8,8 @@
#include "lj_obj.h"
+/* -- IR instructions ----------------------------------------------------- */
+
/* IR instruction definition. Order matters, see below. */
#define IRDEF(_) \
/* Miscellaneous ops. */ \
@@ -101,13 +103,12 @@
_(USTORE, S , ref, ref) \
_(FSTORE, S , ref, ref) \
\
- /* String ops. */ \
- _(SNEW, N , ref, ref) \
- \
- /* Table ops. */ \
+ /* Allocations. */ \
+ _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \
_(TNEW, A , lit, lit) \
_(TDUP, A , ref, ___) \
- _(TLEN, L , ref, ___) \
+ \
+ /* Write barriers. */ \
_(TBAR, S , ref, ___) \
_(OBAR, S , ref, ref) \
\
@@ -118,6 +119,12 @@
_(TOSTR, N , ref, ___) \
_(STRTO, G , ref, ___) \
\
+ /* Calls. */ \
+ _(CALLN, N , ref, lit) \
+ _(CALLL, L , ref, lit) \
+ _(CALLS, S , ref, lit) \
+ _(CARG, N , ref, ref) \
+ \
/* End of list. */
/* IR opcodes (max. 256). */
@@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
+/* -- Named IR literals --------------------------------------------------- */
+
/* FPMATH sub-functions. ORDER FPM. */
#define IRFPMDEF(_) \
_(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
@@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM)
IRFPM__MAX
} IRFPMathOp;
-/* FLOAD field IDs. */
+/* FLOAD fields. */
#define IRFLDEF(_) \
- _(STR_LEN, GCstr, len) \
- _(FUNC_ENV, GCfunc, l.env) \
- _(TAB_META, GCtab, metatable) \
- _(TAB_ARRAY, GCtab, array) \
- _(TAB_NODE, GCtab, node) \
- _(TAB_ASIZE, GCtab, asize) \
- _(TAB_HMASK, GCtab, hmask) \
- _(TAB_NOMM, GCtab, nomm) \
- _(UDATA_META, GCudata, metatable)
+ _(STR_LEN, offsetof(GCstr, len)) \
+ _(FUNC_ENV, offsetof(GCfunc, l.env)) \
+ _(TAB_META, offsetof(GCtab, metatable)) \
+ _(TAB_ARRAY, offsetof(GCtab, array)) \
+ _(TAB_NODE, offsetof(GCtab, node)) \
+ _(TAB_ASIZE, offsetof(GCtab, asize)) \
+ _(TAB_HMASK, offsetof(GCtab, hmask)) \
+ _(TAB_NOMM, offsetof(GCtab, nomm)) \
+ _(UDATA_META, offsetof(GCudata, metatable)) \
+ _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
+ _(UDATA_FILE, sizeof(GCudata))
typedef enum {
-#define FLENUM(name, type, field) IRFL_##name,
+#define FLENUM(name, ofs) IRFL_##name,
IRFLDEF(FLENUM)
#undef FLENUM
IRFL__MAX
@@ -183,7 +194,8 @@ IRFLDEF(FLENUM)
#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
/* XLOAD mode, stored in op2. */
-#define IRXLOAD_UNALIGNED 1
+#define IRXLOAD_READONLY 1 /* Load from read-only data. */
+#define IRXLOAD_UNALIGNED 2 /* Unaligned load. */
/* TOINT mode, stored in op2. Ordered by strength of the checks. */
#define IRTOINT_CHECK 0 /* Number checked for integerness. */
@@ -191,6 +203,67 @@ IRFLDEF(FLENUM)
#define IRTOINT_ANY 2 /* Any FP number is ok. */
#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
+/* C call info for CALL* instructions. */
+typedef struct CCallInfo {
+ ASMFunction func; /* Function pointer. */
+ uint32_t flags; /* Number of arguments and flags. */
+} CCallInfo;
+
+#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */
+#define CCI_NARGS_MAX 16 /* Max. # of args. */
+
+#define CCI_OTSHIFT 16
+#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
+#define CCI_OPSHIFT 24
+#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
+
+#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
+#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
+#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
+#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL)
+#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL)
+#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL)
+
+/* C call info flags. */
+#define CCI_L 0x0100 /* Implicit L arg. */
+#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
+#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
+#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
+
+/* Function definitions for CALL* instructions. */
+#define IRCALLDEF(_) \
+ _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
+ _(lj_str_new, 3, S, STR, CCI_L) \
+ _(lj_str_tonum, 2, FN, INT, 0) \
+ _(lj_str_fromint, 2, FN, STR, CCI_L) \
+ _(lj_str_fromnum, 2, FN, STR, CCI_L) \
+ _(lj_tab_new1, 2, FS, TAB, CCI_L) \
+ _(lj_tab_dup, 2, FS, TAB, CCI_L) \
+ _(lj_tab_newkey, 3, S, PTR, CCI_L) \
+ _(lj_tab_len, 1, FL, INT, 0) \
+ _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \
+ _(lj_gc_barrieruv, 2, FS, NIL, 0) \
+ _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
+ _(sinh, 1, N, NUM, 0) \
+ _(cosh, 1, N, NUM, 0) \
+ _(tanh, 1, N, NUM, 0) \
+ _(fputc, 2, S, INT, 0) \
+ _(fwrite, 4, S, INT, 0) \
+ _(fflush, 1, S, INT, 0) \
+ \
+ /* End of list. */
+
+typedef enum {
+#define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name,
+IRCALLDEF(IRCALLENUM)
+#undef IRCALLENUM
+ IRCALL__MAX
+} IRCallID;
+
+LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
+
+/* -- IR operands --------------------------------------------------------- */
+
/* IR operand mode (2 bit). */
typedef enum {
IRMref, /* IR reference. */
@@ -227,6 +300,8 @@ typedef enum {
LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
+/* -- IR instruction types ------------------------------------------------ */
+
/* IR result type and flags (8 bit). */
typedef enum {
/* Map of itypes to non-negative numbers. ORDER LJ_T */
@@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
/* Stored combined IR opcode and type. */
typedef uint16_t IROpT;
+/* -- IR references ------------------------------------------------------- */
+
/* IR references. */
typedef uint16_t IRRef1; /* One stored reference. */
typedef uint32_t IRRef2; /* Two stored references. */
@@ -382,6 +459,8 @@ typedef uint32_t TRef;
#define TREF_FALSE (TREF_PRI(IRT_FALSE))
#define TREF_TRUE (TREF_PRI(IRT_TRUE))
+/* -- IR format ----------------------------------------------------------- */
+
/* IR instruction format (64 bit).
**
** 16 16 8 8 8 8
@@ -425,5 +504,6 @@ typedef union IRIns {
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_knum(ir) (mref((ir)->ptr, cTValue))
+#define ir_kptr(ir) (mref((ir)->ptr, void))
#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 69b0a955..52077ad5 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -6,6 +6,8 @@
#ifndef _LJ_IROPT_H
#define _LJ_IROPT_H
+#include <stdarg.h>
+
#include "lj_obj.h"
#include "lj_jit.h"
@@ -13,6 +15,7 @@
/* IR emitter. */
LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
+LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
/* Save current IR in J->fold.ins, but do not emit it (yet). */
static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
@@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
/* Emit IR instructions with on-the-fly optimizations. */
LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim);
/* Special return values for the fold functions. */
enum {
@@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
-LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
/* Dead-store elimination. */
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 683c66d6..d8254093 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
- (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o)))))
+ (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o)))))
lj_err_argt(L, narg, LUA_TNUMBER);
return numV(o);
}
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 59a0f2be..a7a6317e 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
#define LIBINIT_FFID 0xfe
#define LIBINIT_END 0xff
+/* Exported library functions. */
+
+typedef struct RandomState RandomState;
+LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
+
#endif
diff --git a/src/lj_meta.c b/src/lj_meta.c
index dff01f85..1182d908 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
else if (tvisudata(o))
mt = tabref(udataV(o)->metatable);
else
- mt = tabref(G(L)->basemt[itypemap(o)]);
+ mt = tabref(basemt_obj(G(L), o));
if (mt) {
cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
if (mo)
@@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n)
{
if (tvisnum(o))
return o;
- else if (tvisstr(o) && lj_str_numconv(strVdata(o), n))
+ else if (tvisstr(o) && lj_str_tonum(strV(o), n))
return n;
else
return NULL;
@@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
top = curr_top(L);
setcont(top, ne ? lj_cont_condf : lj_cont_condt);
copyTV(L, top+1, mo);
- it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA;
+ it = ~o1->gch.gct;
setgcV(L, top+2, &o1->gch, it);
setgcV(L, top+3, &o2->gch, it);
return top+2; /* Trigger metamethod call. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 9101f053..cebeda9b 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -315,7 +315,7 @@ typedef struct GCstr {
/* Userdata object. Payload follows. */
typedef struct GCudata {
GCHeader;
- uint8_t unused1;
+ uint8_t udtype; /* Userdata type. */
uint8_t unused2;
GCRef env; /* Should be at same offset in GCfunc. */
MSize len; /* Size of payload. */
@@ -323,6 +323,13 @@ typedef struct GCudata {
uint32_t align1; /* To force 8 byte alignment of the payload. */
} GCudata;
+/* Userdata types. */
+enum {
+ UDTYPE_USERDATA, /* Regular userdata. */
+ UDTYPE_IO_FILE, /* I/O library FILE. */
+ UDTYPE__MAX
+};
+
#define uddata(u) ((void *)((u)+1))
#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
@@ -496,7 +503,17 @@ MMDEF(MMENUM)
MM_FAST = MM_eq
} MMS;
-#define BASEMT_MAX ((~LJ_TNUMX)+1)
+/* GC root IDs. */
+typedef enum {
+ GCROOT_BASEMT, /* Metatables for base types. */
+ GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */
+ GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
+ GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */
+ GCROOT__MAX
+} GCRootID;
+
+#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)])
+#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
typedef struct GCState {
MSize total; /* Memory currently allocated. */
@@ -544,7 +561,7 @@ typedef struct global_State {
volatile int32_t vmstate; /* VM state or current JIT code trace number. */
GCRef jit_L; /* Current JIT code lua_State or NULL. */
MRef jit_base; /* Current JIT code L->base. */
- GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */
+ GCRef gcroot[GCROOT__MAX]; /* GC roots. */
GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
} global_State;
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 2102561d..98266d21 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -282,21 +282,50 @@ LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
TValue n;
- if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
+ if (lj_str_tonum(ir_kstr(fleft), &n))
return lj_ir_knum(J, numV(&n));
return FAILFOLD;
}
-LJFOLD(SNEW STRREF KINT)
-LJFOLDF(kfold_snew)
+LJFOLD(SNEW KPTR KINT)
+LJFOLDF(kfold_snew_kptr)
+{
+ GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
+ return lj_ir_kstr(J, s);
+}
+
+LJFOLD(SNEW any KINT)
+LJFOLDF(kfold_snew_empty)
{
if (fright->i == 0)
return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
+ return NEXTFOLD;
+}
+
+LJFOLD(STRREF KGC KINT)
+LJFOLDF(kfold_strref)
+{
+ GCstr *str = ir_kstr(fleft);
+ lua_assert((MSize)fright->i < str->len);
+ return lj_ir_kptr(J, (char *)strdata(str) + fright->i);
+}
+
+LJFOLD(STRREF SNEW any)
+LJFOLDF(kfold_strref_snew)
+{
PHIBARRIER(fleft);
- if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
- const char *s = strdata(ir_kstr(IR(fleft->op1)));
- int32_t ofs = IR(fleft->op2)->i;
- return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
+ if (irref_isk(fins->op2) && fright->i == 0) {
+ return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
+ } else {
+ /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
+ IRIns *ir = IR(fleft->op1);
+ IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
+ lua_assert(ir->o == IR_STRREF);
+ PHIBARRIER(ir);
+ fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
+ fins->op1 = str;
+ fins->ot = IRT(IR_STRREF, IRT_PTR);
+ return RETRYFOLD;
}
return NEXTFOLD;
}
@@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp)
}
}
-LJFOLD(LT KGC KGC)
-LJFOLD(GE KGC KGC)
-LJFOLD(LE KGC KGC)
-LJFOLD(GT KGC KGC)
-LJFOLDF(kfold_strcomp)
+LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
+LJFOLDF(kfold_strcmp)
{
- if (irt_isstr(fins->t)) {
- GCstr *a = ir_kstr(fleft);
- GCstr *b = ir_kstr(fright);
- return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
+ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
+ GCstr *a = ir_kstr(IR(fleft->op1));
+ GCstr *b = ir_kstr(IR(fleft->op2));
+ return INTFOLD(lj_str_cmp(a, b));
}
return NEXTFOLD;
}
@@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc)
uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
len == 2 ? IRT(IR_XLOAD, IRT_U16) :
IRTI(IR_XLOAD));
- TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
+ TRef tmp = emitir(ot, strref,
+ IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
if (len == 3)
tmp = emitir(IRTI(IR_BAND), tmp,
@@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload)
LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)
-LJFOLD(TLEN any)
-LJFOLDX(lj_opt_fwd_tlen)
+LJFOLD(CALLL any IRCALL_lj_tab_len)
+LJFOLDX(lj_opt_fwd_tab_len)
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah)
/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
-LJFOLDF(fload_str_len)
+LJFOLDF(fload_str_len_kgc)
{
if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
return INTFOLD((int32_t)ir_kstr(fleft)->len);
return NEXTFOLD;
}
+LJFOLD(FLOAD SNEW IRFL_STR_LEN)
+LJFOLDF(fload_str_len_snew)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
+ PHIBARRIER(fleft);
+ return fleft->op2;
+ }
+ return NEXTFOLD;
+}
+
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLDX(lj_opt_cse)
@@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload)
return J->slot[fins->op1];
}
-/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
-LJFOLD(XLOAD STRREF any)
-LJFOLDF(xload_str)
+LJFOLD(XLOAD KPTR any)
+LJFOLDF(xload_kptr)
{
- if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
- GCstr *str = ir_kstr(IR(fleft->op1));
- int32_t ofs = IR(fleft->op2)->i;
- lua_assert((MSize)ofs < str->len);
- lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
- return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
+ /* Only fold read-only integer loads for now. */
+ if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t))
+ return INTFOLD(kfold_xload(fins, ir_kptr(fleft)));
+ return NEXTFOLD;
+}
+
+/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
+LJFOLD(XLOAD any any)
+LJFOLDF(fwd_xload)
+{
+ IRRef ref = J->chain[IR_XLOAD];
+ IRRef op1 = fins->op1;
+ while (ref > op1) {
+ if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
+ return ref;
+ ref = IR(ref)->prev;
}
- return CSEFOLD;
+ return EMITFOLD;
}
-/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
/* -- Write barriers ------------------------------------------------------ */
@@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)
LJFOLD(NEWREF any any) /* Treated like a store. */
+LJFOLD(CALLS any any)
+LJFOLD(CALLL any any) /* Safeguard fallback. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
-LJFOLDF(store_raw)
-{
- return EMITFOLD;
-}
+LJFOLDX(lj_ir_emit)
/* ------------------------------------------------------------------------ */
@@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
}
}
+/* CSE with explicit search limit. */
+TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
+{
+ IRRef ref = J->chain[fins->o];
+ IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
+ while (ref > lim) {
+ if (IR(ref)->op12 == op12)
+ return ref;
+ ref = IR(ref)->prev;
+ }
+ return lj_ir_emit(J);
+}
+
/* ------------------------------------------------------------------------ */
#undef IR
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f9a2a808..90ab1b6f 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J)
/* Undo any partial changes made by the loop optimization. */
static void loop_undo(jit_State *J, IRRef ins)
{
+ ptrdiff_t i;
lj_ir_rollback(J, ins);
+ for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
+ BPropEntry *bp = &J->bpropcache[i];
+ if (bp->val >= ins)
+ bp->key = 0;
+ }
for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
IRIns *ir = IR(ins);
irt_clearphi(ir->t);
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 94fc4ad8..882ba6c5 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_ULOAD];
- while (ref > lim) {
- IRIns *load = IR(ref);
- if (load->op1 == uref)
- return ref; /* Load forwarding. */
- ref = load->prev;
- }
- return EMITFOLD; /* Conflict or no match. */
+ return lj_opt_cselim(J, lim);
}
/* USTORE elimination. */
@@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
conflict:
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_FLOAD];
- while (ref > lim) {
- IRIns *load = IR(ref);
- if (load->op1 == oref && load->op2 == fid)
- return ref; /* Load forwarding. */
- ref = load->prev;
- }
- return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ return lj_opt_cselim(J, lim);
}
/* FSTORE elimination. */
@@ -458,10 +444,10 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
-/* -- TLEN forwarding ----------------------------------------------------- */
+/* -- Forwarding of lj_tab_len -------------------------------------------- */
/* This is rather simplistic right now, but better than nothing. */
-TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
+TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
{
IRRef tab = fins->op1; /* Table reference. */
IRRef lim = tab; /* Search limit. */
@@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
}
/* Try to find a matching load. Below the conflicting store, if any. */
- ref = J->chain[IR_TLEN];
- while (ref > lim) {
- IRIns *tlen = IR(ref);
- if (tlen->op1 == tab)
- return ref; /* Load forwarding. */
- ref = tlen->prev;
- }
- return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+ return lj_opt_cselim(J, lim);
}
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 60a6afb8..b9107c5e 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
{
lua_Number n;
- if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc))
+ if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
n = numV(vc);
/* Limit narrowing for pow to small exponents (or for two constants). */
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 000772fe..1de07e92 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
GCstr *s = lj_str_new(L, str, len);
TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
+ lj_gc_check(L);
return s;
}
diff --git a/src/lj_record.c b/src/lj_record.c
index 68a233b9..9b223ff6 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
} else {
/* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
- mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]);
+ mt = tabref(basemt_obj(J2G(J), &ix->tabv));
if (mt == NULL)
return 0; /* No metamethod. */
mix.tab = lj_ir_ktab(J, mt);
@@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
/* Get runtime value of int argument. */
static int32_t argv2int(jit_State *J, TValue *o)
{
- if (tvisstr(o) && !lj_str_numconv(strVdata(o), o))
+ if (tvisstr(o) && !lj_str_tonum(strV(o), o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
return lj_num2bit(numV(o));
}
@@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
/* Otherwise res[0] already contains the result. */
} else if (tref_isnumber(tr)) {
res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ } else if (tref_ispri(tr)) {
+ res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)]));
} else {
recff_err_nyi(J, rd);
}
@@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = emitir(IRTN(IR_ATAN2), y, x);
}
+static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd)
+{ /* Record a one-argument call; rd->data selects the callee. */
+ TRef tr = lj_ir_tonum(J, arg[0]); /* First argument via lj_ir_tonum(). */
+ res[0] = lj_ir_call(J, rd->data, tr); /* Single result: the call itself. */
+}
+
static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
{
TRef tr = arg[0];
- if (tref_isinteger(arg[0])) {
+ if (tref_isinteger(tr)) {
res[0] = tr;
res[1] = lj_ir_kint(J, 0);
} else {
@@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
{
+ TRef tr = lj_ir_tonum(J, arg[0]);
if (!tref_isnumber_str(arg[1]))
lj_trace_err(J, LJ_TRERR_BADTYPE);
- res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]);
+ res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]);
UNUSED(rd);
}
@@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
res[0] = tr;
}
+static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd)
+{ /* Record math.random() with zero, one or two arguments. */
+ GCudata *ud = udataV(&rd->fn->c.upvalue[0]); /* Upvalue 0 of the function. */
+ TRef tr, one;
+ lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. Result is unused. */
+ tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));
+ one = lj_ir_knum_one(J);
+ tr = emitir(IRTN(IR_SUB), tr, one); /* d = step result - 1.0 (presumably step yields [1,2) -- confirm). */
+ if (arg[0]) { /* At least one argument: scale d into a range. */
+ TRef tr1 = lj_ir_tonum(J, arg[0]);
+ if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */
+ TRef tr2 = lj_ir_tonum(J, arg[1]);
+ tr2 = emitir(IRTN(IR_SUB), tr2, tr1); /* r2-r1 */
+ tr2 = emitir(IRTN(IR_ADD), tr2, one); /* r2-r1+1.0 */
+ tr = emitir(IRTN(IR_MUL), tr, tr2);
+ tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
+ tr = emitir(IRTN(IR_ADD), tr, tr1);
+ } else { /* d = floor(d*r1) + 1.0 */
+ tr = emitir(IRTN(IR_MUL), tr, tr1);
+ tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
+ tr = emitir(IRTN(IR_ADD), tr, one);
+ }
+ } /* No arguments: d is returned unscaled. */
+ res[0] = tr;
+}
+
/* -- Bit library fast functions ------------------------------------------ */
/* Record unary bit.tobit, bit.bnot, bit.bswap. */
@@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
- res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0);
+ res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
}
} else { /* Empty range or range underflow: return no results. */
emitir(IRTGI(IR_LE), trend, trstart);
@@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
- res[0] = emitir(IRTI(IR_TLEN), arg[0], 0);
+ res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
} /* else: Interpreter will throw. */
UNUSED(rd);
}
@@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd)
{
if (tref_istab(arg[0])) {
if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */
- TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
+ TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
MSize len = lj_tab_len(t);
emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
@@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
rd->nres = 0;
if (tref_istab(arg[0]) && arg[1]) {
if (!arg[2]) { /* Simple push: t[#t+1] = v */
- TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0);
+ TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
GCtab *t = tabV(&rd->argv[0]);
RecordIndex ix;
ix.tab = arg[0];
@@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
+/* -- I/O library fast functions ------------------------------------------ */
+
+/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
+** no need to encode the alternate cases for any of the guards.
+**
+** id != 0: the userdata is loaded from the gcroot[id] slot.
+** id == 0: the userdata is arg[0]; a non-userdata raises a BADTYPE trace
+** error, and its udtype field is guarded to equal UDTYPE_IO_FILE.
+** In both cases the file field is then loaded, guarded to be non-null
+** and returned.
+*/
+static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id)
+{
+ TRef tr, ud, fp;
+ if (id) { /* io.func() */
+ tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); /* Constant address of the slot. */
+ ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
+ } else { /* fp:method() */
+ ud = arg[0];
+ if (!tref_isudata(ud))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
+ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); /* Must be a file. */
+ }
+ fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE);
+ emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD)); /* fp != NULL */
+ return fp;
+}
+
+static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd)
+{ /* Record a write of every argument to the file from recff_io_fp(). */
+ TRef fp = recff_io_fp(J, res, rd->data);
+ TRef zero = lj_ir_kint(J, 0);
+ TRef one = lj_ir_kint(J, 1);
+ ptrdiff_t i = rd->data == 0 ? 1 : 0; /* Method form: arg[0] is the file. */
+ for (; arg[i]; i++) { /* One call per remaining argument. */
+ TRef str = lj_ir_tostr(J, arg[i]);
+ TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero); /* Offset 0 of str. */
+ TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
+ if (tref_isk(len) && IR(tref_ref(len))->i == 1) { /* Constant length 1. */
+ TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
+ tr = lj_ir_call(J, IRCALL_fputc, tr, fp); /* Single byte: fputc. */
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); /* -1: presumably EOF. */
+ } else {
+ TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp); /* len items of size 1. */
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_EQ), tr, len); /* Guard: item count equals len. */
+ }
+ }
+ res[0] = TREF_TRUE; /* The recorded result is always true. */
+}
+
+static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd)
+{ /* Record an fflush() call on the file from recff_io_fp(). */
+ TRef fp = recff_io_fp(J, res, rd->data);
+ TRef tr = lj_ir_call(J, IRCALL_fflush, fp);
+ if (rd->cres != 0) /* Check result only if requested. */
+ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0)); /* Guard: call returned 0. */
+ res[0] = TREF_TRUE; /* The recorded result is always true. */
+}
+
/* -- Record calls and returns -------------------------------------------- */
#undef arg
@@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J)
if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
} else if (ta == IRT_STR) {
if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
+ ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
+ rc = lj_ir_kint(J, 0);
+ ta = IRT_INT;
} else {
rec_mm_comp(J, &ix, (int)op);
break;
@@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J)
if (tref_isstr(rc)) {
rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
} else if (tref_istab(rc)) {
- rc = emitir(IRTI(IR_TLEN), rc, 0);
+ rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
} else {
ix.tab = rc;
copyTV(J->L, &ix.tabv, &ix.keyv);
@@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J)
/* fallthrough */
case BC_CALL:
callop:
- if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
- }
rec_call(J, ra, (int)(rb-1), (int)(rc-1));
break;
@@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T)
BCReg j;
for (j = 0; j < s; j++)
if (snap_ref(map[j]) == ref) {
- if (ir->o == IR_FRAME && irt_isfunc(ir->t))
+ if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
+ lua_assert(s != 0);
J->baseslot = s+1;
+ J->framedepth++;
+ }
tr = J->slot[j];
goto dupslot;
}
@@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T)
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
- J->baseslot = s+1;
- J->framedepth++;
+ if (s != 0) {
+ J->baseslot = s+1;
+ J->framedepth++;
+ }
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 09cd095c..d27404f2 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr)
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
if (isluafunc(fn)) {
TValue *fs;
- newbase = o+1;
- fs = newbase + funcproto(fn)->framesize;
+ fs = o+1 + funcproto(fn)->framesize;
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
+ if (s != 0) newbase = o+1;
}
}
}
@@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr)
setnilV(o); /* Clear unreferenced slots of newly added frames. */
}
}
- if (newbase) { /* Clear remainder of newly added frames. */
- L->base = newbase;
- if (ntop >= L->maxstack) { /* Need to grow the stack again. */
- MSize need = (MSize)(ntop - o);
- L->top = o;
- lj_state_growstack(L, need);
- o = L->top;
- ntop = o + need;
- }
- L->top = curr_topL(L);
- for (; o < ntop; o++)
- setnilV(o);
- } else { /* Must not clear slots of existing frame. */
- L->top = curr_topL(L);
+ if (newbase) L->base = newbase;
+ if (ntop >= L->maxstack) { /* Need to grow the stack again. */
+ MSize need = (MSize)(ntop - o);
+ L->top = o;
+ lj_state_growstack(L, need);
+ o = L->top;
+ ntop = o + need;
}
+ L->top = curr_topL(L);
+ for (; o < ntop; o++) /* Clear remainder of newly added frames. */
+ setnilV(o);
lua_assert(map + nslots == flinks-1);
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
}
diff --git a/src/lj_state.h b/src/lj_state.h
index 54e85405..4e4185c0 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -17,7 +17,7 @@
LJ_FUNC void lj_state_relimitstack(lua_State *L);
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
-LJ_FUNCA void lj_state_growstack1(lua_State *L);
+LJ_FUNC void lj_state_growstack1(lua_State *L);
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
{
diff --git a/src/lj_str.c b/src/lj_str.c
index 26f91cba..62322b59 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -21,7 +21,7 @@
/* -- String interning ---------------------------------------------------- */
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
-int32_t lj_str_cmp(GCstr *a, GCstr *b)
+int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
{
MSize i, n = a->len > b->len ? b->len : a->len;
for (i = 0; i < n; i += 4) {
@@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
/* -- Type conversions ---------------------------------------------------- */
+/* Convert string object to number.
+** Thin wrapper: passes the character data of str and the n pointer
+** unchanged to lj_str_numconv() and returns its result.
+*/
+int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n)
+{
+ return lj_str_numconv(strdata(str), n);
+}
+
/* Convert string to number. */
-int lj_str_numconv(const char *s, TValue *n)
+int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n)
{
lua_Number sign = 1;
const uint8_t *p = (const uint8_t *)s;
@@ -167,7 +173,7 @@ parsedbl:
}
/* Convert number to string. */
-GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
+GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
{
char s[LUAI_MAXNUMBER2STR];
lua_Number n = *np;
@@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
}
/* Convert integer to string. */
-GCstr *lj_str_fromint(lua_State *L, int32_t k)
+GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
{
char s[1+10];
char *p = s+sizeof(s);
diff --git a/src/lj_str.h b/src/lj_str.h
index f7e56d16..e8b242c0 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -11,7 +11,7 @@
#include "lj_obj.h"
/* String interning. */
-LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b);
+LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */
-LJ_FUNCA int lj_str_numconv(const char *s, TValue *n);
-LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k);
+LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n);
+LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
+LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
/* String formatting. */
LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 9af51027..ceafb770 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
return t;
}
+GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
+{ /* Create a table from one packed size argument. */
+ GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); /* Low 24 bits, top 8 bits (presumably asize and hbits as in lj_tab_new -- confirm). */
+ clearapart(t);
+ if (t->hmask > 0) clearhpart(t); /* Skipped when hmask is 0. */
+ return t;
+}
+
/* Duplicate a table. */
-GCtab *lj_tab_dup(lua_State *L, const GCtab *kt)
+GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
{
GCtab *t;
uint32_t asize, hmask;
@@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
{
uint32_t b, sum, na = 0, sz = 0, nn = *narray;
- for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++)
- if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) {
+ for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++)
+ if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) {
sz = (2u<<b)+1;
na = sum;
}
@@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j)
** Try to find a boundary in table `t'. A `boundary' is an integer index
** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
*/
-MSize lj_tab_len(GCtab *t)
+MSize LJ_FASTCALL lj_tab_len(GCtab *t)
{
MSize j = (MSize)t->asize;
if (j > 1 && tvisnil(arrayslot(t, j-1))) {
diff --git a/src/lj_tab.h b/src/lj_tab.h
index e9e8bcd1..b2a8c3aa 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -11,7 +11,8 @@
#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
-LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt);
+LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
+LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
@@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
(inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
-LJ_FUNCA MSize lj_tab_len(GCtab *t);
+LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
#endif
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 3ee4fa00..2fb3c4b8 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -32,6 +32,11 @@ enum {
/* Calling conventions. */
RID_RET = RID_EAX,
+#if LJ_64
+ RID_FPRET = RID_XMM0,
+#else
+ RID_RETHI = RID_EDX,
+#endif
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_EDX, /* Interpreter BASE. */
@@ -98,8 +103,8 @@ enum {
};
/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
-#define sps_scale(slot) (4 * (int32_t)(slot))
-#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3))
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
/* -- Exit state ---------------------------------------------------------- */
@@ -185,6 +190,7 @@ typedef enum {
XO_ARITHib = XO_(80),
XO_ARITHi = XO_(81),
XO_ARITHi8 = XO_(83),
+ XO_ARITHiw8 = XO_66(83),
XO_SHIFTi = XO_(c1),
XO_SHIFT1 = XO_(d1),
XO_SHIFTcl = XO_(d3),
@@ -216,6 +222,7 @@ typedef enum {
XO_CVTSI2SD = XO_f20f(2a),
XO_CVTSD2SI = XO_f20f(2d),
XO_CVTTSD2SI= XO_f20f(2c),
+ XO_MOVD = XO_660f(6e),
XO_MOVDto = XO_660f(7e),
XO_FLDq = XO_(dd), XOg_FLDq = 0,
diff --git a/src/lj_udata.c b/src/lj_udata.c
index 863889c9..717d483b 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
global_State *g = G(L);
newwhite(g, ud); /* Not finalized. */
ud->gct = ~LJ_TUDATA;
+ ud->udtype = UDTYPE_USERDATA;
ud->len = sz;
/* NOBARRIER: The GCudata is new (marked white). */
setgcrefnull(ud->metatable);