summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2022-03-08 19:44:10 +0100
committer Mike Pall <mike> 2022-03-08 19:44:10 +0100
commit 96d6d5032098ea9f0002165394a8774dcaa0c0ce (patch)
tree 84712e02c1452d8f0c48a6f506a9aa1ebe0f4a32
parent aa0550e3f0999cc6ef62f7ef80493ebef8c9a2f7 (diff)
download luajit2-96d6d5032098ea9f0002165394a8774dcaa0c0ce.tar.gz
Revert to trivial pow() optimizations to prevent inaccuracies.
-rw-r--r-- src/lj_asm.c 3
-rw-r--r-- src/lj_dispatch.h 2
-rw-r--r-- src/lj_ffrecord.c 4
-rw-r--r-- src/lj_ircall.h 3
-rw-r--r-- src/lj_iropt.h 1
-rw-r--r-- src/lj_opt_fold.c 37
-rw-r--r-- src/lj_opt_narrow.c 24
-rw-r--r-- src/lj_opt_split.c 2
-rw-r--r-- src/lj_record.c 2
-rw-r--r-- src/lj_vm.h 3
-rw-r--r-- src/lj_vmmath.c 44
-rw-r--r-- src/vm_arm.dasc 13
-rw-r--r-- src/vm_arm64.dasc 11
-rw-r--r-- src/vm_mips.dasc 11
-rw-r--r-- src/vm_mips64.dasc 11
-rw-r--r-- src/vm_ppc.dasc 11
-rw-r--r-- src/vm_x64.dasc 9
-rw-r--r-- src/vm_x86.dasc 11
18 files changed, 45 insertions, 157 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index eaee5547..32dd7e87 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1680,8 +1680,7 @@ static void asm_pow(ASMState *as, IRIns *ir)
IRCALL_lj_carith_powu64);
else
#endif
- asm_callid(as, ir, irt_isnum(IR(ir->op2)->t) ? IRCALL_lj_vm_pow :
- IRCALL_lj_vm_powi);
+ asm_callid(as, ir, IRCALL_pow);
}
static void asm_div(ASMState *as, IRIns *ir)
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index d0f86fab..0594af51 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -44,7 +44,7 @@ extern double __divdf3(double a, double b);
#define GOTDEF(_) \
_(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
_(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
- _(lj_vm_pow) _(fmod) _(ldexp) _(lj_vm_modi) \
+ _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
_(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
_(lj_dispatch_profile) _(lj_err_throw) \
_(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index a19f6fdc..60c1d84f 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -638,8 +638,8 @@ static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
{
- J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
- &rd->argv[0], &rd->argv[1]);
+ J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
+ &rd->argv[0], &rd->argv[1], IR_POW);
UNUSED(rd);
}
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index c4d4a7b8..67fb58ae 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -217,8 +217,7 @@ typedef struct CCallInfo {
_(FPMATH, sqrt, 1, N, NUM, XA_FP) \
_(ANY, log, 1, N, NUM, XA_FP) \
_(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
- _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
- _(ANY, lj_vm_pow, 2, N, NUM, XA2_FP) \
+ _(ANY, pow, 2, N, NUM, XA2_FP) \
_(ANY, atan2, 2, N, NUM, XA2_FP) \
_(ANY, ldexp, 2, N, NUM, XA_FP) \
_(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 7bce34c7..d239f173 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -145,7 +145,6 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
-LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 34f70e27..92bdc553 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -236,14 +236,10 @@ LJFOLDF(kfold_fpcall2)
return NEXTFOLD;
}
-LJFOLD(POW KNUM KINT)
LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
- lua_Number a = knumleft;
- lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
- lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
- return lj_ir_knum(J, y);
+ return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}
/* Must not use kfold_kref for numbers (could be NaN). */
@@ -1113,34 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
return RETRYFOLD;
}
-LJFOLD(POW any KINT)
-LJFOLDF(simplify_numpow_xkint)
+LJFOLD(POW any KNUM)
+LJFOLDF(simplify_numpow_k)
{
- int32_t k = fright->i;
- TRef ref = fins->op1;
- if (k == 0) /* x ^ 0 ==> 1 */
+ if (knumright == 0) /* x ^ 0 ==> 1 */
return lj_ir_knum_one(J); /* Result must be a number, not an int. */
- if (k == 1) /* x ^ 1 ==> x */
+ else if (knumright == 1) /* x ^ 1 ==> x */
return LEFTFOLD;
- if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
+ else if (knumright == 2) /* x ^ 2 ==> x * x */
+ return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
+ else
return NEXTFOLD;
- if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
- ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
- k = -k;
- }
- /* Unroll x^k for 1 <= k <= 65536. */
- for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
- ref = emitir(IRTN(IR_MUL), ref, ref);
- if ((k >>= 1) != 0) { /* Handle trailing bits. */
- TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
- for (; k != 1; k >>= 1) {
- if (k & 1)
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- tmp = emitir(IRTN(IR_MUL), tmp, tmp);
- }
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- }
- return ref;
}
/* -- Simplify conversions ------------------------------------------------ */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index fe92468e..586f1bc7 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -584,30 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
return emitir(IRTN(IR_SUB), rb, tmp);
}
-/* Narrowing of power operator or math.pow. */
-TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
-{
- rb = conv_str_tonum(J, rb, vb);
- rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
- rc = conv_str_tonum(J, rc, vc);
- if (tvisint(vc) || numisint(numV(vc))) {
- int32_t k = numberVint(vc);
- if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
- if (!tref_isinteger(rc)) {
- /* Guarded conversion to integer! */
- rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
- }
- if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
- TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
- emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
- }
- } else {
-force_pow_num:
- rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */
- }
- return emitir(IRTN(IR_POW), rb, rc);
-}
-
/* -- Predictive narrowing of induction variables ------------------------- */
/* Narrow a single runtime value. */
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 6d32712b..18937c30 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -400,7 +400,7 @@ static void split_ir(jit_State *J)
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
break;
case IR_POW:
- hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);  /* pow(double, double): split both args like IR_DIV, not (double, int). */
break;
case IR_FPMATH:
hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
diff --git a/src/lj_record.c b/src/lj_record.c
index d4d687e7..59798844 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -2394,7 +2394,7 @@ void lj_record_ins(jit_State *J)
case BC_POW:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
- rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
+ rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
else
rc = rec_mm_arith(J, &ix, MM_pow);
break;
diff --git a/src/lj_vm.h b/src/lj_vm.h
index bfa7e0fd..c66db004 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -98,9 +98,6 @@ LJ_ASMF int lj_vm_errno(void);
LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
#endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
-LJ_ASMF double lj_vm_pow(double, double);
-
/* Continuations for metamethods. */
LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index fa0de922..b6cc60ba 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -30,52 +30,12 @@ LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
#endif
/* -- Helper functions ---------------------------------------------------- */
-/* Unsigned x^k. */
-static double lj_vm_powui(double x, uint32_t k)
-{
- double y;
- lj_assertX(k != 0, "pow with zero exponent");
- for (; (k & 1) == 0; k >>= 1) x *= x;
- y = x;
- if ((k >>= 1) != 0) {
- for (;;) {
- x *= x;
- if (k == 1) break;
- if (k & 1) y *= x;
- k >>= 1;
- }
- y *= x;
- }
- return y;
-}
-
-/* Signed x^k. */
-double lj_vm_powi(double x, int32_t k)
-{
- if (k > 1)
- return lj_vm_powui(x, (uint32_t)k);
- else if (k == 1)
- return x;
- else if (k == 0)
- return 1.0;
- else
- return 1.0 / lj_vm_powui(x, (uint32_t)-k);
-}
-
-double lj_vm_pow(double x, double y)
-{
- int32_t k = lj_num2int(y);
- if ((k >= -65536 && k <= 65536) && y == (double)k)
- return lj_vm_powi(x, k);
- else
- return pow(x, y);
-}
-
double lj_vm_foldarith(double x, double y, int op)
{
switch (op) {
@@ -84,7 +44,7 @@ double lj_vm_foldarith(double x, double y, int op)
case IR_MUL - IR_ADD: return x*y; break;
case IR_DIV - IR_ADD: return x/y; break;
case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
- case IR_POW - IR_ADD: return lj_vm_pow(x, y); break;
+ case IR_POW - IR_ADD: return pow(x, y); break;
case IR_NEG - IR_ADD: return -x; break;
case IR_ABS - IR_ADD: return fabs(x); break;
#if LJ_HASJIT
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 636619fd..770c1602 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1477,11 +1477,11 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|.endmacro
|
- |.macro math_extern2, name, func
+ |.macro math_extern2, func
|.if HFABI
- | .ffunc_dd math_ .. name
+ | .ffunc_dd math_ .. func
|.else
- | .ffunc_nn math_ .. name
+ | .ffunc_nn math_ .. func
|.endif
| .IOS mov RA, BASE
| bl extern func
@@ -1492,9 +1492,6 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_restv
|.endif
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.if FPU
| .ffunc_d math_sqrt
@@ -1540,7 +1537,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3206,7 +3203,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | ins_arithfp extern, extern lj_vm_pow
+ | ins_arithfp extern, extern pow
break;
case BC_CAT:
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 7ef9ffba..f5f1b5f1 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -1387,14 +1387,11 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_resn
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| bl extern func
| b ->fff_resn
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.ffunc_n math_sqrt
| fsqrt d0, d0
@@ -1423,7 +1420,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -2677,7 +2674,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_arithload FARG1, FARG2
| ins_arithfallback ins_arithcheck_num
|.if "fpins" == "fpow"
- | bl extern lj_vm_pow
+ | bl extern pow
|.else
| fpins FARG1, FARG1, FARG2
|.endif
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index cf791f74..34645bf1 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1623,17 +1623,14 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
|. load_got func
| call_extern
|. nop
| b ->fff_resn
|. nop
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
@@ -1687,7 +1684,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3692,7 +3689,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltiu AT, SFARG1HI, LJ_TISNUM
| sltiu TMP0, SFARG2HI, LJ_TISNUM
| and AT, AT, TMP0
- | load_got lj_vm_pow
+ | load_got pow
| beqz AT, ->vmeta_arith
|. addu RA, BASE, RA
|.if FPU
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
index 3b916379..651bc42e 100644
--- a/src/vm_mips64.dasc
+++ b/src/vm_mips64.dasc
@@ -1667,17 +1667,14 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
|. load_got func
| call_extern
|. nop
| b ->fff_resn
|. nop
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
@@ -1731,7 +1728,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3918,7 +3915,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltiu TMP0, TMP0, LJ_TISNUM
| sltiu TMP1, TMP1, LJ_TISNUM
| and AT, TMP0, TMP1
- | load_got lj_vm_pow
+ | load_got pow
| beqz AT, ->vmeta_arith
|. daddu RA, BASE, RA
|.if FPU
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index cc4d56d7..3cad37d2 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -2012,14 +2012,11 @@ static void build_subroutines(BuildCtx *ctx)
| b ->fff_resn
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| blex func
| b ->fff_resn
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
|.macro math_round, func
| .ffunc_1 math_ .. func
@@ -2144,7 +2141,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -4142,7 +4139,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
- | blex lj_vm_pow
+ | blex pow
| ins_next1
|.if FPU
| stfdx FARG1, BASE, RA
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index 4aa8589c..8dd48b84 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -1755,16 +1755,13 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->fff_resxmm0
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nn math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
| mov RB, BASE
| call extern func
| mov BASE, RB
| jmp ->fff_resxmm0
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
| math_extern log10
| math_extern exp
@@ -1777,7 +1774,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 36af852d..de12ac64 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -2138,8 +2138,8 @@ static void build_subroutines(BuildCtx *ctx)
| jmp ->fff_resfp
|.endmacro
|
- |.macro math_extern2, name, func
- | .ffunc_nnsse math_ .. name
+ |.macro math_extern2, func
+ | .ffunc_nnsse math_ .. func
|.if not X64
| movsd FPARG1, xmm0
| movsd FPARG3, xmm1
@@ -2149,9 +2149,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov BASE, RB
| jmp ->fff_resfp
|.endmacro
- |.macro math_extern2, func
- | math_extern2 func, func
- |.endmacro
|
| math_extern log10
| math_extern exp
@@ -2164,7 +2161,7 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern sinh
| math_extern cosh
| math_extern tanh
- | math_extern2 pow, lj_vm_pow
+ | math_extern2 pow
| math_extern2 atan2
| math_extern2 fmod
|
@@ -3922,7 +3919,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| movsd FPARG1, xmm0
| movsd FPARG3, xmm1
|.endif
- | call extern lj_vm_pow
+ | call extern pow
| movzx RA, PC_RA
| mov BASE, RB
|.if X64