diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2023-01-26 18:26:18 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2023-02-24 17:27:45 -0500 |
commit | 4eb9c234886993e569ec43504f3c547c464ece4e (patch) | |
tree | 0c0a6b3a5dc022c247395c61f279f08374e367fa | |
parent | e9e7a00da2c5963a53dbcedaec793400fde07563 (diff) | |
download | haskell-4eb9c234886993e569ec43504f3c547c464ece4e.tar.gz |
JS: make some arithmetic primops faster (#22835)
Don't use BigInt for wordAdd2, mulWord32, and timesInt32.
Co-authored-by: Matthew Craven <5086-clyring@users.noreply.gitlab.haskell.org>
-rw-r--r-- | compiler/GHC/StgToJS/Prim.hs | 4 | ||||
-rw-r--r-- | rts/js/arith.js | 93 | ||||
-rw-r--r-- | testsuite/tests/numeric/should_run/all.T | 6 |
3 files changed, 69 insertions, 34 deletions
diff --git a/compiler/GHC/StgToJS/Prim.hs b/compiler/GHC/StgToJS/Prim.hs index 6085b110cf..8687b1db72 100644 --- a/compiler/GHC/StgToJS/Prim.hs +++ b/compiler/GHC/StgToJS/Prim.hs @@ -59,7 +59,7 @@ genPrim prof bound ty op = case op of IntAddOp -> \[r] [x,y] -> PrimInline $ r |= toI32 (Add x y) IntSubOp -> \[r] [x,y] -> PrimInline $ r |= toI32 (Sub x y) - IntMulOp -> \[r] [x,y] -> PrimInline $ r |= app "h$mulInt32" [x, y] + IntMulOp -> \[r] [x,y] -> PrimInline $ r |= app "Math.imul" [x, y] IntMul2Op -> \[c,hr,lr] [x,y] -> PrimInline $ appT [c,hr,lr] "h$hs_timesInt2" [x, y] IntMulMayOfloOp -> \[r] [x,y] -> PrimInline $ jVar \tmp -> mconcat [ tmp |= Mul x y @@ -374,7 +374,7 @@ genPrim prof bound ty op = case op of ] WordAdd2Op -> \[h,l] [x,y] -> PrimInline $ appT [h,l] "h$wordAdd2" [x,y] WordSubOp -> \ [r] [x,y] -> PrimInline $ r |= toU32 (Sub x y) - WordMulOp -> \ [r] [x,y] -> PrimInline $ r |= app "h$mulWord32" [x, y] + WordMulOp -> \ [r] [x,y] -> PrimInline $ r |= toU32 (app "Math.imul" [x, y]) WordMul2Op -> \[h,l] [x,y] -> PrimInline $ appT [h,l] "h$mul2Word32" [x,y] WordQuotOp -> \ [q] [x,y] -> PrimInline $ q |= app "h$quotWord32" [x,y] WordRemOp -> \ [r] [x,y] -> PrimInline $ r |= app "h$remWord32" [x,y] diff --git a/rts/js/arith.js b/rts/js/arith.js index 66649c31a9..e5aaaa1e12 100644 --- a/rts/js/arith.js +++ b/rts/js/arith.js @@ -203,38 +203,73 @@ function h$hs_uncheckedShiftRLInt64(h,l,n) { RETURN_UBX_TUP2(rh,rl); } -var h$mulInt32 = Math.imul; - // Compute product of two Ints. Returns (nh,ch,cl) // where (ch,cl) are the two parts of the 64-bit result // and nh is 0 if ch can be safely dropped (i.e. it's a sign-extension of cl). function h$hs_timesInt2(l1,l2) { - var a = I32(l1); - var b = I32(l2); - var r = BigInt.asIntN(64, a * b); - TRACE_ARITH("Int32: " + a + " * " + b + " ==> " + r + " (Int64)") - - var rh = I64h(r); - var rl = I64l(r)|0; - var nh = ((rh === 0 && rl >= 0) || (rh === -1 && rl < 0)) ? 0 : 1; - RETURN_UBX_TUP3(nh, rh, rl); + var ah = l1 >> 16; + var al = l1 & 0xFFFF; + var bh = l2 >> 16; + var bl = l2 & 0xFFFF; + + var r0 = al * bl; + var r1 = r0 >>> 16; + r0 &= 0xFFFF; + + r1 += al * bh; + var r2 = r1 >> 16; + r1 &= 0xFFFF; + + r1 += ah * bl; + r2 += r1 >> 16; + r1 &= 0xFFFF; + + r2 += ah * bh; + var r3 = (r2 >> 16) & 0xFFFF; + r2 &= 0xFFFF; + + const rh = r3 << 16 | r2; + const rl = r1 << 16 | r0; + + var s = rl >> 31; + if (rh === s) { + TRACE_ARITH("Int32: " + l1 + " * " + l2 + " ==> " + rl + " (Int64)") + RETURN_UBX_TUP3(0, s, rl); + } + else { + TRACE_ARITH("Int32: " + l1 + " * " + l2 + " ==> " + rh + " " + rl + " (Int64)") + RETURN_UBX_TUP3(1, rh, rl); + } } -function h$mulWord32(l1,l2) { - var a = W32(l1); - var b = W32(l2); - var r = BigInt.asUintN(32, a * b); - TRACE_ARITH("Word32: " + a + " * " + b + " ==> " + r) - RETURN_W32(r); -} - function h$mul2Word32(l1,l2) { - var a = W32(l1); - var b = W32(l2); - var r = BigInt.asUintN(64, a * b); - TRACE_ARITH("Word32: " + a + " * " + b + " ==> " + r + " (Word64)") - RETURN_W64(r); + var ah = l1 >>> 16; + var al = l1 & 0xFFFF; + var bh = l2 >>> 16; + var bl = l2 & 0xFFFF; + + var r0 = al * bl; + var r1 = r0 >>> 16; + r0 &= 0xFFFF; + + r1 += al * bh; + var r2 = r1 >>> 16; + r1 &= 0xFFFF; + + r1 += ah * bl; + r2 += r1 >>> 16; + r1 &= 0xFFFF; + + r2 += ah * bh; + var r3 = (r2 >>> 16) & 0xFFFF; + r2 &= 0xFFFF; + + const rh = (r3 << 16 | r2) >>> 0; + const rl = (r1 << 16 | r0) >>> 0; + + TRACE_ARITH("Word32: " + l1 + " * " + l2 + " ==> " + rh + " " + rl + " (Word64)") + RETURN_UBX_TUP2(rh,rl); } function h$quotWord32(n,d) { @@ -272,11 +307,11 @@ function h$quotRem2Word32(nh,nl,d) { } function h$wordAdd2(l1,l2) { - var a = W32(l1); - var b = W32(l2); - var r = BigInt.asUintN(64, a + b); - TRACE_ARITH("Word32: " + a + " + " + b + " ==> " + r + " (Word64)") - RETURN_W64(r); + var r = (l1 >>> 1) + (l2 >>> 1) + (1 & l1 & l2); + var h = r >>> 31; + var l = (l1 + l2) >>> 0; + TRACE_ARITH("Word32: " + a + " + " + b + " ==> " + h + " " + l + " (Word64)") + RETURN_UBX_TUP2(h,l); } function h$isDoubleNegativeZero(d) { diff --git a/testsuite/tests/numeric/should_run/all.T b/testsuite/tests/numeric/should_run/all.T index a9d5ef7b12..0833931f77 100644 --- a/testsuite/tests/numeric/should_run/all.T +++ b/testsuite/tests/numeric/should_run/all.T @@ -63,7 +63,7 @@ test('T9407', normal, compile_and_run, ['']) test('T9810', normal, compile_and_run, ['']) test('T10011', normal, compile_and_run, ['']) test('T10962', omit_ways(['ghci']), compile_and_run, ['-O2']) -test('T11702', [unless(arch("javascript"),extra_ways(['optasm']))], compile_and_run, ['']) +test('T11702', [unless(js_arch(),extra_ways(['optasm']))], compile_and_run, ['']) test('T12136', normal, compile_and_run, ['']) test('T15301', normal, compile_and_run, ['-O2']) test('T497', normal, compile_and_run, ['-O']) @@ -79,5 +79,5 @@ test('IntegerToFloat', normal, compile_and_run, ['']) test('T20291', normal, compile_and_run, ['']) test('T22282', normal, compile_and_run, ['']) -test('T22671', js_broken(22835), compile_and_run, ['']) -test('foundation', js_broken(22576), compile_and_run, ['-O -package transformers']) +test('T22671', normal, compile_and_run, ['']) +test('foundation', [when(js_arch(), run_timeout_multiplier(2))], compile_and_run, ['-O -package transformers']) |