summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs32
-rw-r--r--compiler/cmm/CmmMachOp.hs1
-rw-r--r--compiler/cmm/PprC.hs1
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs1
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs21
-rw-r--r--compiler/prelude/primops.txt.pp7
-rw-r--r--libraries/ghc-prim/changelog.md10
-rw-r--r--testsuite/tests/codeGen/should_run/all.T1
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun079.hs98
11 files changed, 174 insertions, 0 deletions
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index e309d061a8..3728c0cac2 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -1426,10 +1426,17 @@ dispatchPrimop dflags = \case
if ncg && (x86ish || ppc) || llvm
then Left (MO_U_Mul2 (wordWidth dflags))
else Right genericWordMul2Op
+
+ IntMul2Op -> \_ -> OpDest_CallishHandledLater $
+ if ncg && x86ish
+ then Left (MO_S_Mul2 (wordWidth dflags))
+ else Right genericIntMul2Op
+
FloatFabsOp -> \_ -> OpDest_CallishHandledLater $
if (ncg && x86ish || ppc) || llvm
then Left MO_F32_Fabs
else Right $ genericFabsOp W32
+
DoubleFabsOp -> \_ -> OpDest_CallishHandledLater $
if (ncg && x86ish || ppc) || llvm
then Left MO_F64_Fabs
@@ -1870,6 +1877,31 @@ genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
topHalf (CmmReg r)])]
genericWordMul2Op _ _ = panic "genericWordMul2Op"
+genericIntMul2Op :: GenericOp
+genericIntMul2Op [res_c, res_h, res_l] [arg_x, arg_y]
+ = do dflags <- getDynFlags
+ -- Implement algorithm from Hacker's Delight, 2nd edition, p.174
+ let t = cmmExprType dflags arg_x
+ p <- newTemp t
+ -- 1) compute the multiplication as if numbers were unsigned
+ let wordMul2 = fromMaybe (panic "Unsupported out-of-line WordMul2Op")
+ (emitPrimOp dflags WordMul2Op [arg_x,arg_y])
+ wordMul2 [p,res_l]
+ -- 2) correct the high bits of the unsigned result
+ let carryFill x = CmmMachOp (MO_S_Shr ww) [x, wwm1]
+ sub x y = CmmMachOp (MO_Sub ww) [x, y]
+ and x y = CmmMachOp (MO_And ww) [x, y]
+ neq x y = CmmMachOp (MO_Ne ww) [x, y]
+ f x y = (carryFill x) `and` y
+ wwm1 = CmmLit (CmmInt (fromIntegral (widthInBits ww - 1)) ww)
+ rl x = CmmReg (CmmLocal x)
+ ww = wordWidth dflags
+ emit $ catAGraphs
+ [ mkAssign (CmmLocal res_h) (rl p `sub` f arg_x arg_y `sub` f arg_y arg_x)
+ , mkAssign (CmmLocal res_c) (rl res_h `neq` carryFill (rl res_l))
+ ]
+genericIntMul2Op _ _ = panic "genericIntMul2Op"
+
-- This replicates what we had in libraries/base/GHC/Float.hs:
--
-- abs x | x == 0 = 0 -- handles (-0.0)
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 9740d21bef..f8b7d4fb74 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -583,6 +583,7 @@ data CallishMachOp
| MO_UF_Conv Width
+ | MO_S_Mul2 Width
| MO_S_QuotRem Width
| MO_U_QuotRem Width
| MO_U_QuotRem2 Width
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 506116c584..d94bc01e03 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -825,6 +825,7 @@ pprCallishMachOp_for_C mop
(MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w)
(MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w)
+ MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index f86207e081..c8d88a8c2a 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -833,6 +833,7 @@ cmmPrimOpFunctions mop = do
MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow."
++ showSDoc dflags (ppr $ widthToLlvmInt w)
+ MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 11759fb1a5..05883d0e5a 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -2021,6 +2021,7 @@ genCCall' dflags gcp target dest_regs args
MO_AtomicRead _ -> unsupported
MO_AtomicWrite _ -> unsupported
+ MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index e24180e535..46b29d0a03 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -681,6 +681,7 @@ outOfLineMachOp_table mop
MO_AtomicRead w -> fsLit $ atomicReadLabel w
MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
+ MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 1807bdcea1..59a1e4115b 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2613,6 +2613,26 @@ genCCall' _ is32Bit target dest_regs args bid = do
MOV format (OpReg rax) (OpReg reg_l)]
return code
_ -> panic "genCCall: Wrong number of arguments/results for mul2"
+ (PrimTarget (MO_S_Mul2 width), [res_c, res_h, res_l]) ->
+ case args of
+ [arg_x, arg_y] ->
+ do (y_reg, y_code) <- getRegOrMem arg_y
+ x_code <- getAnyReg arg_x
+ reg_tmp <- getNewRegNat II8
+ let format = intFormat width
+ reg_h = getRegisterReg platform (CmmLocal res_h)
+ reg_l = getRegisterReg platform (CmmLocal res_l)
+ reg_c = getRegisterReg platform (CmmLocal res_c)
+ code = y_code `appOL`
+ x_code rax `appOL`
+ toOL [ IMUL2 format y_reg
+ , MOV format (OpReg rdx) (OpReg reg_h)
+ , MOV format (OpReg rax) (OpReg reg_l)
+ , SETCC CARRY (OpReg reg_tmp)
+ , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
+ ]
+ return code
+ _ -> panic "genCCall: Wrong number of arguments/results for imul2"
_ -> if is32Bit
then genCCall32' dflags target dest_regs args
@@ -3204,6 +3224,7 @@ outOfLineCmmOp bid mop res args
MO_UF_Conv _ -> unsupported
+ MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 076854b4d2..0faf180061 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -251,6 +251,13 @@ primop IntMulOp "*#"
with commutable = True
fixity = infixl 7
+primop IntMul2Op "timesInt2#" GenPrimOp
+ Int# -> Int# -> (# Int#, Int#, Int# #)
+ {Return a triple (isHighNeeded,high,low) where high and low are respectively
+ the high and low bits of the double-word result. isHighNeeded is a cheap way
+ to test if the high word is a sign-extension of the low word (isHighNeeded =
+ 0#) or not (isHighNeeded = 1#).}
+
primop IntMulMayOfloOp "mulIntMayOflo#"
Dyadic Int# -> Int# -> Int#
{Return non-zero if there is any possibility that the upper word of a
diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md
index 411d118aa1..cf14d21c81 100644
--- a/libraries/ghc-prim/changelog.md
+++ b/libraries/ghc-prim/changelog.md
@@ -27,6 +27,16 @@
reverses the order of its bits e.g. `0b110001` becomes `0b100011`.
These primitives use optimized machine instructions when available.
+- Add Int# multiplication primop:
+
+ timesInt2# :: Int# -> Int# -> (# Int#, Int#, Int# #)
+
+ `timesInt2#` computes the multiplication of its two parameters and returns a
+ triple (isHighNeeded,high,low) where high and low are respectively the high
+ and low bits of the double-word result. isHighNeeded is a cheap way to test
+ if the high word is a sign-extension of the low word (isHighNeeded = 0#) or
+ not (isHighNeeded = 1#).
+
## 0.6.0
- Shipped with GHC 8.8.1
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index f96820de81..01516136c2 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -88,6 +88,7 @@ test('cgrun075', normal, compile_and_run, [''])
test('cgrun076', normal, compile_and_run, [''])
test('cgrun077', [when(have_cpu_feature('bmi2'), extra_hc_opts('-mbmi2'))], compile_and_run, [''])
test('cgrun078', omit_ways(['ghci']), compile_and_run, [''])
+test('cgrun079', normal, compile_and_run, [''])
test('T1852', normal, compile_and_run, [''])
test('T1861', extra_run_opts('0'), compile_and_run, [''])
diff --git a/testsuite/tests/codeGen/should_run/cgrun079.hs b/testsuite/tests/codeGen/should_run/cgrun079.hs
new file mode 100644
index 0000000000..e299c860c3
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun079.hs
@@ -0,0 +1,98 @@
+{-# LANGUAGE CPP, MagicHash, BlockArguments, UnboxedTuples #-}
+
+-- Tests for the timesInt2# primop
+module Main ( main ) where
+
+import Data.Bits
+import GHC.Int
+import GHC.Prim
+import GHC.Word
+import Control.Monad
+
+#include "MachDeps.h"
+
+
+imul2 :: Int -> Int -> (Int,Int,Int)
+imul2 (I# x) (I# y) = case timesInt2# x y of
+ (# c, h, l #) -> (I# c, I# h, I# l)
+
+checkImul2 :: Int -> Int -> IO ()
+checkImul2 x y = do
+ -- First we compare against Integer result. Note that this test will become
+ -- moot when Integer implementation will use this primitive
+ let
+ w2 = fromIntegral x * (fromIntegral y :: Integer)
+ (c,h,l) = imul2 x y
+ w = case c of
+ 0 -> fromIntegral l
+ _ -> int2ToInteger h l
+
+ unless (w == w2) do
+ putStrLn $ mconcat
+ [ "Failed: "
+ , show x
+ , " * "
+ , show y
+ , "\n Got: "
+ , show w
+ , "\n Expected: "
+ , show w2
+ ]
+
+ -- Now we compare with a generic version using unsigned multiply.
+ -- This reimplements the fallback generic version that the compiler uses when
+ -- the mach-op isn't available so it'd better be correct too.
+ let (c',h',l') = genericIMul2 x y
+
+ unless ((c,h,l) == (c',h',l')) do
+ putStrLn $ mconcat
+ [ "Failed: "
+ , show x
+ , " * "
+ , show y
+ , "\n Got: "
+ , show (c,h,l)
+ , "\n Expected: "
+ , show (c',h',l')
+ ]
+
+addWordC :: Word -> Word -> (Word,Word)
+addWordC (W# x) (W# y) = case addWordC# x y of
+ (# l,c #) -> (W# (int2Word# c), W# l)
+
+int2ToInteger :: Int -> Int -> Integer
+int2ToInteger h l
+ | h < 0 = case addWordC (complement (fromIntegral l)) 1 of
+ (c,w) -> -1 * word2ToInteger (c + complement (fromIntegral h)) w
+ | otherwise = word2ToInteger (fromIntegral h) (fromIntegral l)
+ where
+ word2ToInteger :: Word -> Word -> Integer
+ word2ToInteger x y = (fromIntegral x) `shiftL` WORD_SIZE_IN_BITS + fromIntegral y
+
+timesWord2 :: Word -> Word -> (Int,Int)
+timesWord2 (W# x) (W# y) = case timesWord2# x y of
+ (# h, l #) -> (I# (word2Int# h), I# (word2Int# l))
+
+genericIMul2 :: Int -> Int -> (Int,Int,Int)
+genericIMul2 x y = (c,h,l)
+ where
+ (p,l) = timesWord2 (fromIntegral x) (fromIntegral y)
+ h = p - f x y - f y x
+ c = if h == carryFill l then 0 else 1
+ f u v = carryFill u .&. v
+
+ -- Return either 00..00 or FF..FF depending on the carry
+ carryFill :: Int -> Int
+ carryFill x = x `shiftR` (WORD_SIZE_IN_BITS - 1)
+
+
+main = do
+ checkImul2 10 10
+ checkImul2 10 (-10)
+ checkImul2 minBound (-1)
+ checkImul2 maxBound (-1)
+ checkImul2 minBound 0
+ checkImul2 maxBound 0
+ checkImul2 minBound minBound
+ checkImul2 minBound maxBound
+ checkImul2 maxBound maxBound