diff options
-rw-r--r-- | compiler/GHC/StgToCmm/Prim.hs | 32 | ||||
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 21 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 7 | ||||
-rw-r--r-- | libraries/ghc-prim/changelog.md | 10 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/cgrun079.hs | 98 |
11 files changed, 174 insertions, 0 deletions
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index e309d061a8..3728c0cac2 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -1426,10 +1426,17 @@ dispatchPrimop dflags = \case if ncg && (x86ish || ppc) || llvm then Left (MO_U_Mul2 (wordWidth dflags)) else Right genericWordMul2Op + + IntMul2Op -> \_ -> OpDest_CallishHandledLater $ + if ncg && x86ish + then Left (MO_S_Mul2 (wordWidth dflags)) + else Right genericIntMul2Op + FloatFabsOp -> \_ -> OpDest_CallishHandledLater $ if (ncg && x86ish || ppc) || llvm then Left MO_F32_Fabs else Right $ genericFabsOp W32 + DoubleFabsOp -> \_ -> OpDest_CallishHandledLater $ if (ncg && x86ish || ppc) || llvm then Left MO_F64_Fabs @@ -1870,6 +1877,31 @@ genericWordMul2Op [res_h, res_l] [arg_x, arg_y] topHalf (CmmReg r)])] genericWordMul2Op _ _ = panic "genericWordMul2Op" +genericIntMul2Op :: GenericOp +genericIntMul2Op [res_c, res_h, res_l] [arg_x, arg_y] + = do dflags <- getDynFlags + -- Implement algorithm from Hacker's Delight, 2nd edition, p.174 + let t = cmmExprType dflags arg_x + p <- newTemp t + -- 1) compute the multiplication as if numbers were unsigned + let wordMul2 = fromMaybe (panic "Unsupported out-of-line WordMul2Op") + (emitPrimOp dflags WordMul2Op [arg_x,arg_y]) + wordMul2 [p,res_l] + -- 2) correct the high bits of the unsigned result + let carryFill x = CmmMachOp (MO_S_Shr ww) [x, wwm1] + sub x y = CmmMachOp (MO_Sub ww) [x, y] + and x y = CmmMachOp (MO_And ww) [x, y] + neq x y = CmmMachOp (MO_Ne ww) [x, y] + f x y = (carryFill x) `and` y + wwm1 = CmmLit (CmmInt (fromIntegral (widthInBits ww - 1)) ww) + rl x = CmmReg (CmmLocal x) + ww = wordWidth dflags + emit $ catAGraphs + [ mkAssign (CmmLocal res_h) (rl p `sub` f arg_x arg_y `sub` f arg_y arg_x) + , mkAssign (CmmLocal res_c) (rl res_h `neq` carryFill (rl res_l)) + ] +genericIntMul2Op _ _ = panic "genericIntMul2Op" + -- This replicates what we had in libraries/base/GHC/Float.hs: -- -- abs x | x == 0 = 0 -- handles (-0.0) diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 9740d21bef..f8b7d4fb74 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -583,6 +583,7 @@ data CallishMachOp | MO_UF_Conv Width + | MO_S_Mul2 Width | MO_S_QuotRem Width | MO_U_QuotRem Width | MO_U_QuotRem2 Width diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 506116c584..d94bc01e03 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -825,6 +825,7 @@ pprCallishMachOp_for_C mop (MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w) (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) + MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index f86207e081..c8d88a8c2a 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -833,6 +833,7 @@ cmmPrimOpFunctions mop = do MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow." ++ showSDoc dflags (ppr $ widthToLlvmInt w) + MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 11759fb1a5..05883d0e5a 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -2021,6 +2021,7 @@ genCCall' dflags gcp target dest_regs args MO_AtomicRead _ -> unsupported MO_AtomicWrite _ -> unsupported + MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index e24180e535..46b29d0a03 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -681,6 +681,7 @@ outOfLineMachOp_table mop MO_AtomicRead w -> fsLit $ atomicReadLabel w MO_AtomicWrite w -> fsLit $ atomicWriteLabel w + MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 1807bdcea1..59a1e4115b 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -2613,6 +2613,26 @@ genCCall' _ is32Bit target dest_regs args bid = do MOV format (OpReg rax) (OpReg reg_l)] return code _ -> panic "genCCall: Wrong number of arguments/results for mul2" + (PrimTarget (MO_S_Mul2 width), [res_c, res_h, res_l]) -> + case args of + [arg_x, arg_y] -> + do (y_reg, y_code) <- getRegOrMem arg_y + x_code <- getAnyReg arg_x + reg_tmp <- getNewRegNat II8 + let format = intFormat width + reg_h = getRegisterReg platform (CmmLocal res_h) + reg_l = getRegisterReg platform (CmmLocal res_l) + reg_c = getRegisterReg platform (CmmLocal res_c) + code = y_code `appOL` + x_code rax `appOL` + toOL [ IMUL2 format y_reg + , MOV format (OpReg rdx) (OpReg reg_h) + , MOV format (OpReg rax) (OpReg reg_l) + , SETCC CARRY (OpReg reg_tmp) + , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) + ] + return code + _ -> panic "genCCall: Wrong number of arguments/results for imul2" _ -> if is32Bit then genCCall32' dflags target dest_regs args @@ -3204,6 +3224,7 @@ outOfLineCmmOp bid mop res args MO_UF_Conv _ -> unsupported + MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 076854b4d2..0faf180061 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -251,6 +251,13 @@ primop IntMulOp "*#" with commutable = True fixity = infixl 7 +primop IntMul2Op "timesInt2#" GenPrimOp + Int# -> Int# -> (# Int#, Int#, Int# #) + {Return a triple (isHighNeeded,high,low) where high and low are respectively + the high and low bits of the double-word result. isHighNeeded is a cheap way + to test if the high word is a sign-extension of the low word (isHighNeeded = + 0#) or not (isHighNeeded = 1#).} + primop IntMulMayOfloOp "mulIntMayOflo#" Dyadic Int# -> Int# -> Int# {Return non-zero if there is any possibility that the upper word of a diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md index 411d118aa1..cf14d21c81 100644 --- a/libraries/ghc-prim/changelog.md +++ b/libraries/ghc-prim/changelog.md @@ -27,6 +27,16 @@ reverses the order of its bits e.g. `0b110001` becomes `0b100011`. These primitives use optimized machine instructions when available. +- Add Int# multiplication primop: + + timesInt2# :: Int# -> Int# -> (# Int#, Int#, Int# #) + + `timesInt2#` computes the multiplication of its two parameters and returns a + triple (isHighNeeded,high,low) where high and low are respectively the high + and low bits of the double-word result. isHighNeeded is a cheap way to test + if the high word is a sign-extension of the low word (isHighNeeded = 0#) or + not (isHighNeeded = 1#). + ## 0.6.0 - Shipped with GHC 8.8.1 diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index f96820de81..01516136c2 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -88,6 +88,7 @@ test('cgrun075', normal, compile_and_run, ['']) test('cgrun076', normal, compile_and_run, ['']) test('cgrun077', [when(have_cpu_feature('bmi2'), extra_hc_opts('-mbmi2'))], compile_and_run, ['']) test('cgrun078', omit_ways(['ghci']), compile_and_run, ['']) +test('cgrun079', normal, compile_and_run, ['']) test('T1852', normal, compile_and_run, ['']) test('T1861', extra_run_opts('0'), compile_and_run, ['']) diff --git a/testsuite/tests/codeGen/should_run/cgrun079.hs b/testsuite/tests/codeGen/should_run/cgrun079.hs new file mode 100644 index 0000000000..e299c860c3 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/cgrun079.hs @@ -0,0 +1,98 @@ +{-# LANGUAGE CPP, MagicHash, BlockArguments, UnboxedTuples #-} + +-- Tests for the timesInt2# primop +module Main ( main ) where + +import Data.Bits +import GHC.Int +import GHC.Prim +import GHC.Word +import Control.Monad + +#include "MachDeps.h" + + +imul2 :: Int -> Int -> (Int,Int,Int) +imul2 (I# x) (I# y) = case timesInt2# x y of + (# c, h, l #) -> (I# c, I# h, I# l) + +checkImul2 :: Int -> Int -> IO () +checkImul2 x y = do + -- First we compare against Integer result. Note that this test will become + -- moot when Integer implementation will use this primitive + let + w2 = fromIntegral x * (fromIntegral y :: Integer) + (c,h,l) = imul2 x y + w = case c of + 0 -> fromIntegral l + _ -> int2ToInteger h l + + unless (w == w2) do + putStrLn $ mconcat + [ "Failed: " + , show x + , " * " + , show y + , "\n Got: " + , show w + , "\n Expected: " + , show w2 + ] + + -- Now we compare with a generic version using unsigned multiply. + -- This reimplements the fallback generic version that the compiler uses when + -- the mach-op isn't available so it'd better be correct too. + let (c',h',l') = genericIMul2 x y + + unless ((c,h,l) == (c',h',l')) do + putStrLn $ mconcat + [ "Failed: " + , show x + , " * " + , show y + , "\n Got: " + , show (c,h,l) + , "\n Expected: " + , show (c',h',l') + ] + +addWordC :: Word -> Word -> (Word,Word) +addWordC (W# x) (W# y) = case addWordC# x y of + (# l,c #) -> (W# (int2Word# c), W# l) + +int2ToInteger :: Int -> Int -> Integer +int2ToInteger h l + | h < 0 = case addWordC (complement (fromIntegral l)) 1 of + (c,w) -> -1 * word2ToInteger (c + complement (fromIntegral h)) w + | otherwise = word2ToInteger (fromIntegral h) (fromIntegral l) + where + word2ToInteger :: Word -> Word -> Integer + word2ToInteger x y = (fromIntegral x) `shiftL` WORD_SIZE_IN_BITS + fromIntegral y + +timesWord2 :: Word -> Word -> (Int,Int) +timesWord2 (W# x) (W# y) = case timesWord2# x y of + (# h, l #) -> (I# (word2Int# h), I# (word2Int# l)) + +genericIMul2 :: Int -> Int -> (Int,Int,Int) +genericIMul2 x y = (c,h,l) + where + (p,l) = timesWord2 (fromIntegral x) (fromIntegral y) + h = p - f x y - f y x + c = if h == carryFill l then 0 else 1 + f u v = carryFill u .&. v + + -- Return either 00..00 or FF..FF depending on the carry + carryFill :: Int -> Int + carryFill x = x `shiftR` (WORD_SIZE_IN_BITS - 1) + + +main = do + checkImul2 10 10 + checkImul2 10 (-10) + checkImul2 minBound (-1) + checkImul2 maxBound (-1) + checkImul2 minBound 0 + checkImul2 maxBound 0 + checkImul2 minBound minBound + checkImul2 minBound maxBound + checkImul2 maxBound maxBound |