summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/codeGen/StgCmmPrim.hs3
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs34
-rw-r--r--testsuite/tests/primops/should_run/T9430.hs18
3 files changed, 54 insertions, 1 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 563f6dcc4a..243e2a32ac 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -823,7 +823,8 @@ callishPrimOpSupported dflags op
|| llvm -> Left (MO_SubIntC (wordWidth dflags))
| otherwise -> Right genericIntSubCOp
- WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags))
+ WordMul2Op | ncg && x86ish
+ || llvm -> Left (MO_U_Mul2 (wordWidth dflags))
| otherwise -> Right genericWordMul2Op
_ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 15350bca7d..fb02120747 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -256,6 +256,38 @@ genCall t@(PrimTarget op) [] args
`appOL` stmts4 `snocOL` call
return (stmts, top1 ++ top2)
+-- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands
+-- twice the width (we first zero-extend them), e.g., on 64-bit arch we will
+-- generate 'mul' on 128-bit operands. Then we only need some plumbing to
+-- extract the two 64-bit values out of 128-bit result.
+genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = do
+ let width = widthToLlvmInt w
+ bitWidth = widthInBits w
+ width2x = LMInt (bitWidth * 2)
+ -- First zero-extend the operands ('mul' instruction requires the operands
+ -- and the result to be of the same type). Note that we don't use 'castVars'
+ -- because it tries to do LM_Sext.
+ (lhsVar, stmts1, decls1) <- exprToVar lhs
+ (rhsVar, stmts2, decls2) <- exprToVar rhs
+ (lhsExt, stmt3) <- doExpr width2x $ Cast LM_Zext lhsVar width2x
+ (rhsExt, stmt4) <- doExpr width2x $ Cast LM_Zext rhsVar width2x
+ -- Do the actual multiplication (note that the result is also 2x width).
+ (retV, stmt5) <- doExpr width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt
+ -- Extract the lower bits of the result into retL.
+ (retL, stmt6) <- doExpr width $ Cast LM_Trunc retV width
+ -- Now we right-shift the higher bits by width.
+ let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width
+ (retShifted, stmt7) <- doExpr width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit
+ -- And extract them into retH.
+ (retH, stmt8) <- doExpr width $ Cast LM_Trunc retShifted width
+ dstRegL <- getCmmReg (CmmLocal dstL)
+ dstRegH <- getCmmReg (CmmLocal dstH)
+ let storeL = Store retL dstRegL
+ storeH = Store retH dstRegH
+ stmts = stmts1 `appOL` stmts2 `appOL`
+ toOL [ stmt3 , stmt4, stmt5, stmt6, stmt7, stmt8, storeL, storeH ]
+ return (stmts, decls1 ++ decls2)
+
-- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
-- which we need to extract the actual values.
genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] =
@@ -621,6 +653,8 @@ cmmPrimOpFunctions mop = do
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
+ -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the
+ -- appropriate case of genCall.
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
diff --git a/testsuite/tests/primops/should_run/T9430.hs b/testsuite/tests/primops/should_run/T9430.hs
index 571b6db37d..aec2d264a1 100644
--- a/testsuite/tests/primops/should_run/T9430.hs
+++ b/testsuite/tests/primops/should_run/T9430.hs
@@ -73,3 +73,21 @@ main = do
checkW (1, minBound + 1) plusWord2# maxBound 2
check "plusWord2# 2 maxBound" $
checkW (1, minBound + 1) plusWord2# 2 maxBound
+
+ check "timesWord2# maxBound 0" $ checkW (0, 0) timesWord2# maxBound 0
+ check "timesWord2# 0 maxBound" $ checkW (0, 0) timesWord2# 0 maxBound
+ check "timesWord2# maxBound 1" $ checkW (0, maxBound) timesWord2# maxBound 1
+ check "timesWord2# 1 maxBound" $ checkW (0, maxBound) timesWord2# 1 maxBound
+ -- Overflows
+ check "timesWord2# " $ checkW (1, 0) timesWord2# (2 ^ 63) 2
+ check "timesWord2# " $ checkW (2, 0) timesWord2# (2 ^ 63) (2 ^ 2)
+ check "timesWord2# " $ checkW (4, 0) timesWord2# (2 ^ 63) (2 ^ 3)
+ check "timesWord2# " $ checkW (8, 0) timesWord2# (2 ^ 63) (2 ^ 4)
+ check "timesWord2# maxBound 2" $
+ checkW (1, maxBound - 1) timesWord2# maxBound 2
+ check "timesWord2# 2 maxBound" $
+ checkW (1, maxBound - 1) timesWord2# 2 maxBound
+ check "timesWord2# maxBound 3" $
+ checkW (2, maxBound - 2) timesWord2# maxBound 3
+ check "timesWord2# 3 maxBound" $
+ checkW (2, maxBound - 2) timesWord2# 3 maxBound