diff options
author | Michal Terepeta <michal.terepeta@gmail.com> | 2015-07-20 15:43:31 +0200 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-07-20 16:43:34 +0200 |
commit | 82ffc80df573512f788524c4616db3c08fc9f125 (patch) | |
tree | bf35684068e0376bb30013d22288a81966427965 | |
parent | 029367e5894d4ac3ea3845f39f2cc1b5a6a9fac1 (diff) | |
download | haskell-82ffc80df573512f788524c4616db3c08fc9f125.tar.gz |
LlvmCodeGen: add support for MO_U_Mul2 CallishMachOp
This adds support for MO_U_Mul2 to the LLVM backend by simply using the 'mul'
instruction, but operating at twice the bit width (e.g., for 64-bit
words we will generate a 'mul' that operates on 128 bits and then extract
the two 64-bit values for the result of the CallishMachOp).
Test Plan: validate
Reviewers: rwbarton, austin, bgamari
Reviewed By: bgamari
Subscribers: thomie
Differential Revision: https://phabricator.haskell.org/D1068
GHC Trac Issues: #9430
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 3 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 34 | ||||
-rw-r--r-- | testsuite/tests/primops/should_run/T9430.hs | 18 |
3 files changed, 54 insertions, 1 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 563f6dcc4a..243e2a32ac 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -823,7 +823,8 @@ callishPrimOpSupported dflags op || llvm -> Left (MO_SubIntC (wordWidth dflags)) | otherwise -> Right genericIntSubCOp - WordMul2Op | ncg && x86ish -> Left (MO_U_Mul2 (wordWidth dflags)) + WordMul2Op | ncg && x86ish + || llvm -> Left (MO_U_Mul2 (wordWidth dflags)) | otherwise -> Right genericWordMul2Op _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op) diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 15350bca7d..fb02120747 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -256,6 +256,38 @@ genCall t@(PrimTarget op) [] args `appOL` stmts4 `snocOL` call return (stmts, top1 ++ top2) +-- We handle MO_U_Mul2 by simply using a 'mul' instruction, but with operands +-- twice the width (we first zero-extend them), e.g., on 64-bit arch we will +-- generate 'mul' on 128-bit operands. Then we only need some plumbing to +-- extract the two 64-bit values out of 128-bit result. +genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = do + let width = widthToLlvmInt w + bitWidth = widthInBits w + width2x = LMInt (bitWidth * 2) + -- First zero-extend the operands ('mul' instruction requires the operands + -- and the result to be of the same type). Note that we don't use 'castVars' + -- because it tries to do LM_Sext. + (lhsVar, stmts1, decls1) <- exprToVar lhs + (rhsVar, stmts2, decls2) <- exprToVar rhs + (lhsExt, stmt3) <- doExpr width2x $ Cast LM_Zext lhsVar width2x + (rhsExt, stmt4) <- doExpr width2x $ Cast LM_Zext rhsVar width2x + -- Do the actual multiplication (note that the result is also 2x width). + (retV, stmt5) <- doExpr width2x $ LlvmOp LM_MO_Mul lhsExt rhsExt + -- Extract the lower bits of the result into retL. 
+ (retL, stmt6) <- doExpr width $ Cast LM_Trunc retV width + -- Now we right-shift the higher bits by width. + let widthLlvmLit = LMLitVar $ LMIntLit (fromIntegral bitWidth) width + (retShifted, stmt7) <- doExpr width2x $ LlvmOp LM_MO_LShr retV widthLlvmLit + -- And extract them into retH. + (retH, stmt8) <- doExpr width $ Cast LM_Trunc retShifted width + dstRegL <- getCmmReg (CmmLocal dstL) + dstRegH <- getCmmReg (CmmLocal dstH) + let storeL = Store retL dstRegL + storeH = Store retH dstRegH + stmts = stmts1 `appOL` stmts2 `appOL` + toOL [ stmt3 , stmt4, stmt5, stmt6, stmt7, stmt8, storeL, storeH ] + return (stmts, decls1 ++ decls2) + -- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from -- which we need to extract the actual values. genCall t@(PrimTarget (MO_AddIntC w)) [dstV, dstO] [lhs, rhs] = @@ -621,6 +653,8 @@ cmmPrimOpFunctions mop = do MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported + -- We support MO_U_Mul2 through ordinary LLVM mul instruction, see the + -- appropriate case of genCall. 
MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported diff --git a/testsuite/tests/primops/should_run/T9430.hs b/testsuite/tests/primops/should_run/T9430.hs index 571b6db37d..aec2d264a1 100644 --- a/testsuite/tests/primops/should_run/T9430.hs +++ b/testsuite/tests/primops/should_run/T9430.hs @@ -73,3 +73,21 @@ main = do checkW (1, minBound + 1) plusWord2# maxBound 2 check "plusWord2# 2 maxBound" $ checkW (1, minBound + 1) plusWord2# 2 maxBound + + check "timesWord2# maxBound 0" $ checkW (0, 0) timesWord2# maxBound 0 + check "timesWord2# 0 maxBound" $ checkW (0, 0) timesWord2# 0 maxBound + check "timesWord2# maxBound 1" $ checkW (0, maxBound) timesWord2# maxBound 1 + check "timesWord2# 1 maxBound" $ checkW (0, maxBound) timesWord2# 1 maxBound + -- Overflows + check "timesWord2# " $ checkW (1, 0) timesWord2# (2 ^ 63) 2 + check "timesWord2# " $ checkW (2, 0) timesWord2# (2 ^ 63) (2 ^ 2) + check "timesWord2# " $ checkW (4, 0) timesWord2# (2 ^ 63) (2 ^ 3) + check "timesWord2# " $ checkW (8, 0) timesWord2# (2 ^ 63) (2 ^ 4) + check "timesWord2# maxBound 2" $ + checkW (1, maxBound - 1) timesWord2# maxBound 2 + check "timesWord2# 2 maxBound" $ + checkW (1, maxBound - 1) timesWord2# 2 maxBound + check "timesWord2# maxBound 3" $ + checkW (2, maxBound - 2) timesWord2# maxBound 3 + check "timesWord2# 3 maxBound" $ + checkW (2, maxBound - 2) timesWord2# 3 maxBound |