summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorSebastian Graf <sebastian.graf@kit.edu>2018-05-05 13:30:32 -0400
committerBen Gamari <ben@smart-cactus.org>2018-05-05 17:06:35 -0400
commit6243bba73d14cbee4219a16d45f57d1b254a6456 (patch)
tree56c4528dd27b7c0634ef3707adb0a47771f62011 /compiler
parent418881f7181cbfa31c44f0794db65bf00916bde2 (diff)
downloadhaskell-6243bba73d14cbee4219a16d45f57d1b254a6456.tar.gz
Add 'addWordC#' PrimOp
This is mostly for congruence with 'subWordC#' and '{add,sub}IntC#'. I found 'plusWord2#' while implementing this, which both lacks documentation and has a slightly different specification than 'addWordC#', which means the generic implementation is unnecessarily complex. While I was at it, I also added lacking meta-information on PrimOps and refactored 'subWordC#'s generic implementation to be branchless. Reviewers: bgamari, simonmar, jrtc27, dfeuer Reviewed By: bgamari, dfeuer Subscribers: dfeuer, thomie, carter Differential Revision: https://phabricator.haskell.org/D4592
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmMachOp.hs1
-rw-r--r--compiler/cmm/PprC.hs1
-rw-r--r--compiler/codeGen/StgCmmPrim.hs72
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs6
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs7
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs3
-rw-r--r--compiler/prelude/primops.txt.pp19
8 files changed, 96 insertions, 14 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 9203911141..15a5827643 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -556,6 +556,7 @@ data CallishMachOp
| MO_U_QuotRem Width
| MO_U_QuotRem2 Width
| MO_Add2 Width
+ | MO_AddWordC Width
| MO_SubWordC Width
| MO_AddIntC Width
| MO_SubIntC Width
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 76e4d4cb94..1e50c8591b 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -803,6 +803,7 @@ pprCallishMachOp_for_C mop
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 809dc55a58..fe89955285 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -907,6 +907,11 @@ callishPrimOpSupported dflags op
|| llvm -> Left (MO_Add2 (wordWidth dflags))
| otherwise -> Right genericWordAdd2Op
+ WordAddCOp | (ncg && (x86ish
+ || ppc))
+ || llvm -> Left (MO_AddWordC (wordWidth dflags))
+ | otherwise -> Right genericWordAddCOp
+
WordSubCOp | (ncg && (x86ish
|| ppc))
|| llvm -> Left (MO_SubWordC (wordWidth dflags))
@@ -1043,17 +1048,64 @@ genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
(bottomHalf (CmmReg (CmmLocal r1))))]
genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
+--
+-- @
+-- c = a&b | (a|b)&~r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
+genericWordAddCOp :: GenericOp
+genericWordAddCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [aa,bb],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [aa,bb],
+ CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericWordAddCOp _ _ = panic "genericWordAddCOp"
+
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
+--
+-- @
+-- c = ~a&b | (~a|b)&r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
genericWordSubCOp :: GenericOp
-genericWordSubCOp [res_r, res_c] [aa, bb] = do
- dflags <- getDynFlags
- emit $ catAGraphs
- [ -- Put the result into 'res_r'.
- mkAssign (CmmLocal res_r) $
- CmmMachOp (mo_wordSub dflags) [aa, bb]
- -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
- , mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUGt dflags) [bb, aa]
- ]
+genericWordSubCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmReg (CmmLocal res_r)
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
genericWordSubCOp _ _ = panic "genericWordSubCOp"
genericIntAddCOp :: GenericOp
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index e812dd445f..9be0876e21 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -377,6 +377,9 @@ genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] =
genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] =
genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
+genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] =
+ genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
+
genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] =
genCallWithOverflow t w [dstV, dstO] [lhs, rhs]
@@ -490,6 +493,7 @@ genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
let valid = op `elem` [ MO_Add2 w
, MO_AddIntC w
, MO_SubIntC w
+ , MO_AddWordC w
, MO_SubWordC w
]
MASSERT(valid)
@@ -800,6 +804,8 @@ cmmPrimOpFunctions mop = do
++ showSDoc dflags (ppr $ widthToLlvmInt w)
MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow."
++ showSDoc dflags (ppr $ widthToLlvmInt w)
+ MO_AddWordC w -> fsLit $ "llvm.usub.with.overflow."
+ ++ showSDoc dflags (ppr $ widthToLlvmInt w)
MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow."
++ showSDoc dflags (ppr $ widthToLlvmInt w)
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 7c345f2328..e53d994c25 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1333,6 +1333,7 @@ genCCall target dest_regs argsAndHints
PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
argsAndHints
PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
+ PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
dest_regs argsAndHints
@@ -1523,6 +1524,11 @@ genCCall target dest_regs argsAndHints
add2Op _ _ _
= panic "genCCall: Wrong number of arguments/results for add2"
+ addcOp platform [res_r, res_c] [arg_x, arg_y]
+ = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
+ addcOp _ _ _
+ = panic "genCCall: Wrong number of arguments/results for addc"
+
-- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
-- which is 0 for borrow and 1 otherwise. We need 1 and 0
-- so xor with 1.
@@ -2025,6 +2031,7 @@ genCCall' dflags gcp target dest_regs args
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 6dfd58950e..90d6b0d67b 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -667,6 +667,7 @@ outOfLineMachOp_table mop
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index a0b0673d27..9dc1053683 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2229,6 +2229,8 @@ genCCall _ is32Bit target dest_regs args = do
ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
return code
_ -> panic "genCCall: Wrong number of arguments/results for add2"
+ (PrimTarget (MO_AddWordC width), [res_r, res_c]) ->
+ addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args
(PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
(PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
@@ -2788,6 +2790,7 @@ outOfLineCmmOp mop res args
MO_Add2 {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 4098e80d47..763a2ca37d 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -265,6 +265,7 @@ primop IntAddCOp "addIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #)
nonzero if overflow occurred (the sum is either too large
or too small to fit in an {\tt Int#}).}
with code_size = 2
+ commutable = True
primop IntSubCOp "subIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #)
{Subtract signed integers reporting overflow.
@@ -328,15 +329,25 @@ primtype Word#
primop WordAddOp "plusWord#" Dyadic Word# -> Word# -> Word#
with commutable = True
+primop WordAddCOp "addWordC#" GenPrimOp Word# -> Word# -> (# Word#, Int# #)
+ {Add unsigned integers reporting overflow.
+ The first element of the pair is the result. The second element is
+ the carry flag, which is nonzero on overflow. See also {\tt plusWord2#}.}
+ with code_size = 2
+ commutable = True
+
primop WordSubCOp "subWordC#" GenPrimOp Word# -> Word# -> (# Word#, Int# #)
{Subtract unsigned integers reporting overflow.
The first element of the pair is the result. The second element is
the carry flag, which is nonzero on overflow.}
+ with code_size = 2
--- Returns (# high, low #) (or equivalently, (# carry, low #))
-primop WordAdd2Op "plusWord2#" GenPrimOp
- Word# -> Word# -> (# Word#, Word# #)
- with commutable = True
+primop WordAdd2Op "plusWord2#" GenPrimOp Word# -> Word# -> (# Word#, Word# #)
+ {Add unsigned integers, with the high part (carry) in the first
+ component of the returned pair and the low part in the second
+ component of the pair. See also {\tt addWordC#}.}
+ with code_size = 2
+ commutable = True
primop WordSubOp "minusWord#" Dyadic Word# -> Word# -> Word#