diff options
author | Sebastian Graf <sebastian.graf@kit.edu> | 2018-05-05 13:30:32 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-05-05 17:06:35 -0400 |
commit | 6243bba73d14cbee4219a16d45f57d1b254a6456 (patch) | |
tree | 56c4528dd27b7c0634ef3707adb0a47771f62011 /compiler | |
parent | 418881f7181cbfa31c44f0794db65bf00916bde2 (diff) | |
download | haskell-6243bba73d14cbee4219a16d45f57d1b254a6456.tar.gz |
Add 'addWordC#' PrimOp
This is mostly for congruence with 'subWordC#' and '{add,sub}IntC#'.
I found 'plusWord2#' while implementing this, which both lacks
documentation and has a slightly different specification than
'addWordC#', which means the generic implementation is unnecessarily
complex.
While I was at it, I also added lacking meta-information on PrimOps
and refactored 'subWordC#'s generic implementation to be branchless.
Reviewers: bgamari, simonmar, jrtc27, dfeuer
Reviewed By: bgamari, dfeuer
Subscribers: dfeuer, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4592
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 72 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 7 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 3 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 19 |
8 files changed, 96 insertions, 14 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 9203911141..15a5827643 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -556,6 +556,7 @@ data CallishMachOp | MO_U_QuotRem Width | MO_U_QuotRem2 Width | MO_Add2 Width + | MO_AddWordC Width | MO_SubWordC Width | MO_AddIntC Width | MO_SubIntC Width diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 76e4d4cb94..1e50c8591b 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -803,6 +803,7 @@ pprCallishMachOp_for_C mop MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 809dc55a58..fe89955285 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -907,6 +907,11 @@ callishPrimOpSupported dflags op || llvm -> Left (MO_Add2 (wordWidth dflags)) | otherwise -> Right genericWordAdd2Op + WordAddCOp | (ncg && (x86ish + || ppc)) + || llvm -> Left (MO_AddWordC (wordWidth dflags)) + | otherwise -> Right genericWordAddCOp + WordSubCOp | (ncg && (x86ish || ppc)) || llvm -> Left (MO_SubWordC (wordWidth dflags)) @@ -1043,17 +1048,64 @@ genericWordAdd2Op [res_h, res_l] [arg_x, arg_y] (bottomHalf (CmmReg (CmmLocal r1))))] genericWordAdd2Op _ _ = panic "genericWordAdd2Op" +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@: +-- +-- @ +-- c = a&b | (a|b)&~r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ +genericWordAddCOp :: GenericOp +genericWordAddCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [aa,bb], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [aa,bb], + CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)] + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] +genericWordAddCOp _ _ = panic "genericWordAddCOp" + +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@: +-- +-- @ +-- c = ~a&b | (~a|b)&r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ genericWordSubCOp :: GenericOp -genericWordSubCOp [res_r, res_c] [aa, bb] = do - dflags <- getDynFlags - emit $ catAGraphs - [ -- Put the result into 'res_r'. - mkAssign (CmmLocal res_r) $ - CmmMachOp (mo_wordSub dflags) [aa, bb] - -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise. - , mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUGt dflags) [bb, aa] - ] +genericWordSubCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmReg (CmmLocal res_r) + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] genericWordSubCOp _ _ = panic "genericWordSubCOp" genericIntAddCOp :: GenericOp diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index e812dd445f..9be0876e21 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -377,6 +377,9 @@ genCall t@(PrimTarget (MO_SubIntC w)) [dstV, dstO] [lhs, rhs] = genCall t@(PrimTarget (MO_Add2 w)) [dstO, dstV] [lhs, rhs] = genCallWithOverflow t w [dstV, dstO] [lhs, rhs] +genCall t@(PrimTarget (MO_AddWordC w)) [dstV, dstO] [lhs, rhs] = + genCallWithOverflow t w [dstV, dstO] [lhs, rhs] + genCall t@(PrimTarget (MO_SubWordC w)) [dstV, dstO] [lhs, rhs] = genCallWithOverflow t w [dstV, dstO] [lhs, rhs] @@ -490,6 +493,7 @@ genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do let valid = op `elem` [ MO_Add2 w , MO_AddIntC w , MO_SubIntC w + , MO_AddWordC w , MO_SubWordC w ] MASSERT(valid) @@ -800,6 +804,8 @@ cmmPrimOpFunctions mop = do ++ showSDoc dflags (ppr $ widthToLlvmInt w) MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow." ++ showSDoc dflags (ppr $ widthToLlvmInt w) + MO_AddWordC w -> fsLit $ "llvm.usub.with.overflow." + ++ showSDoc dflags (ppr $ widthToLlvmInt w) MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow." ++ showSDoc dflags (ppr $ widthToLlvmInt w) diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 7c345f2328..e53d994c25 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1333,6 +1333,7 @@ genCCall target dest_regs argsAndHints PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs argsAndHints PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints + PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width dest_regs argsAndHints @@ -1523,6 +1524,11 @@ genCCall target dest_regs argsAndHints add2Op _ _ _ = panic "genCCall: Wrong number of arguments/results for add2" + addcOp platform [res_r, res_c] [arg_x, arg_y] + = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] + addcOp _ _ _ + = panic "genCCall: Wrong number of arguments/results for addc" + -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, -- which is 0 for borrow and 1 otherwise. We need 1 and 0 -- so xor with 1. @@ -2025,6 +2031,7 @@ genCCall' dflags gcp target dest_regs args MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 6dfd58950e..90d6b0d67b 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -667,6 +667,7 @@ outOfLineMachOp_table mop MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index a0b0673d27..9dc1053683 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -2229,6 +2229,8 @@ genCCall _ is32Bit target dest_regs args = do ADC format (OpImm (ImmInteger 0)) (OpReg reg_h) return code _ -> panic "genCCall: Wrong number of arguments/results for add2" + (PrimTarget (MO_AddWordC width), [res_r, res_c]) -> + addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args (PrimTarget (MO_SubWordC width), [res_r, res_c]) -> addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args (PrimTarget (MO_AddIntC width), [res_r, res_c]) -> @@ -2788,6 +2790,7 @@ outOfLineCmmOp mop res args MO_Add2 {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 4098e80d47..763a2ca37d 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -265,6 +265,7 @@ primop IntAddCOp "addIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #) nonzero if overflow occurred (the sum is either too large or too small to fit in an {\tt Int#}).} with code_size = 2 + commutable = True primop IntSubCOp "subIntC#" GenPrimOp Int# -> Int# -> (# Int#, Int# #) {Subtract signed integers reporting overflow. @@ -328,15 +329,25 @@ primtype Word# primop WordAddOp "plusWord#" Dyadic Word# -> Word# -> Word# with commutable = True +primop WordAddCOp "addWordC#" GenPrimOp Word# -> Word# -> (# Word#, Int# #) + {Add unsigned integers reporting overflow. + The first element of the pair is the result. The second element is + the carry flag, which is nonzero on overflow. See also {\tt plusWord2#}.} + with code_size = 2 + commutable = True + primop WordSubCOp "subWordC#" GenPrimOp Word# -> Word# -> (# Word#, Int# #) {Subtract unsigned integers reporting overflow. The first element of the pair is the result. The second element is the carry flag, which is nonzero on overflow.} + with code_size = 2 --- Returns (# high, low #) (or equivalently, (# carry, low #)) -primop WordAdd2Op "plusWord2#" GenPrimOp - Word# -> Word# -> (# Word#, Word# #) - with commutable = True +primop WordAdd2Op "plusWord2#" GenPrimOp Word# -> Word# -> (# Word#, Word# #) + {Add unsigned integers, with the high part (carry) in the first + component of the returned pair and the low part in the second + component of the pair. See also {\tt addWordC#}.} + with code_size = 2 + commutable = True primop WordSubOp "minusWord#" Dyadic Word# -> Word# -> Word# |