diff options
author | Reid Barton <rwbarton@gmail.com> | 2014-08-22 18:57:50 -0400 |
---|---|---|
committer | Reid Barton <rwbarton@gmail.com> | 2014-08-23 14:55:57 -0400 |
commit | cfd08a992c91c0a9c629912a5d7234610256121e (patch) | |
tree | 958982daff6933b00804d5b703f2a362d7c82acf /compiler/nativeGen | |
parent | 104a66a461f4f89b8e5ad9c829923bb7ca8ceddb (diff) | |
download | haskell-cfd08a992c91c0a9c629912a5d7234610256121e.tar.gz |
Add MO_AddIntC, MO_SubIntC MachOps and implement in X86 backend
Summary:
These MachOps are used by addIntC# and subIntC#, which in turn are
used in integer-gmp when adding or subtracting small Integers. The
following benchmark shows a ~6% speedup after this commit on x86_64
(building GHC with BuildFlavour=perf).
{-# LANGUAGE MagicHash #-}
-- Micro-benchmark for small-Integer addition: it repeatedly adds 1 to an
-- Integer accumulator, counting down an unboxed Int# loop counter.
import GHC.Exts
import Criterion.Main

-- | Add 1 to an 'Integer' accumulator (starting at 0) once per unit of
-- the given 'Int', and return the final accumulator.
count :: Int -> Integer
count (I# n#) = go n# 0
  where
    -- Loop on the unboxed counter; stop when it reaches 0#.
    go :: Int# -> Integer -> Integer
    go 0# acc = acc
    -- ($!) forces the new accumulator before the recursive call.
    go n# acc = go (n# -# 1#) $! acc + 1

main :: IO ()
main = defaultMain [bgroup "count"
                     [bench "100" $ whnf count 100]]
Differential Revision: https://phabricator.haskell.org/D140
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 20 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 2 |
5 files changed, 29 insertions, 0 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 3d3dff2e73..ddf483a85e 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1162,6 +1162,8 @@ genCCall' dflags gcp target dest_regs args0 MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddIntC {} -> unsupported + MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index c192b8bda6..864f87f1c6 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -665,6 +665,8 @@ outOfLineMachOp_table mop MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddIntC {} -> unsupported + MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index bc79e5e264..9d7cb78a6c 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1989,6 +1989,10 @@ genCCall _ is32Bit target dest_regs args = do ADC size (OpImm (ImmInteger 0)) (OpReg reg_h) return code _ -> panic "genCCall: Wrong number of arguments/results for add2" + (PrimTarget (MO_AddIntC width), [res_r, res_c]) -> + addSubIntC platform ADD_CC (Just . 
ADD_CC) width res_r res_c args + (PrimTarget (MO_SubIntC width), [res_r, res_c]) -> + addSubIntC platform SUB_CC (const Nothing) width res_r res_c args (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) -> case args of [arg_x, arg_y] -> @@ -2042,6 +2046,20 @@ genCCall _ is32Bit target dest_regs args = do divOp _ _ _ _ _ _ _ = panic "genCCall: Wrong number of results for divOp" + addSubIntC platform instr mrevinstr width res_r res_c [arg_x, arg_y] + = do let size = intSize width + rCode <- anyReg =<< trivialCode width (instr size) + (mrevinstr size) arg_x arg_y + reg_tmp <- getNewRegNat II8 + let reg_c = getRegisterReg platform True (CmmLocal res_c) + reg_r = getRegisterReg platform True (CmmLocal res_r) + code = rCode reg_r `snocOL` + SETCC OFLO (OpReg reg_tmp) `snocOL` + MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) + return code + addSubIntC _ _ _ _ _ _ _ + = panic "genCCall: Wrong number of arguments/results for addSubIntC" + genCCall32' :: DynFlags -> ForeignTarget -- function to call -> [CmmFormal] -- where to put the result @@ -2480,6 +2498,8 @@ outOfLineCmmOp mop res args MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddIntC {} -> unsupported + MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index ef0ceeabf3..a43d42ec2a 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -210,6 +210,7 @@ data Instr -- Do not rewrite these instructions to "equivalent" ones that -- have different effect on the condition register! (See #9013.) | ADD_CC Size Operand Operand + | SUB_CC Size Operand Operand -- Simple bit-twiddling. 
| AND Size Operand Operand @@ -371,6 +372,7 @@ x86_regUsageOfInstr platform instr DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx] IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx] ADD_CC _ src dst -> usageRM src dst + SUB_CC _ src dst -> usageRM src dst AND _ src dst -> usageRM src dst OR _ src dst -> usageRM src dst @@ -548,6 +550,7 @@ x86_patchRegsOfInstr instr env IDIV sz op -> patch1 (IDIV sz) op DIV sz op -> patch1 (DIV sz) op ADD_CC sz src dst -> patch2 (ADD_CC sz) src dst + SUB_CC sz src dst -> patch2 (SUB_CC sz) src dst AND sz src dst -> patch2 (AND sz) src dst OR sz src dst -> patch2 (OR sz) src dst XOR sz src dst -> patch2 (XOR sz) src dst diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 89bb0b01fc..7c65195a97 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -568,6 +568,8 @@ pprInstr (IMUL size op1 op2) = pprSizeOpOp (sLit "imul") size op1 op2 pprInstr (ADD_CC size src dst) = pprSizeOpOp (sLit "add") size src dst +pprInstr (SUB_CC size src dst) + = pprSizeOpOp (sLit "sub") size src dst {- A hack. The Intel documentation says that "The two and three operand forms [of IMUL] may also be used with unsigned operands |