-rw-r--r--  compiler/GHC/CmmToLlvm/CodeGen.hs  179
1 file changed, 145 insertions, 34 deletions
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index fe8d5fb977..7034d83b30 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -800,10 +800,8 @@ cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
cmmPrimOpFunctions mop = do
cfg <- getConfig
platform <- getPlatform
- let render = renderWithContext (lcgContext cfg)
- lcgIsBmi2Enabled = lcgBmiVersion cfg >= Just BMI2
- intrinTy1 = "p0i8.p0i8." ++ render (ppr $ llvmWord platform)
- intrinTy2 = "p0i8." ++ render (ppr $ llvmWord platform)
+ let !isBmi2Enabled = lcgBmiVersion cfg >= Just BMI2
+ !is32bit = platformWordSize platform == PW4
unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
++ " not supported here")
dontReach64 = panic ("cmmPrimOpFunctions: " ++ show mop
@@ -858,36 +856,149 @@ cmmPrimOpFunctions mop = do
MO_F64_Acosh -> fsLit "acosh"
MO_F64_Atanh -> fsLit "atanh"
- MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
- MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
- MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
- MO_Memcmp _ -> fsLit $ "memcmp"
-
- MO_SuspendThread -> fsLit $ "suspendThread"
- MO_ResumeThread -> fsLit $ "resumeThread"
-
- (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ render (ppr $ widthToLlvmInt w)
- (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ render (ppr $ widthToLlvmInt w)
- (MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ render (ppr $ widthToLlvmInt w)
- (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ render (ppr $ widthToLlvmInt w)
- (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ render (ppr $ widthToLlvmInt w)
-
- (MO_Pdep w) -> let w' = render (ppr $ widthInBits w)
- in if lcgIsBmi2Enabled
- then fsLit $ "llvm.x86.bmi.pdep." ++ w'
- else fsLit $ "hs_pdep" ++ w'
- (MO_Pext w) -> let w' = render (ppr $ widthInBits w)
- in if lcgIsBmi2Enabled
- then fsLit $ "llvm.x86.bmi.pext." ++ w'
- else fsLit $ "hs_pext" ++ w'
-
- (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
-
- MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_AddWordC w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow." ++ render (ppr $ widthToLlvmInt w)
+ -- In the following ops it looks like we could factor out the concatenation
+ -- of the bit size, and indeed the code used to be written that way, e.g.
+ --
+ --   MO_PopCnt w -> fsLit $ "llvm.ctpop.i" ++ wbits w
+ -- or
+ --   MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
+ --
+ -- However, that meant the FastStrings were not built from constant string
+ -- literals, so they didn't match the "fslit" rewrite rule in
+ -- GHC.Data.FastString that computes the string size at compile time.
+
+ MO_Memcpy _
+ | is32bit -> fsLit "llvm.memcpy.p0i8.p0i8.i32"
+ | otherwise -> fsLit "llvm.memcpy.p0i8.p0i8.i64"
+ MO_Memmove _
+ | is32bit -> fsLit "llvm.memmove.p0i8.p0i8.i32"
+ | otherwise -> fsLit "llvm.memmove.p0i8.p0i8.i64"
+ MO_Memset _
+ | is32bit -> fsLit "llvm.memset.p0i8.i32"
+ | otherwise -> fsLit "llvm.memset.p0i8.i64"
+ MO_Memcmp _ -> fsLit "memcmp"
+
+ MO_SuspendThread -> fsLit "suspendThread"
+ MO_ResumeThread -> fsLit "resumeThread"
+
+ MO_PopCnt w -> case w of
+ W8 -> fsLit "llvm.ctpop.i8"
+ W16 -> fsLit "llvm.ctpop.i16"
+ W32 -> fsLit "llvm.ctpop.i32"
+ W64 -> fsLit "llvm.ctpop.i64"
+ W128 -> fsLit "llvm.ctpop.i128"
+ W256 -> fsLit "llvm.ctpop.i256"
+ W512 -> fsLit "llvm.ctpop.i512"
+ MO_BSwap w -> case w of
+ W8 -> fsLit "llvm.bswap.i8"
+ W16 -> fsLit "llvm.bswap.i16"
+ W32 -> fsLit "llvm.bswap.i32"
+ W64 -> fsLit "llvm.bswap.i64"
+ W128 -> fsLit "llvm.bswap.i128"
+ W256 -> fsLit "llvm.bswap.i256"
+ W512 -> fsLit "llvm.bswap.i512"
+ MO_BRev w -> case w of
+ W8 -> fsLit "llvm.bitreverse.i8"
+ W16 -> fsLit "llvm.bitreverse.i16"
+ W32 -> fsLit "llvm.bitreverse.i32"
+ W64 -> fsLit "llvm.bitreverse.i64"
+ W128 -> fsLit "llvm.bitreverse.i128"
+ W256 -> fsLit "llvm.bitreverse.i256"
+ W512 -> fsLit "llvm.bitreverse.i512"
+ MO_Clz w -> case w of
+ W8 -> fsLit "llvm.ctlz.i8"
+ W16 -> fsLit "llvm.ctlz.i16"
+ W32 -> fsLit "llvm.ctlz.i32"
+ W64 -> fsLit "llvm.ctlz.i64"
+ W128 -> fsLit "llvm.ctlz.i128"
+ W256 -> fsLit "llvm.ctlz.i256"
+ W512 -> fsLit "llvm.ctlz.i512"
+ MO_Ctz w -> case w of
+ W8 -> fsLit "llvm.cttz.i8"
+ W16 -> fsLit "llvm.cttz.i16"
+ W32 -> fsLit "llvm.cttz.i32"
+ W64 -> fsLit "llvm.cttz.i64"
+ W128 -> fsLit "llvm.cttz.i128"
+ W256 -> fsLit "llvm.cttz.i256"
+ W512 -> fsLit "llvm.cttz.i512"
+ MO_Pdep w
+ | isBmi2Enabled -> case w of
+ W8 -> fsLit "llvm.x86.bmi.pdep.8"
+ W16 -> fsLit "llvm.x86.bmi.pdep.16"
+ W32 -> fsLit "llvm.x86.bmi.pdep.32"
+ W64 -> fsLit "llvm.x86.bmi.pdep.64"
+ W128 -> fsLit "llvm.x86.bmi.pdep.128"
+ W256 -> fsLit "llvm.x86.bmi.pdep.256"
+ W512 -> fsLit "llvm.x86.bmi.pdep.512"
+ | otherwise -> case w of
+ W8 -> fsLit "hs_pdep8"
+ W16 -> fsLit "hs_pdep16"
+ W32 -> fsLit "hs_pdep32"
+ W64 -> fsLit "hs_pdep64"
+ W128 -> fsLit "hs_pdep128"
+ W256 -> fsLit "hs_pdep256"
+ W512 -> fsLit "hs_pdep512"
+ MO_Pext w
+ | isBmi2Enabled -> case w of
+ W8 -> fsLit "llvm.x86.bmi.pext.8"
+ W16 -> fsLit "llvm.x86.bmi.pext.16"
+ W32 -> fsLit "llvm.x86.bmi.pext.32"
+ W64 -> fsLit "llvm.x86.bmi.pext.64"
+ W128 -> fsLit "llvm.x86.bmi.pext.128"
+ W256 -> fsLit "llvm.x86.bmi.pext.256"
+ W512 -> fsLit "llvm.x86.bmi.pext.512"
+ | otherwise -> case w of
+ W8 -> fsLit "hs_pext8"
+ W16 -> fsLit "hs_pext16"
+ W32 -> fsLit "hs_pext32"
+ W64 -> fsLit "hs_pext64"
+ W128 -> fsLit "hs_pext128"
+ W256 -> fsLit "hs_pext256"
+ W512 -> fsLit "hs_pext512"
+
+ MO_AddIntC w -> case w of
+ W8 -> fsLit "llvm.sadd.with.overflow.i8"
+ W16 -> fsLit "llvm.sadd.with.overflow.i16"
+ W32 -> fsLit "llvm.sadd.with.overflow.i32"
+ W64 -> fsLit "llvm.sadd.with.overflow.i64"
+ W128 -> fsLit "llvm.sadd.with.overflow.i128"
+ W256 -> fsLit "llvm.sadd.with.overflow.i256"
+ W512 -> fsLit "llvm.sadd.with.overflow.i512"
+ MO_SubIntC w -> case w of
+ W8 -> fsLit "llvm.ssub.with.overflow.i8"
+ W16 -> fsLit "llvm.ssub.with.overflow.i16"
+ W32 -> fsLit "llvm.ssub.with.overflow.i32"
+ W64 -> fsLit "llvm.ssub.with.overflow.i64"
+ W128 -> fsLit "llvm.ssub.with.overflow.i128"
+ W256 -> fsLit "llvm.ssub.with.overflow.i256"
+ W512 -> fsLit "llvm.ssub.with.overflow.i512"
+ MO_Add2 w -> case w of
+ W8 -> fsLit "llvm.uadd.with.overflow.i8"
+ W16 -> fsLit "llvm.uadd.with.overflow.i16"
+ W32 -> fsLit "llvm.uadd.with.overflow.i32"
+ W64 -> fsLit "llvm.uadd.with.overflow.i64"
+ W128 -> fsLit "llvm.uadd.with.overflow.i128"
+ W256 -> fsLit "llvm.uadd.with.overflow.i256"
+ W512 -> fsLit "llvm.uadd.with.overflow.i512"
+ MO_AddWordC w -> case w of
+ W8 -> fsLit "llvm.uadd.with.overflow.i8"
+ W16 -> fsLit "llvm.uadd.with.overflow.i16"
+ W32 -> fsLit "llvm.uadd.with.overflow.i32"
+ W64 -> fsLit "llvm.uadd.with.overflow.i64"
+ W128 -> fsLit "llvm.uadd.with.overflow.i128"
+ W256 -> fsLit "llvm.uadd.with.overflow.i256"
+ W512 -> fsLit "llvm.uadd.with.overflow.i512"
+ MO_SubWordC w -> case w of
+ W8 -> fsLit "llvm.usub.with.overflow.i8"
+ W16 -> fsLit "llvm.usub.with.overflow.i16"
+ W32 -> fsLit "llvm.usub.with.overflow.i32"
+ W64 -> fsLit "llvm.usub.with.overflow.i64"
+ W128 -> fsLit "llvm.usub.with.overflow.i128"
+ W256 -> fsLit "llvm.usub.with.overflow.i256"
+ W512 -> fsLit "llvm.usub.with.overflow.i512"
+
+
+ MO_Prefetch_Data _ -> fsLit "llvm.prefetch"
MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported
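
The comment in the patch above hinges on the "fslit" rewrite rule in GHC.Data.FastString, which can only fire when fsLit is applied to a constant string literal. Below is a minimal, self-contained sketch of that general technique; FS, slowLit, fastLit and the rule name are illustrative stand-ins, not GHC's actual definitions. It shows why an argument assembled with (++) at run time can never match such a rule:

    {-# LANGUAGE MagicHash #-}
    module LitRuleSketch where

    import GHC.Exts (Addr#)
    import GHC.CString (unpackCString#)

    -- Toy stand-in for FastString.
    newtype FS = FS String

    -- Slow path: has to traverse the String at run time.
    slowLit :: String -> FS
    slowLit = FS
    {-# NOINLINE slowLit #-}  -- keep calls visible so the rule below can match

    -- Fast path: works directly from the literal's raw address.
    fastLit :: Addr# -> FS
    fastLit addr = FS (unpackCString# addr)

    -- GHC desugars a string literal to (unpackCString# addr), so this rule
    -- matches   slowLit "llvm.ctpop.i64"   but can never match
    --           slowLit ("llvm.ctpop.i" ++ show bits)
    -- because that concatenation only happens at run time.
    {-# RULES "slowLit/literal"
          forall a. slowLit (unpackCString# a) = fastLit a
      #-}

This is why the patch spells out one fully concatenated literal per width: every fsLit call site then receives a plain string literal (an unpackCString# application after desugaring), so the rule can compute the FastString's size at compile time rather than at run time.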