summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordoyougnu <jeffrey.young@iohk.io>2021-12-10 14:02:38 -0800
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-12-14 20:55:06 -0500
commit800160229900aa97bb4ae1fa77c80521bf4379ce (patch)
tree47bf85c6620db873d56bf782481768447399518b
parent6b0fb9a078ae415e0720c7db961f5a1692099ff1 (diff)
downloadhaskell-800160229900aa97bb4ae1fa77c80521bf4379ce.tar.gz
LLVM.CodeGen: use fast-string literals
That is remove factorization of common strings and string building code for the LLVM code gen ops. Replace these with string literals to obey the FastString rewrite rule in GHC.Data.FastString and compute the string length at compile time
-rw-r--r--compiler/GHC/CmmToLlvm/CodeGen.hs179
1 files changed, 145 insertions, 34 deletions
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index fe8d5fb977..7034d83b30 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -800,10 +800,8 @@ cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
cmmPrimOpFunctions mop = do
cfg <- getConfig
platform <- getPlatform
- let render = renderWithContext (lcgContext cfg)
- lcgIsBmi2Enabled = lcgBmiVersion cfg >= Just BMI2
- intrinTy1 = "p0i8.p0i8." ++ render (ppr $ llvmWord platform)
- intrinTy2 = "p0i8." ++ render (ppr $ llvmWord platform)
+ let !isBmi2Enabled = lcgBmiVersion cfg >= Just BMI2
+ !is32bit = platformWordSize platform == PW4
unsupported = panic ("cmmPrimOpFunctions: " ++ show mop
++ " not supported here")
dontReach64 = panic ("cmmPrimOpFunctions: " ++ show mop
@@ -858,36 +856,149 @@ cmmPrimOpFunctions mop = do
MO_F64_Acosh -> fsLit "acosh"
MO_F64_Atanh -> fsLit "atanh"
- MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
- MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1
- MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2
- MO_Memcmp _ -> fsLit $ "memcmp"
-
- MO_SuspendThread -> fsLit $ "suspendThread"
- MO_ResumeThread -> fsLit $ "resumeThread"
-
- (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ render (ppr $ widthToLlvmInt w)
- (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ render (ppr $ widthToLlvmInt w)
- (MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ render (ppr $ widthToLlvmInt w)
- (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ render (ppr $ widthToLlvmInt w)
- (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ render (ppr $ widthToLlvmInt w)
-
- (MO_Pdep w) -> let w' = render (ppr $ widthInBits w)
- in if lcgIsBmi2Enabled
- then fsLit $ "llvm.x86.bmi.pdep." ++ w'
- else fsLit $ "hs_pdep" ++ w'
- (MO_Pext w) -> let w' = render (ppr $ widthInBits w)
- in if lcgIsBmi2Enabled
- then fsLit $ "llvm.x86.bmi.pext." ++ w'
- else fsLit $ "hs_pext" ++ w'
-
- (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
-
- MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_AddWordC w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w)
- MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow." ++ render (ppr $ widthToLlvmInt w)
+ -- In the following ops, it looks like we could factorize the concatenation
+ -- of the bit size, and indeed it was like this before, e.g.
+ --
+ -- MO_PopCnt w -> fsLit $ "llvm.ctpop.i" ++ wbits w
+ -- or
+ -- MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1
+ --
+ -- however it meant that FastStrings were not built from constant string
+ -- literals, hence they weren't matching the "fslit" rewrite rule in
+ -- GHC.Data.FastString that computes the string size at compilation time.
+
+ MO_Memcpy _
+ | is32bit -> fsLit "llvm.memcpy.p0i8.p0i8.i32"
+ | otherwise -> fsLit "llvm.memcpy.p0i8.p0i8.i64"
+ MO_Memmove _
+ | is32bit -> fsLit "llvm.memmove.p0i8.p0i8.i32"
+ | otherwise -> fsLit "llvm.memmove.p0i8.p0i8.i64"
+ MO_Memset _
+ | is32bit -> fsLit "llvm.memset.p0i8.i32"
+ | otherwise -> fsLit "llvm.memset.p0i8.i64"
+ MO_Memcmp _ -> fsLit "memcmp"
+
+ MO_SuspendThread -> fsLit "suspendThread"
+ MO_ResumeThread -> fsLit "resumeThread"
+
+ MO_PopCnt w -> case w of
+ W8 -> fsLit "llvm.ctpop.i8"
+ W16 -> fsLit "llvm.ctpop.i16"
+ W32 -> fsLit "llvm.ctpop.i32"
+ W64 -> fsLit "llvm.ctpop.i64"
+ W128 -> fsLit "llvm.ctpop.i128"
+ W256 -> fsLit "llvm.ctpop.i256"
+ W512 -> fsLit "llvm.ctpop.i512"
+ MO_BSwap w -> case w of
+ W8 -> fsLit "llvm.bswap.i8"
+ W16 -> fsLit "llvm.bswap.i16"
+ W32 -> fsLit "llvm.bswap.i32"
+ W64 -> fsLit "llvm.bswap.i64"
+ W128 -> fsLit "llvm.bswap.i128"
+ W256 -> fsLit "llvm.bswap.i256"
+ W512 -> fsLit "llvm.bswap.i512"
+ MO_BRev w -> case w of
+ W8 -> fsLit "llvm.bitreverse.i8"
+ W16 -> fsLit "llvm.bitreverse.i16"
+ W32 -> fsLit "llvm.bitreverse.i32"
+ W64 -> fsLit "llvm.bitreverse.i64"
+ W128 -> fsLit "llvm.bitreverse.i128"
+ W256 -> fsLit "llvm.bitreverse.i256"
+ W512 -> fsLit "llvm.bitreverse.i512"
+ MO_Clz w -> case w of
+ W8 -> fsLit "llvm.ctlz.i8"
+ W16 -> fsLit "llvm.ctlz.i16"
+ W32 -> fsLit "llvm.ctlz.i32"
+ W64 -> fsLit "llvm.ctlz.i64"
+ W128 -> fsLit "llvm.ctlz.i128"
+ W256 -> fsLit "llvm.ctlz.i256"
+ W512 -> fsLit "llvm.ctlz.i512"
+ MO_Ctz w -> case w of
+ W8 -> fsLit "llvm.cttz.i8"
+ W16 -> fsLit "llvm.cttz.i16"
+ W32 -> fsLit "llvm.cttz.i32"
+ W64 -> fsLit "llvm.cttz.i64"
+ W128 -> fsLit "llvm.cttz.i128"
+ W256 -> fsLit "llvm.cttz.i256"
+ W512 -> fsLit "llvm.cttz.i512"
+ MO_Pdep w
+ | isBmi2Enabled -> case w of
+ W8 -> fsLit "llvm.x86.bmi.pdep.8"
+ W16 -> fsLit "llvm.x86.bmi.pdep.16"
+ W32 -> fsLit "llvm.x86.bmi.pdep.32"
+ W64 -> fsLit "llvm.x86.bmi.pdep.64"
+ W128 -> fsLit "llvm.x86.bmi.pdep.128"
+ W256 -> fsLit "llvm.x86.bmi.pdep.256"
+ W512 -> fsLit "llvm.x86.bmi.pdep.512"
+ | otherwise -> case w of
+ W8 -> fsLit "hs_pdep8"
+ W16 -> fsLit "hs_pdep16"
+ W32 -> fsLit "hs_pdep32"
+ W64 -> fsLit "hs_pdep64"
+ W128 -> fsLit "hs_pdep128"
+ W256 -> fsLit "hs_pdep256"
+ W512 -> fsLit "hs_pdep512"
+ MO_Pext w
+ | isBmi2Enabled -> case w of
+ W8 -> fsLit "llvm.x86.bmi.pext.8"
+ W16 -> fsLit "llvm.x86.bmi.pext.16"
+ W32 -> fsLit "llvm.x86.bmi.pext.32"
+ W64 -> fsLit "llvm.x86.bmi.pext.64"
+ W128 -> fsLit "llvm.x86.bmi.pext.128"
+ W256 -> fsLit "llvm.x86.bmi.pext.256"
+ W512 -> fsLit "llvm.x86.bmi.pext.512"
+ | otherwise -> case w of
+ W8 -> fsLit "hs_pext8"
+ W16 -> fsLit "hs_pext16"
+ W32 -> fsLit "hs_pext32"
+ W64 -> fsLit "hs_pext64"
+ W128 -> fsLit "hs_pext128"
+ W256 -> fsLit "hs_pext256"
+ W512 -> fsLit "hs_pext512"
+
+ MO_AddIntC w -> case w of
+ W8 -> fsLit "llvm.sadd.with.overflow.i8"
+ W16 -> fsLit "llvm.sadd.with.overflow.i16"
+ W32 -> fsLit "llvm.sadd.with.overflow.i32"
+ W64 -> fsLit "llvm.sadd.with.overflow.i64"
+ W128 -> fsLit "llvm.sadd.with.overflow.i128"
+ W256 -> fsLit "llvm.sadd.with.overflow.i256"
+ W512 -> fsLit "llvm.sadd.with.overflow.i512"
+ MO_SubIntC w -> case w of
+ W8 -> fsLit "llvm.ssub.with.overflow.i8"
+ W16 -> fsLit "llvm.ssub.with.overflow.i16"
+ W32 -> fsLit "llvm.ssub.with.overflow.i32"
+ W64 -> fsLit "llvm.ssub.with.overflow.i64"
+ W128 -> fsLit "llvm.ssub.with.overflow.i128"
+ W256 -> fsLit "llvm.ssub.with.overflow.i256"
+ W512 -> fsLit "llvm.ssub.with.overflow.i512"
+ MO_Add2 w -> case w of
+ W8 -> fsLit "llvm.uadd.with.overflow.i8"
+ W16 -> fsLit "llvm.uadd.with.overflow.i16"
+ W32 -> fsLit "llvm.uadd.with.overflow.i32"
+ W64 -> fsLit "llvm.uadd.with.overflow.i64"
+ W128 -> fsLit "llvm.uadd.with.overflow.i128"
+ W256 -> fsLit "llvm.uadd.with.overflow.i256"
+ W512 -> fsLit "llvm.uadd.with.overflow.i512"
+ MO_AddWordC w -> case w of
+ W8 -> fsLit "llvm.uadd.with.overflow.i8"
+ W16 -> fsLit "llvm.uadd.with.overflow.i16"
+ W32 -> fsLit "llvm.uadd.with.overflow.i32"
+ W64 -> fsLit "llvm.uadd.with.overflow.i64"
+ W128 -> fsLit "llvm.uadd.with.overflow.i128"
+ W256 -> fsLit "llvm.uadd.with.overflow.i256"
+ W512 -> fsLit "llvm.uadd.with.overflow.i512"
+ MO_SubWordC w -> case w of
+ W8 -> fsLit "llvm.usub.with.overflow.i8"
+ W16 -> fsLit "llvm.usub.with.overflow.i16"
+ W32 -> fsLit "llvm.usub.with.overflow.i32"
+ W64 -> fsLit "llvm.usub.with.overflow.i64"
+ W128 -> fsLit "llvm.usub.with.overflow.i128"
+ W256 -> fsLit "llvm.usub.with.overflow.i256"
+ W512 -> fsLit "llvm.usub.with.overflow.i512"
+
+
+ MO_Prefetch_Data _ -> fsLit "llvm.prefetch"
MO_S_Mul2 {} -> unsupported
MO_S_QuotRem {} -> unsupported