diff options
author | doyougnu <jeffrey.young@iohk.io> | 2021-12-10 14:02:38 -0800 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-12-14 20:55:06 -0500 |
commit | 800160229900aa97bb4ae1fa77c80521bf4379ce (patch) | |
tree | 47bf85c6620db873d56bf782481768447399518b | |
parent | 6b0fb9a078ae415e0720c7db961f5a1692099ff1 (diff) | |
download | haskell-800160229900aa97bb4ae1fa77c80521bf4379ce.tar.gz |
LLVM.CodeGen: use fast-string literals
That is remove factorization of common strings and string building
code for the LLVM code gen ops. Replace these with string literals
to obey the FastString rewrite rule in GHC.Data.FastString and compute
the string length at compile time
-rw-r--r-- | compiler/GHC/CmmToLlvm/CodeGen.hs | 179 |
1 files changed, 145 insertions, 34 deletions
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs index fe8d5fb977..7034d83b30 100644 --- a/compiler/GHC/CmmToLlvm/CodeGen.hs +++ b/compiler/GHC/CmmToLlvm/CodeGen.hs @@ -800,10 +800,8 @@ cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString cmmPrimOpFunctions mop = do cfg <- getConfig platform <- getPlatform - let render = renderWithContext (lcgContext cfg) - lcgIsBmi2Enabled = lcgBmiVersion cfg >= Just BMI2 - intrinTy1 = "p0i8.p0i8." ++ render (ppr $ llvmWord platform) - intrinTy2 = "p0i8." ++ render (ppr $ llvmWord platform) + let !isBmi2Enabled = lcgBmiVersion cfg >= Just BMI2 + !is32bit = platformWordSize platform == PW4 unsupported = panic ("cmmPrimOpFunctions: " ++ show mop ++ " not supported here") dontReach64 = panic ("cmmPrimOpFunctions: " ++ show mop @@ -858,36 +856,149 @@ cmmPrimOpFunctions mop = do MO_F64_Acosh -> fsLit "acosh" MO_F64_Atanh -> fsLit "atanh" - MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1 - MO_Memmove _ -> fsLit $ "llvm.memmove." ++ intrinTy1 - MO_Memset _ -> fsLit $ "llvm.memset." ++ intrinTy2 - MO_Memcmp _ -> fsLit $ "memcmp" - - MO_SuspendThread -> fsLit $ "suspendThread" - MO_ResumeThread -> fsLit $ "resumeThread" - - (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ render (ppr $ widthToLlvmInt w) - (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ render (ppr $ widthToLlvmInt w) - (MO_BRev w) -> fsLit $ "llvm.bitreverse." ++ render (ppr $ widthToLlvmInt w) - (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ render (ppr $ widthToLlvmInt w) - (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ render (ppr $ widthToLlvmInt w) - - (MO_Pdep w) -> let w' = render (ppr $ widthInBits w) - in if lcgIsBmi2Enabled - then fsLit $ "llvm.x86.bmi.pdep." ++ w' - else fsLit $ "hs_pdep" ++ w' - (MO_Pext w) -> let w' = render (ppr $ widthInBits w) - in if lcgIsBmi2Enabled - then fsLit $ "llvm.x86.bmi.pext." ++ w' - else fsLit $ "hs_pext" ++ w' - - (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch" - - MO_AddIntC w -> fsLit $ "llvm.sadd.with.overflow." ++ render (ppr $ widthToLlvmInt w) - MO_SubIntC w -> fsLit $ "llvm.ssub.with.overflow." ++ render (ppr $ widthToLlvmInt w) - MO_Add2 w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w) - MO_AddWordC w -> fsLit $ "llvm.uadd.with.overflow." ++ render (ppr $ widthToLlvmInt w) - MO_SubWordC w -> fsLit $ "llvm.usub.with.overflow." ++ render (ppr $ widthToLlvmInt w) + -- In the following ops, it looks like we could factorize the concatenation + -- of the bit size, and indeed it was like this before, e.g. + -- + -- MO_PopCnt w -> fsLit $ "llvm.ctpop.i" ++ wbits w + -- or + -- MO_Memcpy _ -> fsLit $ "llvm.memcpy." ++ intrinTy1 + -- + -- however it meant that FastStrings were not built from constant string + -- literals, hence they weren't matching the "fslit" rewrite rule in + -- GHC.Data.FastString that computes the string size at compilation time. + + MO_Memcpy _ + | is32bit -> fsLit "llvm.memcpy.p0i8.p0i8.i32" + | otherwise -> fsLit "llvm.memcpy.p0i8.p0i8.i64" + MO_Memmove _ + | is32bit -> fsLit "llvm.memmove.p0i8.p0i8.i32" + | otherwise -> fsLit "llvm.memmove.p0i8.p0i8.i64" + MO_Memset _ + | is32bit -> fsLit "llvm.memset.p0i8.i32" + | otherwise -> fsLit "llvm.memset.p0i8.i64" + MO_Memcmp _ -> fsLit "memcmp" + + MO_SuspendThread -> fsLit "suspendThread" + MO_ResumeThread -> fsLit "resumeThread" + + MO_PopCnt w -> case w of + W8 -> fsLit "llvm.ctpop.i8" + W16 -> fsLit "llvm.ctpop.i16" + W32 -> fsLit "llvm.ctpop.i32" + W64 -> fsLit "llvm.ctpop.i64" + W128 -> fsLit "llvm.ctpop.i128" + W256 -> fsLit "llvm.ctpop.i256" + W512 -> fsLit "llvm.ctpop.i512" + MO_BSwap w -> case w of + W8 -> fsLit "llvm.bswap.i8" + W16 -> fsLit "llvm.bswap.i16" + W32 -> fsLit "llvm.bswap.i32" + W64 -> fsLit "llvm.bswap.i64" + W128 -> fsLit "llvm.bswap.i128" + W256 -> fsLit "llvm.bswap.i256" + W512 -> fsLit "llvm.bswap.i512" + MO_BRev w -> case w of + W8 -> fsLit "llvm.bitreverse.i8" + W16 -> fsLit "llvm.bitreverse.i16" + W32 -> fsLit "llvm.bitreverse.i32" + W64 -> fsLit "llvm.bitreverse.i64" + W128 -> fsLit "llvm.bitreverse.i128" + W256 -> fsLit "llvm.bitreverse.i256" + W512 -> fsLit "llvm.bitreverse.i512" + MO_Clz w -> case w of + W8 -> fsLit "llvm.ctlz.i8" + W16 -> fsLit "llvm.ctlz.i16" + W32 -> fsLit "llvm.ctlz.i32" + W64 -> fsLit "llvm.ctlz.i64" + W128 -> fsLit "llvm.ctlz.i128" + W256 -> fsLit "llvm.ctlz.i256" + W512 -> fsLit "llvm.ctlz.i512" + MO_Ctz w -> case w of + W8 -> fsLit "llvm.cttz.i8" + W16 -> fsLit "llvm.cttz.i16" + W32 -> fsLit "llvm.cttz.i32" + W64 -> fsLit "llvm.cttz.i64" + W128 -> fsLit "llvm.cttz.i128" + W256 -> fsLit "llvm.cttz.i256" + W512 -> fsLit "llvm.cttz.i512" + MO_Pdep w + | isBmi2Enabled -> case w of + W8 -> fsLit "llvm.x86.bmi.pdep.8" + W16 -> fsLit "llvm.x86.bmi.pdep.16" + W32 -> fsLit "llvm.x86.bmi.pdep.32" + W64 -> fsLit "llvm.x86.bmi.pdep.64" + W128 -> fsLit "llvm.x86.bmi.pdep.128" + W256 -> fsLit "llvm.x86.bmi.pdep.256" + W512 -> fsLit "llvm.x86.bmi.pdep.512" + | otherwise -> case w of + W8 -> fsLit "hs_pdep8" + W16 -> fsLit "hs_pdep16" + W32 -> fsLit "hs_pdep32" + W64 -> fsLit "hs_pdep64" + W128 -> fsLit "hs_pdep128" + W256 -> fsLit "hs_pdep256" + W512 -> fsLit "hs_pdep512" + MO_Pext w + | isBmi2Enabled -> case w of + W8 -> fsLit "llvm.x86.bmi.pext.8" + W16 -> fsLit "llvm.x86.bmi.pext.16" + W32 -> fsLit "llvm.x86.bmi.pext.32" + W64 -> fsLit "llvm.x86.bmi.pext.64" + W128 -> fsLit "llvm.x86.bmi.pext.128" + W256 -> fsLit "llvm.x86.bmi.pext.256" + W512 -> fsLit "llvm.x86.bmi.pext.512" + | otherwise -> case w of + W8 -> fsLit "hs_pext8" + W16 -> fsLit "hs_pext16" + W32 -> fsLit "hs_pext32" + W64 -> fsLit "hs_pext64" + W128 -> fsLit "hs_pext128" + W256 -> fsLit "hs_pext256" + W512 -> fsLit "hs_pext512" + + MO_AddIntC w -> case w of + W8 -> fsLit "llvm.sadd.with.overflow.i8" + W16 -> fsLit "llvm.sadd.with.overflow.i16" + W32 -> fsLit "llvm.sadd.with.overflow.i32" + W64 -> fsLit "llvm.sadd.with.overflow.i64" + W128 -> fsLit "llvm.sadd.with.overflow.i128" + W256 -> fsLit "llvm.sadd.with.overflow.i256" + W512 -> fsLit "llvm.sadd.with.overflow.i512" + MO_SubIntC w -> case w of + W8 -> fsLit "llvm.ssub.with.overflow.i8" + W16 -> fsLit "llvm.ssub.with.overflow.i16" + W32 -> fsLit "llvm.ssub.with.overflow.i32" + W64 -> fsLit "llvm.ssub.with.overflow.i64" + W128 -> fsLit "llvm.ssub.with.overflow.i128" + W256 -> fsLit "llvm.ssub.with.overflow.i256" + W512 -> fsLit "llvm.ssub.with.overflow.i512" + MO_Add2 w -> case w of + W8 -> fsLit "llvm.uadd.with.overflow.i8" + W16 -> fsLit "llvm.uadd.with.overflow.i16" + W32 -> fsLit "llvm.uadd.with.overflow.i32" + W64 -> fsLit "llvm.uadd.with.overflow.i64" + W128 -> fsLit "llvm.uadd.with.overflow.i128" + W256 -> fsLit "llvm.uadd.with.overflow.i256" + W512 -> fsLit "llvm.uadd.with.overflow.i512" + MO_AddWordC w -> case w of + W8 -> fsLit "llvm.uadd.with.overflow.i8" + W16 -> fsLit "llvm.uadd.with.overflow.i16" + W32 -> fsLit "llvm.uadd.with.overflow.i32" + W64 -> fsLit "llvm.uadd.with.overflow.i64" + W128 -> fsLit "llvm.uadd.with.overflow.i128" + W256 -> fsLit "llvm.uadd.with.overflow.i256" + W512 -> fsLit "llvm.uadd.with.overflow.i512" + MO_SubWordC w -> case w of + W8 -> fsLit "llvm.usub.with.overflow.i8" + W16 -> fsLit "llvm.usub.with.overflow.i16" + W32 -> fsLit "llvm.usub.with.overflow.i32" + W64 -> fsLit "llvm.usub.with.overflow.i64" + W128 -> fsLit "llvm.usub.with.overflow.i128" + W256 -> fsLit "llvm.usub.with.overflow.i256" + W512 -> fsLit "llvm.usub.with.overflow.i512" + + + MO_Prefetch_Data _ -> fsLit "llvm.prefetch" MO_S_Mul2 {} -> unsupported MO_S_QuotRem {} -> unsupported |