diff options
author | Ian Lynagh <ian@well-typed.com> | 2013-06-09 12:10:35 +0100 |
---|---|---|
committer | Ian Lynagh <ian@well-typed.com> | 2013-06-09 12:10:35 +0100 |
commit | 1c5b0511a89488f5280523569d45ee61c0d09ffa (patch) | |
tree | ccdb8a6ff90162e4d26318b901c473ab5a7f90cc | |
parent | 972c044d5da72cee3a43209ccb41e2229914211c (diff) | |
download | haskell-1c5b0511a89488f5280523569d45ee61c0d09ffa.tar.gz |
Add support for byte endian swapping for Word 16/32/64.
* Exposes bSwap{,16,32,64}# primops
* Add a new machops MO_BSwap
* Use a Stg implementation (hs_bswap{16,32,64}) for other implementation
in NCG.
* Generate bswap in X86 NCG for 32 and 64 bits, and for 16 bits, bswap+shr
instead of using xchg.
* Generate llvm.bswap intrinsics in llvm codegen.
Patch from Vincent Hanquez.
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 12 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 60 | ||||
-rw-r--r-- | compiler/nativeGen/CPrim.hs | 9 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 1 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 14 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 1 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 9 |
11 files changed, 88 insertions, 24 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index fae84e5d53..8d42bbd2cb 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -529,6 +529,7 @@ data CallishMachOp | MO_Memmove | MO_PopCnt Width + | MO_BSwap Width deriving (Eq, Show) pprCallishMachOp :: CallishMachOp -> SDoc diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 00ba7acb06..b0c9bd3f2f 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -738,6 +738,7 @@ pprCallishMachOp_for_C mop MO_Memcpy -> ptext (sLit "memcpy") MO_Memset -> ptext (sLit "memset") MO_Memmove -> ptext (sLit "memmove") + (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 54002e8171..7ce329a707 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -541,6 +541,11 @@ emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] = emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = doSetByteArrayOp ba off len c +emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 +emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 +emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 +emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags) + -- Population count emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8 emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16 @@ -1568,6 +1573,13 @@ emitAllocateCall res cap n = do allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing ForeignLabelInExternalPackage IsFunction)) +emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () +emitBSwapCall res x width = do + emitPrimCall + [ res ] + (MO_BSwap width) + [ x ] + emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode () emitPopCntCall res x width = do emitPrimCall diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 915981752e..e01870f7dd 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -220,30 +220,11 @@ genCall env t@(PrimTarget MO_Prefetch_Data) [] args = do `appOL` trashStmts (getDflags env) `snocOL` call return (env2, stmts, top1 ++ top2) --- Handle popcnt function specifically since GHC only really has i32 and i64 --- types and things like Word8 are backed by an i32 and just present a logical --- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM --- is strict about types. -genCall env t@(PrimTarget (MO_PopCnt w)) [dst] args = do - let dflags = getDflags env - width = widthToLlvmInt w - dstTy = cmmToLlvmType $ localRegType dst - funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible - CC_Ccc width FixedArgs (tysToParams [width]) Nothing - (env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst) - - let (_, arg_hints) = foreignTargetHints t - let args_hints = zip args arg_hints - (env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, []) - (env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t - (argsV', stmts4) <- castVars dflags $ zip argsV [width] - (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] - ([retV'], stmts5) <- castVars dflags [(retV,dstTy)] - let s2 = Store retV' dstV - - let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` - s1 `appOL` stmts5 `snocOL` s2 - return (env3, stmts, top1 ++ top2 ++ top3) +-- Handle PopCnt and BSwap that need to only convert arg and return types +genCall env t@(PrimTarget (MO_PopCnt w)) dsts args = + genCallSimpleCast env w t dsts args +genCall env t@(PrimTarget (MO_BSwap w)) dsts args = + genCallSimpleCast env w t dsts args -- Handle memcpy function specifically since llvm's intrinsic version takes -- some extra parameters. @@ -386,6 +367,36 @@ genCall env target res args = do return (env3, allStmts `snocOL` s2 `snocOL` s3 `appOL` retStmt, top1 ++ top2 ++ top3) +-- Handle simple function call that only need simple type casting, of the form: +-- truncate arg >>= \a -> call(a) >>= zext +-- +-- since GHC only really has i32 and i64 types and things like Word8 are backed +-- by an i32 and just present a logical i8 range. So we must handle conversions +-- from i32 to i8 explicitly as LLVM is strict about types. +genCallSimpleCast :: LlvmEnv -> Width -> ForeignTarget -> [CmmFormal] -> [CmmActual] + -> UniqSM StmtData +genCallSimpleCast env w t [dst] args = do + let dflags = getDflags env + width = widthToLlvmInt w + dstTy = cmmToLlvmType $ localRegType dst + funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible + CC_Ccc width FixedArgs (tysToParams [width]) Nothing + (env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst) + + let (_, arg_hints) = foreignTargetHints t + let args_hints = zip args arg_hints + (env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, []) + (env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t + (argsV', stmts4) <- castVars dflags $ zip argsV [width] + (retV, s1) <- doExpr width $ Call StdCall fptr argsV' [] + (retV', stmts5) <- if getVarType retV == dstTy + then return (retV, Nop) + else doExpr dstTy $ Cast LM_Zext retV dstTy + let s2 = Store retV' dstV + + let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` + s1 `appOL` toOL [stmts5] `snocOL` s2 + return (env3, stmts, top1 ++ top2 ++ top3) -- | Create a function pointer from a target. getFunPtr :: LlvmEnv -> (LMString -> LlvmType) -> ForeignTarget @@ -539,6 +550,7 @@ cmmPrimOpFunctions env mop MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2 (MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ show (widthToLlvmInt w) + (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ show (widthToLlvmInt w) MO_Prefetch_Data -> fsLit "llvm.prefetch" diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs index dd9d38f434..a6f4cab7bd 100644 --- a/compiler/nativeGen/CPrim.hs +++ b/compiler/nativeGen/CPrim.hs @@ -1,6 +1,7 @@ -- | Generating C symbol names emitted by the compiler. module CPrim ( popCntLabel + , bSwapLabel , word2FloatLabel ) where @@ -16,6 +17,14 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w pprWidth W64 = "64" pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w) +bSwapLabel :: Width -> String +bSwapLabel w = "hs_bswap" ++ pprWidth w + where + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w) + word2FloatLabel :: Width -> String word2FloatLabel w = "hs_word2float" ++ pprWidth w where diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index b3f5a48a5d..28755e83c8 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -1155,6 +1155,7 @@ genCCall' dflags gcp target dest_regs args0 MO_Memset -> (fsLit "memset", False) MO_Memmove -> (fsLit "memmove", False) + MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) MO_S_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs index 9c84a38f6a..5d2b9a9d6d 100644 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ b/compiler/nativeGen/SPARC/CodeGen.hs @@ -647,6 +647,7 @@ outOfLineMachOp_table mop MO_Memset -> fsLit "memset" MO_Memmove -> fsLit "memmove" + MO_BSwap w -> fsLit $ bSwapLabel w MO_PopCnt w -> fsLit $ popCntLabel w MO_S_QuotRem {} -> unsupported diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index ef8a628c1f..7c5811425d 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -1658,6 +1658,19 @@ genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL +genCCall _ (PrimTarget (MO_BSwap width)) [dst] [src] = do + dflags <- getDynFlags + let platform = targetPlatform dflags + let dst_r = getRegisterReg platform False (CmmLocal dst) + code_src <- getAnyReg src + case width of + W16 -> return $ code_src dst_r `appOL` + unitOL (BSWAP II32 dst_r) `appOL` + unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r)) + _ -> return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r) + where + size = intSize width + genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] args@[src] = do sse4_2 <- sse4_2Enabled @@ -2325,6 +2338,7 @@ outOfLineCmmOp mop res args MO_Memmove -> fsLit "memmove" MO_PopCnt _ -> fsLit "popcnt" + MO_BSwap _ -> fsLit "bswap" MO_UF_Conv _ -> unsupported diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 76f0e8bd91..266a4ea58a 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -208,6 +208,7 @@ data Instr | XOR Size Operand Operand | NOT Size Operand | NEGI Size Operand -- NEG instruction (name clash with Cond) + | BSWAP Size Reg -- Shifts (amount may be immediate or %cl only) | SHL Size Operand{-amount-} Operand @@ -351,6 +352,7 @@ x86_regUsageOfInstr platform instr XOR _ src dst -> usageRM src dst NOT _ op -> usageM op + BSWAP _ reg -> mkRU [reg] [reg] NEGI _ op -> usageM op SHL _ imm dst -> usageRM imm dst SAR _ imm dst -> usageRM imm dst @@ -489,6 +491,7 @@ x86_patchRegsOfInstr instr env OR sz src dst -> patch2 (OR sz) src dst XOR sz src dst -> patch2 (XOR sz) src dst NOT sz op -> patch1 (NOT sz) op + BSWAP sz reg -> BSWAP sz (env reg) NEGI sz op -> patch1 (NEGI sz) op SHL sz imm dst -> patch1 (SHL sz imm) dst SAR sz imm dst -> patch1 (SAR sz imm) dst diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 75d18a1ff4..7f9c6901da 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -578,6 +578,7 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst) pprInstr (NOT size op) = pprSizeOp (sLit "not") size op +pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op) pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 45472816c0..d6bdae861d 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -363,6 +363,15 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word# primop PopCntOp "popCnt#" Monadic Word# -> Word# {Count the number of set bits in a word.} +primop BSwap16Op "byteSwap16#" Monadic Word# -> Word# + {Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. } +primop BSwap32Op "byteSwap32#" Monadic Word# -> Word# + {Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. } +primop BSwap64Op "byteSwap64#" GenPrimOp WORD64 -> WORD64 + {Swap bytes in a 64 bits of a word.} +primop BSwapOp "byteSwap#" Monadic Word# -> Word# + {Swap bytes in a word.} + ------------------------------------------------------------------------ section "Narrowings" {Explicit narrowing of native-sized ints or words.} |