summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Lynagh <ian@well-typed.com>2013-06-09 12:10:35 +0100
committerIan Lynagh <ian@well-typed.com>2013-06-09 12:10:35 +0100
commit1c5b0511a89488f5280523569d45ee61c0d09ffa (patch)
treeccdb8a6ff90162e4d26318b901c473ab5a7f90cc
parent972c044d5da72cee3a43209ccb41e2229914211c (diff)
downloadhaskell-1c5b0511a89488f5280523569d45ee61c0d09ffa.tar.gz
Add support for byte endian swapping for Word 16/32/64.
* Exposes bSwap{,16,32,64}# primops * Add a new machops MO_BSwap * Use a Stg implementation (hs_bswap{16,32,64}) for other implementation in NCG. * Generate bswap in X86 NCG for 32 and 64 bits, and for 16 bits, bswap+shr instead of using xchg. * Generate llvm.bswap intrinsics in llvm codegen. Patch from Vincent Hanquez.
-rw-r--r--compiler/cmm/CmmMachOp.hs1
-rw-r--r--compiler/cmm/PprC.hs1
-rw-r--r--compiler/codeGen/StgCmmPrim.hs12
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs60
-rw-r--r--compiler/nativeGen/CPrim.hs9
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs1
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs14
-rw-r--r--compiler/nativeGen/X86/Instr.hs3
-rw-r--r--compiler/nativeGen/X86/Ppr.hs1
-rw-r--r--compiler/prelude/primops.txt.pp9
11 files changed, 88 insertions, 24 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index fae84e5d53..8d42bbd2cb 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -529,6 +529,7 @@ data CallishMachOp
| MO_Memmove
| MO_PopCnt Width
+ | MO_BSwap Width
deriving (Eq, Show)
pprCallishMachOp :: CallishMachOp -> SDoc
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 00ba7acb06..b0c9bd3f2f 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -738,6 +738,7 @@ pprCallishMachOp_for_C mop
MO_Memcpy -> ptext (sLit "memcpy")
MO_Memset -> ptext (sLit "memset")
MO_Memmove -> ptext (sLit "memmove")
+ (MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
(MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w)
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 54002e8171..7ce329a707 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -541,6 +541,11 @@ emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
doSetByteArrayOp ba off len c
+emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
+emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
+emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
+emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
+
-- Population count
emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
@@ -1568,6 +1573,13 @@ emitAllocateCall res cap n = do
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
+emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitBSwapCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_BSwap width)
+ [ x ]
+
emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitPopCntCall res x width = do
emitPrimCall
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 915981752e..e01870f7dd 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -220,30 +220,11 @@ genCall env t@(PrimTarget MO_Prefetch_Data) [] args = do
`appOL` trashStmts (getDflags env) `snocOL` call
return (env2, stmts, top1 ++ top2)
--- Handle popcnt function specifically since GHC only really has i32 and i64
--- types and things like Word8 are backed by an i32 and just present a logical
--- i8 range. So we must handle conversions from i32 to i8 explicitly as LLVM
--- is strict about types.
-genCall env t@(PrimTarget (MO_PopCnt w)) [dst] args = do
- let dflags = getDflags env
- width = widthToLlvmInt w
- dstTy = cmmToLlvmType $ localRegType dst
- funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible
- CC_Ccc width FixedArgs (tysToParams [width]) Nothing
- (env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst)
-
- let (_, arg_hints) = foreignTargetHints t
- let args_hints = zip args arg_hints
- (env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, [])
- (env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t
- (argsV', stmts4) <- castVars dflags $ zip argsV [width]
- (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
- ([retV'], stmts5) <- castVars dflags [(retV,dstTy)]
- let s2 = Store retV' dstV
-
- let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL`
- s1 `appOL` stmts5 `snocOL` s2
- return (env3, stmts, top1 ++ top2 ++ top3)
+-- Handle PopCnt and BSwap that need to only convert arg and return types
+genCall env t@(PrimTarget (MO_PopCnt w)) dsts args =
+ genCallSimpleCast env w t dsts args
+genCall env t@(PrimTarget (MO_BSwap w)) dsts args =
+ genCallSimpleCast env w t dsts args
-- Handle memcpy function specifically since llvm's intrinsic version takes
-- some extra parameters.
@@ -386,6 +367,36 @@ genCall env target res args = do
return (env3, allStmts `snocOL` s2 `snocOL` s3
`appOL` retStmt, top1 ++ top2 ++ top3)
+-- Handle simple function call that only need simple type casting, of the form:
+-- truncate arg >>= \a -> call(a) >>= zext
+--
+-- since GHC only really has i32 and i64 types and things like Word8 are backed
+-- by an i32 and just present a logical i8 range. So we must handle conversions
+-- from i32 to i8 explicitly as LLVM is strict about types.
+genCallSimpleCast :: LlvmEnv -> Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
+ -> UniqSM StmtData
+genCallSimpleCast env w t [dst] args = do
+ let dflags = getDflags env
+ width = widthToLlvmInt w
+ dstTy = cmmToLlvmType $ localRegType dst
+ funTy = \n -> LMFunction $ LlvmFunctionDecl n ExternallyVisible
+ CC_Ccc width FixedArgs (tysToParams [width]) Nothing
+ (env1, dstV, stmts1, top1) = getCmmReg env (CmmLocal dst)
+
+ let (_, arg_hints) = foreignTargetHints t
+ let args_hints = zip args arg_hints
+ (env2, argsV, stmts2, top2) <- arg_vars env1 args_hints ([], nilOL, [])
+ (env3, fptr, stmts3, top3) <- getFunPtr env2 funTy t
+ (argsV', stmts4) <- castVars dflags $ zip argsV [width]
+ (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
+ (retV', stmts5) <- if getVarType retV == dstTy
+ then return (retV, Nop)
+ else doExpr dstTy $ Cast LM_Zext retV dstTy
+ let s2 = Store retV' dstV
+
+ let stmts = stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL`
+ s1 `appOL` toOL [stmts5] `snocOL` s2
+ return (env3, stmts, top1 ++ top2 ++ top3)
-- | Create a function pointer from a target.
getFunPtr :: LlvmEnv -> (LMString -> LlvmType) -> ForeignTarget
@@ -539,6 +550,7 @@ cmmPrimOpFunctions env mop
MO_Memset -> fsLit $ "llvm.memset." ++ intrinTy2
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ show (widthToLlvmInt w)
+ (MO_BSwap w) -> fsLit $ "llvm.bswap." ++ show (widthToLlvmInt w)
MO_Prefetch_Data -> fsLit "llvm.prefetch"
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index dd9d38f434..a6f4cab7bd 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -1,6 +1,7 @@
-- | Generating C symbol names emitted by the compiler.
module CPrim
( popCntLabel
+ , bSwapLabel
, word2FloatLabel
) where
@@ -16,6 +17,14 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
+bSwapLabel :: Width -> String
+bSwapLabel w = "hs_bswap" ++ pprWidth w
+ where
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
+
word2FloatLabel :: Width -> String
word2FloatLabel w = "hs_word2float" ++ pprWidth w
where
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index b3f5a48a5d..28755e83c8 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1155,6 +1155,7 @@ genCCall' dflags gcp target dest_regs args0
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
+ MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
MO_S_QuotRem {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 9c84a38f6a..5d2b9a9d6d 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -647,6 +647,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
+ MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
MO_S_QuotRem {} -> unsupported
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index ef8a628c1f..7c5811425d 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1658,6 +1658,19 @@ genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
+genCCall _ (PrimTarget (MO_BSwap width)) [dst] [src] = do
+ dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+ code_src <- getAnyReg src
+ case width of
+ W16 -> return $ code_src dst_r `appOL`
+ unitOL (BSWAP II32 dst_r) `appOL`
+ unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
+ _ -> return $ code_src dst_r `appOL` unitOL (BSWAP size dst_r)
+ where
+ size = intSize width
+
genCCall is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
args@[src] = do
sse4_2 <- sse4_2Enabled
@@ -2325,6 +2338,7 @@ outOfLineCmmOp mop res args
MO_Memmove -> fsLit "memmove"
MO_PopCnt _ -> fsLit "popcnt"
+ MO_BSwap _ -> fsLit "bswap"
MO_UF_Conv _ -> unsupported
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 76f0e8bd91..266a4ea58a 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -208,6 +208,7 @@ data Instr
| XOR Size Operand Operand
| NOT Size Operand
| NEGI Size Operand -- NEG instruction (name clash with Cond)
+ | BSWAP Size Reg
-- Shifts (amount may be immediate or %cl only)
| SHL Size Operand{-amount-} Operand
@@ -351,6 +352,7 @@ x86_regUsageOfInstr platform instr
XOR _ src dst -> usageRM src dst
NOT _ op -> usageM op
+ BSWAP _ reg -> mkRU [reg] [reg]
NEGI _ op -> usageM op
SHL _ imm dst -> usageRM imm dst
SAR _ imm dst -> usageRM imm dst
@@ -489,6 +491,7 @@ x86_patchRegsOfInstr instr env
OR sz src dst -> patch2 (OR sz) src dst
XOR sz src dst -> patch2 (XOR sz) src dst
NOT sz op -> patch1 (NOT sz) op
+ BSWAP sz reg -> BSWAP sz (env reg)
NEGI sz op -> patch1 (NEGI sz) op
SHL sz imm dst -> patch1 (SHL sz imm) dst
SAR sz imm dst -> patch1 (SAR sz imm) dst
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 75d18a1ff4..7f9c6901da 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -578,6 +578,7 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst)
pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
+pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op)
pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
pprInstr (SHL size src dst) = pprShift (sLit "shl") size src dst
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 45472816c0..d6bdae861d 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -363,6 +363,15 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word#
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}
+primop BSwap16Op "byteSwap16#" Monadic Word# -> Word#
+ {Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
+primop BSwap32Op "byteSwap32#" Monadic Word# -> Word#
+ {Swap bytes in the lower 32 bits of a word. The higher bytes are undefined. }
+primop BSwap64Op "byteSwap64#" GenPrimOp WORD64 -> WORD64
+ {Swap bytes in a 64 bits of a word.}
+primop BSwapOp "byteSwap#" Monadic Word# -> Word#
+ {Swap bytes in a word.}
+
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}