summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorAustin Seipp <austin@well-typed.com>2013-10-01 21:13:14 -0500
committerAustin Seipp <austin@well-typed.com>2013-10-01 21:26:47 -0500
commitfd74014079f14bd3ab50e328e52c44ef97d40e05 (patch)
treeda31c992a76d3816a4f1012ceb1eb4e68d0fb556 /compiler
parent627d1e008cbe4d9318b2466394420a968d1659da (diff)
downloadhaskell-fd74014079f14bd3ab50e328e52c44ef97d40e05.tar.gz
Add support for prefetch with locality levels.
This patch adds support for several new primitive operations which support using processor-specific instructions to help guide data and cache locality decisions. We have levels ranging from [0..3]. For LLVM, we generate llvm.prefetch intrinsics at the proper locality level (similar to GCC). For x86 we generate prefetch{NTA, t2, t1, t0} instructions. On SPARC and PowerPC, the locality levels are ignored. This closes #8256. Authored-by: Carter Tazio Schonwald <carter.schonwald@gmail.com> Signed-off-by: Austin Seipp <austin@well-typed.com>
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmMachOp.hs18
-rw-r--r--compiler/cmm/CmmParse.y10
-rw-r--r--compiler/cmm/PprC.hs4
-rw-r--r--compiler/codeGen/StgCmmPrim.hs53
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs11
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs5
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs7
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs23
-rw-r--r--compiler/nativeGen/X86/Instr.hs16
-rw-r--r--compiler/nativeGen/X86/Ppr.hs12
-rw-r--r--compiler/prelude/primops.txt.pp85
11 files changed, 194 insertions, 50 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index c009d15e25..684a4b9729 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -107,10 +107,10 @@ data MachOp
-- Vector element insertion and extraction operations
| MO_V_Insert Length Width -- Insert scalar into vector
| MO_V_Extract Length Width -- Extract scalar from vector
-
+
-- Integer vector operations
- | MO_V_Add Length Width
- | MO_V_Sub Length Width
+ | MO_V_Add Length Width
+ | MO_V_Sub Length Width
| MO_V_Mul Length Width
-- Signed vector multiply/divide
@@ -127,8 +127,8 @@ data MachOp
| MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
- | MO_VF_Add Length Width
- | MO_VF_Sub Length Width
+ | MO_VF_Add Length Width
+ | MO_VF_Sub Length Width
| MO_VF_Neg Length Width -- unary -
| MO_VF_Mul Length Width
| MO_VF_Quot Length Width
@@ -528,8 +528,14 @@ data CallishMachOp
| MO_Touch -- Keep variables live (when using interior pointers)
-- Prefetch
- | MO_Prefetch_Data -- Prefetch hint. May change program performance but not
+ | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not
-- program behavior.
+ -- the Int can be 0-3. Needs to be known at compile time
+ -- to interact with code generation correctly.
+ -- TODO: add support for prefetch WRITES,
+ -- currently only exposes prefetch reads, which
+ -- would be the majority of use cases in ghc anyways
+
-- Note that these three MachOps all take 1 extra parameter than the
-- standard C lib versions. The extra (last) parameter contains
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index ebd9278e15..a0c9bc4eb5 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -952,8 +952,16 @@ callishMachOps = listToUFM $
( "write_barrier", MO_WriteBarrier ),
( "memcpy", MO_Memcpy ),
( "memset", MO_Memset ),
- ( "memmove", MO_Memmove )
+ ( "memmove", MO_Memmove ),
+
+ ("prefetch0",MO_Prefetch_Data 0),
+ ("prefetch1",MO_Prefetch_Data 1),
+ ("prefetch2",MO_Prefetch_Data 2),
+ ("prefetch3",MO_Prefetch_Data 3)
+
-- ToDo: the rest, maybe
+ -- edit: which rest?
+ -- also: how do we tell CMM Lint how to type check callish machops?
]
parseSafety :: String -> P Safety
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index c468161c73..32fd8b4feb 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -759,7 +759,9 @@ pprCallishMachOp_for_C mop
MO_Add2 {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _ ) -> unsupported
+ --- we could support prefetch via "__builtin_prefetch"
+ --- Not adding it for now
where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop
++ " not supported!")
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 523fcb21f9..6411e89a54 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -255,15 +255,6 @@ emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
emitPrimOp _ res@[] TouchOp args@[_arg]
= do emitPrimCall res MO_Touch args
-emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg]
- = do emitPrimCall res MO_Prefetch_Data args
-
-emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg]
- = do emitPrimCall res MO_Prefetch_Data args
-
-emitPrimOp _ res@[] PrefetchAddrOp args@[_arg]
- = do emitPrimCall res MO_Prefetch_Data args
-
-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
emitPrimOp dflags [res] ByteArrayContents_Char [arg]
= emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
@@ -656,9 +647,22 @@ emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
ty = vecCmmCat vcat w
-- Prefetch
-emitPrimOp _ res PrefetchByteArrayOp args = doPrefetchByteArrayOp res args
-emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args
-emitPrimOp _ res PrefetchAddrOp args = doPrefetchAddrOp res args
+emitPrimOp _ res PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
+emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
+emitPrimOp _ res PrefetchAddrOp3 args = doPrefetchAddrOp 3 res args
+
+emitPrimOp _ res PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
+emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
+emitPrimOp _ res PrefetchAddrOp2 args = doPrefetchAddrOp 2 res args
+
+emitPrimOp _ res PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
+emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
+emitPrimOp _ res PrefetchAddrOp1 args = doPrefetchAddrOp 1 res args
+
+emitPrimOp _ res PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
+emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
+emitPrimOp _ res PrefetchAddrOp0 args = doPrefetchAddrOp 0 res args
+
-- The rest just translate straightforwardly
emitPrimOp dflags [res] op [arg]
@@ -1370,31 +1374,34 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
------------------------------------------------------------------------------
-- Helpers for translating prefetching.
-doPrefetchByteArrayOp :: [LocalReg]
+doPrefetchByteArrayOp :: Int
+ -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchByteArrayOp res [addr,idx]
+doPrefetchByteArrayOp locality res [addr,idx]
= do dflags <- getDynFlags
- mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx
-doPrefetchByteArrayOp _ _
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
+doPrefetchByteArrayOp _ _ _
= panic "StgCmmPrim: doPrefetchByteArrayOp"
-doPrefetchAddrOp :: [LocalReg]
+doPrefetchAddrOp ::Int
+ -> [LocalReg]
-> [CmmExpr]
-> FCode ()
-doPrefetchAddrOp res [addr,idx]
- = mkBasicPrefetch 0 res addr idx
-doPrefetchAddrOp _ _
+doPrefetchAddrOp locality res [addr,idx]
+ = mkBasicPrefetch locality 0 res addr idx
+doPrefetchAddrOp _ _ _
= panic "StgCmmPrim: doPrefetchAddrOp"
-mkBasicPrefetch :: ByteOff -- Initial offset in bytes
+mkBasicPrefetch :: Int -- Locality level 0-3
+ -> ByteOff -- Initial offset in bytes
-> [LocalReg] -- Destination
-> CmmExpr -- Base address
-> CmmExpr -- Index
-> FCode ()
-mkBasicPrefetch off res base idx
+mkBasicPrefetch locality off res base idx
= do dflags <- getDynFlags
- emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
+ emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
case res of
[] -> return ()
[reg] -> emitAssign (CmmLocal reg) base
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 5002b89b72..808c591d92 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -200,7 +200,8 @@ genCall (PrimTarget (MO_UF_Conv _)) [_] args =
"Can only handle 1, given" ++ show (length args) ++ "."
-- Handle prefetching data
-genCall t@(PrimTarget MO_Prefetch_Data) [] args = do
+genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
+ | 0 <= localityInt && localityInt <= 3 = do
ver <- getLlvmVer
let argTy | ver <= 29 = [i8Ptr, i32, i32]
| otherwise = [i8Ptr, i32, i32, i32]
@@ -214,12 +215,13 @@ genCall t@(PrimTarget MO_Prefetch_Data) [] args = do
(argVars', stmts3) <- castVars $ zip argVars argTy
trash <- getTrashStmts
- let argSuffix | ver <= 29 = [mkIntLit i32 0, mkIntLit i32 3]
- | otherwise = [mkIntLit i32 0, mkIntLit i32 3, mkIntLit i32 1]
+ let argSuffix | ver <= 29 = [mkIntLit i32 0, mkIntLit i32 localityInt]
+ | otherwise = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
call = Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
stmts = stmts1 `appOL` stmts2 `appOL` stmts3
`appOL` trash `snocOL` call
return (stmts, top1 ++ top2)
+ | otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
-- Handle PopCnt and BSwap that need to only convert arg and return types
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
@@ -545,7 +547,8 @@ cmmPrimOpFunctions mop = do
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
- MO_Prefetch_Data -> fsLit "llvm.prefetch"
+ (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
+
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 65533d8f9a..3f0e7632f8 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -912,6 +912,9 @@ genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
genCCall' _ _ (PrimTarget MO_Touch) _ _
= return $ nilOL
+genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
+
genCCall' dflags gcp target dest_regs args0
= ASSERT(not $ any (`elem` [II16]) $ map cmmTypeSize argReps)
-- we rely on argument promotion in the codeGen
@@ -1165,7 +1168,7 @@ genCCall' dflags gcp target dest_regs args0
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _ ) -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 5d2b9a9d6d..5d65b427e1 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -392,7 +392,10 @@ genCCall
-- In the SPARC case we don't need a barrier.
--
genCCall (PrimTarget MO_WriteBarrier) _ _
- = do return nilOL
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
genCCall target dest_regs args0
= do
@@ -657,7 +660,7 @@ outOfLineMachOp_table mop
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _) -> unsupported
where unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index e18da25347..2456688744 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1658,7 +1658,26 @@ genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL
genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
-genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
+genCCall is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] =
+ case n of
+ 0 -> genPrefetch src $ PREFETCH NTA size
+ 1 -> genPrefetch src $ PREFETCH Lvl2 size
+ 2 -> genPrefetch src $ PREFETCH Lvl1 size
+ 3 -> genPrefetch src $ PREFETCH Lvl0 size
+ l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
+ -- the c / llvm prefetch convention is 0, 1, 2, and 3
+ -- the x86 corresponding names are : NTA, 2 , 1, and 0
+ where
+ size = archWordSize is32bit
+ -- need to know what register width for pointers!
+ genPrefetch inRegSrc prefetchCTor =
+ do
+ code_src <- getAnyReg inRegSrc
+ src_r <- getNewRegNat size
+ return $ code_src src_r `appOL`
+ (unitOL (prefetchCTor (OpAddr
+ ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
+ -- prefetch always takes an address
genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
dflags <- getDynFlags
@@ -2361,7 +2380,7 @@ outOfLineCmmOp mop res args
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- MO_Prefetch_Data -> unsupported
+ (MO_Prefetch_Data _ ) -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported here")
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index e584ffe8b9..d10591e37f 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -9,7 +9,7 @@
#include "HsVersions.h"
#include "nativeGen/NCG.h"
-module X86.Instr (Instr(..), Operand(..), JumpDest,
+module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest,
getJumpDestBlockId, canShortcut, shortcutStatics,
shortcutJump, i386_insert_ffrees, allocMoreStack,
maxSpillSlots, archWordSize)
@@ -319,7 +319,14 @@ data Instr
-- 1: popl %reg
-- SSE4.2
- | POPCNT Size Operand Reg -- src, dst
+ | POPCNT Size Operand Reg -- src, dst
+
+ -- prefetch
+ | PREFETCH PrefetchVariant Size Operand -- prefetch Variant, addr size, address to prefetch
+ -- variant can be NTA, Lvl0, Lvl1, or Lvl2
+
+data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
+
data Operand
= OpReg Reg -- register
@@ -417,6 +424,9 @@ x86_regUsageOfInstr platform instr
POPCNT _ src dst -> mkRU (use_R src []) [dst]
+ -- note: might be a better way to do this
+ PREFETCH _ _ src -> mkRU (use_R src []) []
+
_other -> panic "regUsage: unrecognised instr"
where
@@ -557,6 +567,8 @@ x86_patchRegsOfInstr instr env
POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
+ PREFETCH lvl size src -> PREFETCH lvl size (patchOp src)
+
_other -> panic "patchRegs: unrecognised instr"
where
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 7f9c6901da..f38a04d069 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -577,6 +577,11 @@ pprInstr (XOR size src dst) = pprSizeOpOp (sLit "xor") size src dst
pprInstr (POPCNT size src dst) = pprOpOp (sLit "popcnt") size src (OpReg dst)
+pprInstr (PREFETCH NTA size src ) = pprSizeOp_ (sLit "prefetchnta") size src
+pprInstr (PREFETCH Lvl0 size src) = pprSizeOp_ (sLit "prefetcht0") size src
+pprInstr (PREFETCH Lvl1 size src) = pprSizeOp_ (sLit "prefetcht1") size src
+pprInstr (PREFETCH Lvl2 size src) = pprSizeOp_ (sLit "prefetcht2") size src
+
pprInstr (NOT size op) = pprSizeOp (sLit "not") size op
pprInstr (BSWAP size op) = pprSizeOp (sLit "bswap") size (OpReg op)
pprInstr (NEGI size op) = pprSizeOp (sLit "neg") size op
@@ -1025,6 +1030,13 @@ pprSizeImmOp name size imm op1
]
+pprSizeOp_ :: LitString -> Size -> Operand -> SDoc
+pprSizeOp_ name size op1
+ = hcat [
+ pprMnemonic_ name ,
+ pprOperand size op1
+ ]
+
pprSizeOp :: LitString -> Size -> Operand -> SDoc
pprSizeOp name size op1
= hcat [
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index dcd536eeae..5bedc31a7b 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2596,22 +2596,91 @@ primop VecWriteScalarOffAddrOp "writeOffAddrAs#" GenPrimOp
vector = ALL_VECTOR_TYPES
------------------------------------------------------------------------
+
section "Prefetch"
- {Prefetch operations}
+ {Prefetch operations: Note how every prefetch operation has a name
+ with the pattern prefetch*N#, where N is either 0,1,2, or 3.
+
+ This suffix number, N, is the "locality level" of the prefetch, following the
+ convention in GCC and other compilers.
+ Higher locality numbers correspond to the memory being loaded in more
+ levels of the cpu cache, and being retained after initial use.
+
+ On the LLVM backend, prefetch*N# uses the LLVM prefetch intrinsic
+ with locality level N. The code generated by LLVM is target architecture
+ dependent, but should agree with the GHC NCG on x86 systems.
+
+ On the Sparc and PPC native backends, prefetch*N# is a No-Op.
+
+ On the x86 NCG, N=0 will generate prefetchNTA,
+ N=1 generates prefetcht2, N=2 generates prefetcht1, and
+ N=3 generates prefetcht0.
+
+ For streaming workloads, the prefetch*0 operations are recommended.
+ For workloads which do many reads or writes to a memory location in a short period of time,
+ prefetch*3 operations are recommended.
+ }
------------------------------------------------------------------------
-primop PrefetchByteArrayOp "prefetchByteArray#" GenPrimOp
+
+--- the Int# argument for prefetch is the byte offset on the byteArray or Addr#
+
+---
+primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp
ByteArray# -> Int# -> ByteArray#
- with llvm_only = True
+ with can_fail = True
-primop PrefetchMutableByteArrayOp "prefetchMutableByteArray#" GenPrimOp
+primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp
MutableByteArray# s -> Int# -> State# s -> State# s
- with has_side_effects = True
- llvm_only = True
+ with can_fail = True
+
+primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp
+ Addr# -> Int# -> Addr#
+ with can_fail = True
-primop PrefetchAddrOp "prefetchAddr#" GenPrimOp
+----
+
+primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp
+ ByteArray# -> Int# -> ByteArray#
+ with can_fail = True
+
+primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with can_fail = True
+
+primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp
Addr# -> Int# -> Addr#
- with llvm_only = True
+ with can_fail = True
+
+----
+
+primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp
+ ByteArray# -> Int# -> ByteArray#
+ with can_fail = True
+
+primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with can_fail = True
+
+primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp
+ Addr# -> Int# -> Addr#
+ with can_fail = True
+
+----
+
+primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp
+ ByteArray# -> Int# -> ByteArray#
+ with can_fail = True
+
+primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp
+ MutableByteArray# s -> Int# -> State# s -> State# s
+ with can_fail = True
+
+primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp
+ Addr# -> Int# -> Addr#
+ with can_fail = True
+
+
------------------------------------------------------------------------
--- ---