summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Craven <5086-clyring@users.noreply.gitlab.haskell.org>2023-02-21 12:59:50 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2023-04-04 01:04:10 -0400
commit9095e297fbb46781fd182210609ce2a3f6c59b7a (patch)
tree98585355e6e01e02465c25d2febfd276adafccfd
parent220a7a48cabdcfd2ef7bf5dbe3fd6df99e8d3c5b (diff)
downloadhaskell-9095e297fbb46781fd182210609ce2a3f6c59b7a.tar.gz
Add a few more memcpy-ish primops
* copyMutableByteArrayNonOverlapping# * copyAddrToAddr# * copyAddrToAddrNonOverlapping# * setAddrRange# The implementations of copyBytes, moveBytes, and fillBytes in base:Foreign.Marshal.Utils now use these new primops, which can cause us to work a bit harder generating code for them, resulting in the metric increase in T21839c observed by CI on some architectures. But in exchange, we get better code! Metric Increase: T21839c
-rw-r--r--compiler/GHC/Builtin/primops.txt.pp95
-rw-r--r--compiler/GHC/StgToCmm/Prim.hs50
-rw-r--r--compiler/GHC/StgToJS/Prim.hs4
-rw-r--r--libraries/base/Data/Array/Byte.hs4
-rw-r--r--libraries/base/Foreign/Marshal/Utils.hs35
-rw-r--r--libraries/ghc-prim/changelog.md7
6 files changed, 155 insertions, 40 deletions
diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp
index e99e5dc3a3..c6f205c6a5 100644
--- a/compiler/GHC/Builtin/primops.txt.pp
+++ b/compiler/GHC/Builtin/primops.txt.pp
@@ -1890,13 +1890,14 @@ primop CompareByteArraysOp "compareByteArrays#" GenPrimOp
primop CopyByteArrayOp "copyByteArray#" GenPrimOp
ByteArray# -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
- {@'copyByteArray#' src src_ofs dst dst_ofs n@ copies the range
- starting at offset @src_ofs@ of length @n@ from the
- 'ByteArray#' @src@ to the 'MutableByteArray#' @dst@
- starting at offset @dst_ofs@. Both arrays must fully contain
- the specified ranges, but this is not checked. The two arrays must
- not be the same array in different states, but this is not checked
- either.}
+ { @'copyByteArray#' src src_ofs dst dst_ofs len@ copies the range
+ starting at offset @src_ofs@ of length @len@ from the
+ 'ByteArray#' @src@ to the 'MutableByteArray#' @dst@
+ starting at offset @dst_ofs@. Both arrays must fully contain
+ the specified ranges, but this is not checked. The two arrays must
+ not be the same array in different states, but this is not checked
+ either.
+ }
with
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4}
@@ -1904,10 +1905,30 @@ primop CopyByteArrayOp "copyByteArray#" GenPrimOp
primop CopyMutableByteArrayOp "copyMutableByteArray#" GenPrimOp
MutableByteArray# s -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
- {Copy a range of the first MutableByteArray\# to the specified region in the second MutableByteArray\#.
- Both arrays must fully contain the specified ranges, but this is not checked. The regions are
- allowed to overlap, although this is only possible when the same array is provided
- as both the source and the destination.}
+ { @'copyMutableByteArray#' src src_ofs dst dst_ofs len@ copies the
+ range starting at offset @src_ofs@ of length @len@ from the
+ 'MutableByteArray#' @src@ to the 'MutableByteArray#' @dst@
+ starting at offset @dst_ofs@. Both arrays must fully contain the
+ specified ranges, but this is not checked. The regions are
+ allowed to overlap, although this is only possible when the same
+ array is provided as both the source and the destination.
+ }
+ with
+ has_side_effects = True
+ code_size = { primOpCodeSizeForeignCall + 4 }
+ can_fail = True
+
+primop CopyMutableByteArrayNonOverlappingOp "copyMutableByteArrayNonOverlapping#" GenPrimOp
+ MutableByteArray# s -> Int# -> MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
+ { @'copyMutableByteArrayNonOverlapping#' src src_ofs dst dst_ofs len@
+ copies the range starting at offset @src_ofs@ of length @len@ from
+ the 'MutableByteArray#' @src@ to the 'MutableByteArray#' @dst@
+ starting at offset @dst_ofs@. Both arrays must fully contain the
+ specified ranges, but this is not checked. The regions are /not/
+ allowed to overlap, but this is also not checked.
+
+ @since 0.11.0
+ }
with
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
@@ -1922,7 +1943,7 @@ primop CopyByteArrayToAddrOp "copyByteArrayToAddr#" GenPrimOp
either.}
with
has_side_effects = True
- code_size = { primOpCodeSizeForeignCall + 4}
+ code_size = { primOpCodeSizeForeignCall + 4 }
can_fail = True
primop CopyMutableByteArrayToAddrOp "copyMutableByteArrayToAddr#" GenPrimOp
@@ -1934,7 +1955,7 @@ primop CopyMutableByteArrayToAddrOp "copyMutableByteArrayToAddr#" GenPrimOp
pinned), but this is not checked either.}
with
has_side_effects = True
- code_size = { primOpCodeSizeForeignCall + 4}
+ code_size = { primOpCodeSizeForeignCall + 4 }
can_fail = True
primop CopyAddrToByteArrayOp "copyAddrToByteArray#" GenPrimOp
@@ -1946,7 +1967,38 @@ primop CopyAddrToByteArrayOp "copyAddrToByteArray#" GenPrimOp
but this is not checked either.}
with
has_side_effects = True
- code_size = { primOpCodeSizeForeignCall + 4}
+ code_size = { primOpCodeSizeForeignCall + 4 }
+ can_fail = True
+
+primop CopyAddrToAddrOp "copyAddrToAddr#" GenPrimOp
+ Addr# -> Addr# -> Int# -> State# RealWorld -> State# RealWorld
+ { @'copyAddrToAddr#' src dest len@ copies @len@ bytes
+ from @src@ to @dest@. These two memory ranges are allowed to overlap.
+
+ Analogous to the standard C function @memmove@, but with a different
+ argument order.
+
+ @since 0.11.0
+ }
+ with
+ has_side_effects = True
+ code_size = { primOpCodeSizeForeignCall }
+ can_fail = True
+
+primop CopyAddrToAddrNonOverlappingOp "copyAddrToAddrNonOverlapping#" GenPrimOp
+ Addr# -> Addr# -> Int# -> State# RealWorld -> State# RealWorld
+ { @'copyAddrToAddrNonOverlapping#' src dest len@ copies @len@ bytes
+ from @src@ to @dest@. As the name suggests, these two memory ranges
+ /must not overlap/, although this pre-condition is not checked.
+
+ Analogous to the standard C function @memcpy@, but with a different
+ argument order.
+
+ @since 0.11.0
+ }
+ with
+ has_side_effects = True
+ code_size = { primOpCodeSizeForeignCall }
can_fail = True
primop SetByteArrayOp "setByteArray#" GenPrimOp
@@ -1958,6 +2010,21 @@ primop SetByteArrayOp "setByteArray#" GenPrimOp
code_size = { primOpCodeSizeForeignCall + 4 }
can_fail = True
+primop SetAddrRangeOp "setAddrRange#" GenPrimOp
+ Addr# -> Int# -> Int# -> State# RealWorld -> State# RealWorld
+ { @'setAddrRange#' dest len c@ sets all of the bytes in
+ @[dest, dest+len)@ to the value @c@.
+
+ Analogous to the standard C function @memset@, but with a different
+ argument order.
+
+ @since 0.11.0
+ }
+ with
+ has_side_effects = True
+ code_size = { primOpCodeSizeForeignCall }
+ can_fail = True
+
-- Atomic operations
primop AtomicReadByteArrayOp_Int "atomicReadIntArray#" GenPrimOp
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
index f7afeb71a9..1837d9ac37 100644
--- a/compiler/GHC/StgToCmm/Prim.hs
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -715,14 +715,22 @@ emitPrimOp cfg primop =
doCopyByteArrayOp src src_off dst dst_off n
CopyMutableByteArrayOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
doCopyMutableByteArrayOp src src_off dst dst_off n
+ CopyMutableByteArrayNonOverlappingOp -> \[src,src_off,dst,dst_off,n] -> opIntoRegs $ \[] ->
+ doCopyMutableByteArrayNonOverlappingOp src src_off dst dst_off n
CopyByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
doCopyByteArrayToAddrOp src src_off dst n
CopyMutableByteArrayToAddrOp -> \[src,src_off,dst,n] -> opIntoRegs $ \[] ->
doCopyMutableByteArrayToAddrOp src src_off dst n
CopyAddrToByteArrayOp -> \[src,dst,dst_off,n] -> opIntoRegs $ \[] ->
doCopyAddrToByteArrayOp src dst dst_off n
+ CopyAddrToAddrOp -> \[src,dst,n] -> opIntoRegs $ \[] ->
+ doCopyAddrToAddrOp src dst n
+ CopyAddrToAddrNonOverlappingOp -> \[src,dst,n] -> opIntoRegs $ \[] ->
+ doCopyAddrToAddrNonOverlappingOp src dst n
SetByteArrayOp -> \[ba,off,len,c] -> opIntoRegs $ \[] ->
doSetByteArrayOp ba off len c
+ SetAddrRangeOp -> \[dst,len,c] -> opIntoRegs $ \[] ->
+ doSetAddrRangeOp dst len c
-- Comparing byte arrays
CompareByteArraysOp -> \[ba1,ba1_off,ba2,ba2_off,n] -> opIntoRegs $ \[res] ->
@@ -2518,6 +2526,7 @@ doCopyByteArrayOp = emitCopyByteArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
+ -- TODO: Make -fcheck-prim-bounds check that the arrays are distinct
copy _src _dst dst_p src_p bytes align =
emitMemcpyCall dst_p src_p bytes align
@@ -2540,6 +2549,20 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
(getCode $ emitMemcpyCall dst_p src_p bytes align)
emit =<< mkCmmIfThenElse (cmmEqWord platform src dst) moveCall cpyCall
+-- | Takes a source 'MutableByteArray#', an offset in the source
+-- array, a destination 'MutableByteArray#', an offset into the
+-- destination array, and the number of bytes to copy. Copies the
+-- given number of bytes from the source array to the destination
+-- array. Assumes the two ranges are disjoint
+doCopyMutableByteArrayNonOverlappingOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCopyMutableByteArrayNonOverlappingOp = emitCopyByteArray copy
+ where
+ copy _src _dst dst_p src_p bytes align = do
+ -- TODO: Make -fcheck-prim-bounds verify no overlap here
+ emitMemcpyCall dst_p src_p bytes align
+
+
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> Alignment -> FCode ())
-> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
@@ -2596,6 +2619,23 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
dst_p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform dst (arrWordsHdrSize profile)) dst_off
emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
+-- | Takes a source 'Addr#', a destination 'Addr#', and the number of
+-- bytes to copy. Copies the given number of bytes from the source
+-- memory region to the destination array.
+doCopyAddrToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+doCopyAddrToAddrOp src_p dst_p bytes = do
+ -- Use memmove; the ranges may overlap
+ emitMemmoveCall dst_p src_p bytes (mkAlignment 1)
+
+-- | Takes a source 'Addr#', a destination 'Addr#', and the number of
+-- bytes to copy. Copies the given number of bytes from the source
+-- memory region to the destination region. The regions may not overlap.
+doCopyAddrToAddrNonOverlappingOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+doCopyAddrToAddrNonOverlappingOp src_p dst_p bytes = do
+ -- Use memcpy; the ranges may not overlap
+ -- TODO: Make -fcheck-prim-bounds verify no overlap here
+ emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
+
ifNonZero :: CmmExpr -> FCode () -> FCode ()
ifNonZero e it = do
platform <- getPlatform
@@ -2608,7 +2648,7 @@ ifNonZero e it = do
-- | Takes a 'MutableByteArray#', an offset into the array, a length,
-- and a byte, and sets each of the selected bytes in the array to the
--- character.
+-- given byte.
doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ()
doSetByteArrayOp ba off len c = do
@@ -2625,6 +2665,14 @@ doSetByteArrayOp ba off len c = do
p <- assignTempE $ cmmOffsetExpr platform (cmmOffsetB platform ba (arrWordsHdrSize profile)) off
emitMemsetCall p c len align
+-- | Takes an 'Addr#', a length, and a byte, and sets each of the
+-- selected bytes in memory to the given byte.
+doSetAddrRangeOp :: CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doSetAddrRangeOp dst len c = do
+ emitMemsetCall dst c len (mkAlignment 1)
+
+
-- ----------------------------------------------------------------------------
-- Allocating arrays
diff --git a/compiler/GHC/StgToJS/Prim.hs b/compiler/GHC/StgToJS/Prim.hs
index 5c81744f2a..188d6167fa 100644
--- a/compiler/GHC/StgToJS/Prim.hs
+++ b/compiler/GHC/StgToJS/Prim.hs
@@ -717,15 +717,19 @@ genPrim prof bound ty op = case op of
. boundsChecked bound a2 (Add o2 (Sub n 1))
$ appS "h$copyMutableByteArray" [a1,o1,a2,o2,n]
CopyMutableByteArrayOp -> \[] xs@[_a1,_o1,_a2,_o2,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
+ CopyMutableByteArrayNonOverlappingOp -> \[] xs@[_a1,_o1,_a2,_o2,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
CopyByteArrayToAddrOp -> \[] xs@[_a1,_o1,_a2,_o2,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
CopyMutableByteArrayToAddrOp -> \[] xs@[_a1,_o1,_a2,_o2,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
CopyAddrToByteArrayOp -> \[] xs@[_ba,_bo,_aa,_ao,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
+ CopyAddrToAddrOp -> \[] xs@[_ba,_bo,_aa,_ao,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
+ CopyAddrToAddrNonOverlappingOp -> \[] xs@[_ba,_bo,_aa,_ao,_n] -> genPrim prof bound ty CopyByteArrayOp [] xs
SetByteArrayOp -> \[] [a,o,n,v] ->
PrimInline . boundsChecked bound a (Add o (Sub n 1)) $ loopBlockS zero_ (.<. n) \i ->
[ write_u8 a (Add o i) v
, postIncrS i
]
+ SetAddrRangeOp -> \[] xs@[_a,_o,_n,_v] -> genPrim prof bound ty SetByteArrayOp [] xs
AtomicReadByteArrayOp_Int -> \[r] [a,i] -> PrimInline . boundsChecked bound a (Add i 3) $ r |= read_i32 a i
AtomicWriteByteArrayOp_Int -> \[] [a,i,v] -> PrimInline . boundsChecked bound a (Add i 3) $ write_i32 a i v
diff --git a/libraries/base/Data/Array/Byte.hs b/libraries/base/Data/Array/Byte.hs
index 918e20bae8..99a7b0a36e 100644
--- a/libraries/base/Data/Array/Byte.hs
+++ b/libraries/base/Data/Array/Byte.hs
@@ -134,7 +134,7 @@ unsafeCopyByteArray (MutableByteArray dst#) (I# doff#) (ByteArray src#) (I# soff
-- | Copy a slice from one mutable byte array to another
-- or to the same mutable byte array.
--
--- /Note:/ this function does not do bounds checking.
+-- /Note:/ this function does not do bounds or overlap checking.
unsafeCopyMutableByteArray
:: MutableByteArray s -- ^ destination array
-> Int -- ^ offset into destination array
@@ -144,7 +144,7 @@ unsafeCopyMutableByteArray
-> ST s ()
{-# INLINE unsafeCopyMutableByteArray #-}
unsafeCopyMutableByteArray (MutableByteArray dst#) (I# doff#) (MutableByteArray src#) (I# soff#) (I# sz#) =
- ST (\s# -> case copyMutableByteArray# src# soff# dst# doff# sz# s# of
+ ST (\s# -> case copyMutableByteArrayNonOverlapping# src# soff# dst# doff# sz# s# of
s'# -> (# s'#, () #))
-- | @since 4.17.0.0
diff --git a/libraries/base/Foreign/Marshal/Utils.hs b/libraries/base/Foreign/Marshal/Utils.hs
index d134788e62..7d6cdcc879 100644
--- a/libraries/base/Foreign/Marshal/Utils.hs
+++ b/libraries/base/Foreign/Marshal/Utils.hs
@@ -1,6 +1,9 @@
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
-----------------------------------------------------------------------------
-- |
-- Module : Foreign.Marshal.Utils
@@ -50,13 +53,11 @@ module Foreign.Marshal.Utils (
) where
import Data.Maybe
-import Foreign.Ptr ( Ptr, nullPtr )
+import GHC.Ptr ( Ptr(..), nullPtr )
import Foreign.Storable ( Storable(poke) )
-import Foreign.C.Types ( CSize(..), CInt(..) )
import Foreign.Marshal.Alloc ( malloc, alloca )
-import Data.Word ( Word8 )
+import GHC.Word ( Word8(..) )
-import GHC.Real ( fromIntegral )
import GHC.Num
import GHC.Base
@@ -158,9 +159,8 @@ copyBytes
-> Ptr a -- ^ Source
-> Int -- ^ Size in bytes
-> IO ()
-copyBytes dest src size = do
- _ <- memcpy dest src (fromIntegral size)
- return ()
+copyBytes = coerce $ \(Ptr dest#) (Ptr src#) (I# size#) s
+ -> (# copyAddrToAddrNonOverlapping# src# dest# size# s, () #)
-- |Copies the given number of bytes from the second area (source) into the
-- first (destination); the copied areas /may/ overlap
@@ -170,9 +170,8 @@ moveBytes
-> Ptr a -- ^ Source
-> Int -- ^ Size in bytes
-> IO ()
-moveBytes dest src size = do
- _ <- memmove dest src (fromIntegral size)
- return ()
+moveBytes = coerce $ \(Ptr dest#) (Ptr src#) (I# size#) s
+ -> (# copyAddrToAddr# src# dest# size# s, () #)
-- Filling up memory area with required values
-- -------------------------------------------
@@ -180,16 +179,6 @@ moveBytes dest src size = do
-- |Fill a given number of bytes in memory area with a byte value.
--
-- @since 4.8.0.0
-fillBytes :: Ptr a -> Word8 -> Int -> IO ()
-fillBytes dest char size = do
- _ <- memset dest (fromIntegral char) (fromIntegral size)
- return ()
-
--- auxiliary routines
--- -------------------
-
--- |Basic C routines needed for memory copying
---
-foreign import ccall unsafe "string.h" memcpy :: Ptr a -> Ptr a -> CSize -> IO (Ptr a)
-foreign import ccall unsafe "string.h" memmove :: Ptr a -> Ptr a -> CSize -> IO (Ptr a)
-foreign import ccall unsafe "string.h" memset :: Ptr a -> CInt -> CSize -> IO (Ptr a)
+fillBytes :: Ptr a -> Word8 -> Int -> IO ()
+fillBytes = coerce $ \(Ptr dest#) (W8# byte#) (I# size#) s
+ -> (# setAddrRange# dest# size# (word2Int# (word8ToWord# byte#)) s, () #)
diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md
index c78de3c5e2..221325c029 100644
--- a/libraries/ghc-prim/changelog.md
+++ b/libraries/ghc-prim/changelog.md
@@ -14,6 +14,13 @@
- `sameMutVar#`, `sameTVar#`, `sameMVar#`
- `sameIOPort#`, `eqStableName#`.
+- Several new primops were added:
+
+ - `copyMutableByteArrayNonOverlapping#`
+ - `copyAddrToAddr#`
+ - `copyAddrToAddrNonOverlapping#`
+ - `setAddrRange#`
+
## 0.10.0
- Shipped with GHC 9.6.1