summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorArtem Pyanykh <artem.pyanykh@gmail.com>2019-04-11 14:20:03 +0300
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-04-14 01:26:35 -0400
commitedcef7b384ca5af6e67d58c39779d03f80768538 (patch)
tree0a873348e5fc80f17cabdc0f5b6dddf15b70d07d /compiler
parent6febc444c0abea6c033174aa0e813c950b9b2877 (diff)
downloadhaskell-edcef7b384ca5af6e67d58c39779d03f80768538.tar.gz
codegen: unroll memcpy calls for small bytearrays
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmExpr.hs11
-rw-r--r--compiler/codeGen/StgCmmPrim.hs50
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs11
3 files changed, 42 insertions, 30 deletions
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index dd4e777436..901df5d908 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -5,7 +5,7 @@
{-# LANGUAGE UndecidableInstances #-}
module CmmExpr
- ( CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr
+ ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr
, CmmReg(..), cmmRegType, cmmRegWidth
, CmmLit(..), cmmLitType
, LocalReg(..), localRegType
@@ -43,6 +43,8 @@ import Unique
import Data.Set (Set)
import qualified Data.Set as Set
+import BasicTypes (Alignment, mkAlignment, alignmentOf)
+
-----------------------------------------------------------------------------
-- CmmExpr
-- An expression. Expressions have no side effects.
@@ -239,6 +241,13 @@ cmmLabelType dflags lbl
cmmExprWidth :: DynFlags -> CmmExpr -> Width
cmmExprWidth dflags e = typeWidth (cmmExprType dflags e)
+-- | Returns an alignment in bytes of a CmmExpr when it's a statically
+-- known integer constant, otherwise returns an alignment of 1 byte.
+-- The caller is responsible for using with a sensible CmmExpr
+-- argument.
+cmmExprAlignment :: CmmExpr -> Alignment
+cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff)
+cmmExprAlignment _ = mkAlignment 1
--------
--- Negation for conditional branches
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 0adb2272ee..e5aacd1f1b 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -2035,8 +2035,8 @@ doCopyByteArrayOp = emitCopyByteArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
- copy _src _dst dst_p src_p bytes =
- emitMemcpyCall dst_p src_p bytes 1
+ copy _src _dst dst_p src_p bytes align =
+ emitMemcpyCall dst_p src_p bytes align
-- | Takes a source 'MutableByteArray#', an offset in the source
-- array, a destination 'MutableByteArray#', an offset into the
@@ -2050,22 +2050,26 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
-- The only time the memory might overlap is when the two arrays
-- we were provided are the same array!
-- TODO: Optimize branch for common case of no aliasing.
- copy src dst dst_p src_p bytes = do
+ copy src dst dst_p src_p bytes align = do
dflags <- getDynFlags
(moveCall, cpyCall) <- forkAltPair
- (getCode $ emitMemmoveCall dst_p src_p bytes 1)
- (getCode $ emitMemcpyCall dst_p src_p bytes 1)
+ (getCode $ emitMemmoveCall dst_p src_p bytes align)
+ (getCode $ emitMemcpyCall dst_p src_p bytes align)
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
- -> FCode ())
+ -> Alignment -> FCode ())
-> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ()
emitCopyByteArray copy src src_off dst dst_off n = do
dflags <- getDynFlags
+ let byteArrayAlignment = wordAlignment dflags
+ srcOffAlignment = cmmExprAlignment src_off
+ dstOffAlignment = cmmExprAlignment dst_off
+ align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
- copy src dst dst_p src_p n
+ copy src dst dst_p src_p n align
-- | Takes a source 'ByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
@@ -2075,7 +2079,7 @@ doCopyByteArrayToAddrOp src src_off dst_p bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
- emitMemcpyCall dst_p src_p bytes 1
+ emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-- | Takes a source 'MutableByteArray#', an offset in the source array, a
-- destination 'Addr#', and the number of bytes to copy. Copies the given
@@ -2092,7 +2096,7 @@ doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
-- Use memcpy (we are allowed to assume the arrays aren't overlapping)
dflags <- getDynFlags
dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
- emitMemcpyCall dst_p src_p bytes 1
+ emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
-- ----------------------------------------------------------------------------
@@ -2107,9 +2111,7 @@ doSetByteArrayOp ba off len c = do
dflags <- getDynFlags
let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
- offsetAlignment = case off of
- CmmLit (CmmInt intOff _) -> alignmentOf (fromInteger intOff)
- _ -> mkAlignment 1
+ offsetAlignment = cmmExprAlignment off
align = min byteArrayAlignment offsetAlignment
p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
@@ -2180,7 +2182,7 @@ doCopyArrayOp = emitCopyArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags)
+ (wordAlignment dflags)
-- | Takes a source 'MutableArray#', an offset in the source array, a
@@ -2198,9 +2200,9 @@ doCopyMutableArrayOp = emitCopyArray copy
dflags <- getDynFlags
(moveCall, cpyCall) <- forkAltPair
(getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags))
+ (wordAlignment dflags))
(getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags))
+ (wordAlignment dflags))
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
@@ -2247,7 +2249,7 @@ doCopySmallArrayOp = emitCopySmallArray copy
copy _src _dst dst_p src_p bytes =
do dflags <- getDynFlags
emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags)
+ (wordAlignment dflags)
doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
@@ -2261,9 +2263,9 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
dflags <- getDynFlags
(moveCall, cpyCall) <- forkAltPair
(getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags))
+ (wordAlignment dflags))
(getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags))
+ (wordAlignment dflags))
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
@@ -2328,7 +2330,7 @@ emitCloneArray info_p res_r src src_off n = do
(mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (wORD_SIZE dflags)
+ (wordAlignment dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
@@ -2365,7 +2367,7 @@ emitCloneSmallArray info_p res_r src src_off n = do
(mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
- (wORD_SIZE dflags)
+ (wordAlignment dflags)
emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
@@ -2493,19 +2495,19 @@ doCasByteArray res mba idx idx_ty old new = do
-- Helpers for emitting function calls
-- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
emitMemcpyCall dst src n align = do
emitPrimCall
[ {-no results-} ]
- (MO_Memcpy align)
+ (MO_Memcpy (alignmentBytes align))
[ dst, src, n ]
-- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
emitMemmoveCall dst src n align = do
emitPrimCall
[ {- no results -} ]
- (MO_Memmove align)
+ (MO_Memmove (alignmentBytes align))
[ dst, src, n ]
-- | Emit a call to @memset@. The second argument must fit inside an
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 70df468857..b46ef6a935 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1767,12 +1767,11 @@ genCCall
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
--- Unroll memcpy calls if the source and destination pointers are at
--- least DWORD aligned and the number of bytes to copy isn't too
+-- Unroll memcpy calls if the number of bytes to copy isn't too
-- large. Otherwise, call C's memcpy.
-genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
+genCCall dflags _ (PrimTarget (MO_Memcpy align)) _
[dst, src, CmmLit (CmmInt n _)] _
- | fromInteger insns <= maxInlineMemcpyInsns dflags && align .&. 3 == 0 = do
+ | fromInteger insns <= maxInlineMemcpyInsns dflags = do
code_dst <- getAnyReg dst
dst_r <- getNewRegNat format
code_src <- getAnyReg src
@@ -1785,7 +1784,9 @@ genCCall dflags is32Bit (PrimTarget (MO_Memcpy align)) _
-- instructions per move.
insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
- format = if align .&. 4 /= 0 then II32 else (archWordFormat is32Bit)
+ maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
+ effectiveAlignment = min (alignmentOf align) maxAlignment
+ format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
-- The size of each move, in bytes.
sizeBytes :: Integer