diff options
author | Johan Tibell <johan.tibell@gmail.com> | 2014-03-23 12:06:56 +0100 |
---|---|---|
committer | Johan Tibell <johan.tibell@gmail.com> | 2014-03-29 11:24:07 +0100 |
commit | 90329b6cc183b3cd05956ae6bdeb6ac6951549c2 (patch) | |
tree | ba7d31656fe75fad2555c8a66b7ebd13dd9ebeb1 /compiler/codeGen | |
parent | 4c8edfd2c722504baaa6896d194fd3a8c3f9b652 (diff) | |
download | haskell-90329b6cc183b3cd05956ae6bdeb6ac6951549c2.tar.gz |
Add SmallArray# and SmallMutableArray# types
These array types are smaller than Array# and MutableArray# and are
faster when the array size is small, as they don't have the overhead
of a card table. Having no card table reduces the closure size with 2
words in the typical small array case and leads to less work when
updating or GC:ing the array.
Reduces both the runtime and memory allocation by 8.8% on my insert
benchmark for the HashMap type in the unordered-containers package,
which makes use of lots of small arrays. With tuned GC settings
(i.e. `+RTS -A6M`) the runtime reduction is 15%.
Fixes #8923.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 8 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmCon.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 5 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 2 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 168 |
5 files changed, 149 insertions, 38 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index c29f47c7f4..06e17164dd 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -287,7 +287,7 @@ mkRhsClosure dflags bndr _cc _bi maybe_offset = assocMaybe params_w_offsets (NonVoid selectee) Just the_offset = maybe_offset offset_into_int = bytesToWordsRoundUp dflags the_offset - - fixedHdrSize dflags + - fixedHdrSizeW dflags ---------- Note [Ap thunks] ------------------ mkRhsClosure dflags bndr _cc _bi @@ -621,7 +621,7 @@ emitBlackHoleCode node = do -- work with profiling. when eager_blackholing $ do - emitStore (cmmOffsetW dflags node (fixedHdrSize dflags)) + emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) (CmmReg (CmmGlobal CurrentTSO)) emitPrimCall [] MO_WriteBarrier [] emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo)) @@ -673,7 +673,7 @@ pushUpdateFrame lbl updatee body updfr <- getUpdFrameOff dflags <- getDynFlags let - hdr = fixedHdrSize dflags * wORD_SIZE dflags + hdr = fixedHdrSize dflags frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags -- emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee @@ -682,7 +682,7 @@ pushUpdateFrame lbl updatee body emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode () emitUpdateFrame dflags frame lbl updatee = do let - hdr = fixedHdrSize dflags * wORD_SIZE dflags + hdr = fixedHdrSize dflags off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags -- emitStore frame (mkLblExpr lbl) diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index b6bcf6912b..a02a5da616 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -190,7 +190,7 @@ buildDynCon' dflags platform binder _ _cc con [arg] , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto... = do { let intlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_INTLIKE") val_int = fromIntegral val :: Int - offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSize dflags + 1) + offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSizeW dflags + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = cmmLabelOffW dflags intlike_lbl offsetW ; return ( litIdInfo dflags binder (mkConLFInfo con) intlike_amode @@ -204,7 +204,7 @@ buildDynCon' dflags platform binder _ _cc con [arg] , val_int <= mAX_CHARLIKE dflags , val_int >= mIN_CHARLIKE dflags = do { let charlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_CHARLIKE") - offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSize dflags + 1) + offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSizeW dflags + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW ; return ( litIdInfo dflags binder (mkConLFInfo con) charlike_amode diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index a688074b9e..bf88f1ccb3 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -358,7 +358,7 @@ stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags) closureField :: DynFlags -> ByteOff -> ByteOff -closureField dflags off = off + fixedHdrSize dflags * wORD_SIZE dflags +closureField dflags off = off + fixedHdrSize dflags stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr stgSp = CmmReg sp @@ -405,6 +405,9 @@ add_shim dflags arg_ty expr | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon = cmmOffsetB dflags expr (arrPtrsHdrSize dflags) + | tycon == smallArrayPrimTyCon || tycon == smallMutableArrayPrimTyCon + = cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags) + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon = cmmOffsetB dflags expr (arrWordsHdrSize dflags) diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 59afc897dc..a56248dcb9 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -404,7 +404,7 @@ mkVirtHeapOffsets dflags is_thunk things ) where hdr_words | is_thunk = thunkHdrSize dflags - | otherwise = fixedHdrSize dflags + | otherwise = fixedHdrSizeW dflags hdr_bytes = wordsToBytes dflags hdr_words non_void_things = filterOut (isVoidRep . fst) things diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index da30700bef..2c4ad4e3ce 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1,4 +1,4 @@ ------------------------------------------------------------------------------ +---------------------------------------------------------------------------- -- -- Stg to C--: primitive operations -- @@ -139,7 +139,14 @@ shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n _))] shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n _)), init] | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = - Just $ \ [res] -> doNewArrayOp res (fromInteger n) init + Just $ \ [res] -> + doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel + [ (mkIntExpr dflags (fromInteger n), + fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) + , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))), + fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags) + ] + (fromInteger n) init shouldInlinePrimOp _ CopyArrayOp [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = @@ -173,6 +180,31 @@ shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n _))] | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) +shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n _)), init] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> + doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel + [ (mkIntExpr dflags (fromInteger n), + fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) + ] + (fromInteger n) init + +shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + +shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n _))] + | wordsToBytes dflags (fromInteger n) <= maxInlineAllocSize dflags = + Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) + shouldInlinePrimOp dflags primop args | primOpOutOfLine primop = Nothing | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args @@ -298,10 +330,10 @@ emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg] = emitAssign (CmmLocal res) curCCS emitPrimOp dflags [res] ReadMutVarOp [mutv] - = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSize dflags) (gcWord dflags)) + = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) emitPrimOp dflags [] WriteMutVarOp [mutv,var] - = do emitStore (cmmOffsetW dflags mutv (fixedHdrSize dflags)) var + = do emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) @@ -310,7 +342,7 @@ emitPrimOp dflags [] WriteMutVarOp [mutv,var] -- #define sizzeofByteArrayzh(r,a) \ -- r = ((StgArrWords *)(a))->bytes emitPrimOp dflags [res] SizeofByteArrayOp [arg] - = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags)) + = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) -- #define sizzeofMutableByteArrayzh(r,a) \ -- r = ((StgArrWords *)(a))->bytes @@ -328,14 +360,14 @@ emitPrimOp dflags [res] ByteArrayContents_Char [arg] -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) emitPrimOp dflags [res] StableNameToIntOp [arg] - = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags) (bWord dflags)) + = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp dflags [res] EqStableNameOp [arg1,arg2] = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [ - cmmLoadIndexW dflags arg1 (fixedHdrSize dflags) (bWord dflags), - cmmLoadIndexW dflags arg2 (fixedHdrSize dflags) (bWord dflags) + cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags), + cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags) ]) @@ -369,6 +401,10 @@ emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg] = emit $ catAGraphs [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), mkAssign (CmmLocal res) arg ] +emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg] + = emit $ catAGraphs + [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)), + mkAssign (CmmLocal res) arg ] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg] @@ -391,8 +427,14 @@ emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePt emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v +emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix +emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix +emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v + +-- Getting the size of pointer arrays + emitPrimOp dflags [res] SizeofArrayOp [arg] - = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags)) + = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags + oFFSET_StgMutArrPtrs_ptrs dflags) (bWord dflags)) emitPrimOp dflags [res] SizeofMutableArrayOp [arg] = emitPrimOp dflags [res] SizeofArrayOp [arg] emitPrimOp dflags [res] SizeofArrayArrayOp [arg] @@ -400,6 +442,13 @@ emitPrimOp dflags [res] SizeofArrayArrayOp [arg] emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg] = emitPrimOp dflags [res] SizeofArrayOp [arg] +emitPrimOp dflags [res] SizeofSmallArrayOp [arg] = + emit $ mkAssign (CmmLocal res) + (cmmLoadIndexW dflags arg + (fixedHdrSizeW dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) (bWord dflags)) +emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] = + emitPrimOp dflags [res] SizeofSmallArrayOp [arg] + -- IndexXXXoffAddr emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args @@ -1060,6 +1109,7 @@ translateOp dflags SameMVarOp = Just (mo_wordEq dflags) translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags) translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags) translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags) +translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags) translateOp dflags SameTVarOp = Just (mo_wordEq dflags) translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags) @@ -1196,7 +1246,7 @@ doWritePtrArrayOp addr idx val loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags) - where off = fixedHdrSize dflags * wORD_SIZE dflags + oFFSET_StgMutArrPtrs_ptrs dflags + where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags mkBasicIndexedRead :: ByteOff -- Initial offset in bytes -> Maybe MachOp -- Optional result cast @@ -1471,7 +1521,7 @@ doNewByteArrayOp res_r n = do (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) - let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) + let hdr_size = fixedHdrSize dflags base <- allocHeapClosure rep info_ptr curCCS [ (mkIntExpr dflags n, @@ -1571,34 +1621,30 @@ doSetByteArrayOp ba off len c -- ---------------------------------------------------------------------------- -- Allocating arrays --- | Takes a register to return the newly allocated array in, the size --- of the new array, and an initial value for the elements. Allocates --- a new 'MutableArray#'. -doNewArrayOp :: CmmFormal -> WordOff -> CmmExpr -> FCode () -doNewArrayOp res_r n init = do +-- | Allocate a new array. +doNewArrayOp :: CmmFormal -- ^ return register + -> SMRep -- ^ representation of the array + -> CLabel -- ^ info pointer + -> [(CmmExpr, ByteOff)] -- ^ header payload + -> WordOff -- ^ array size + -> CmmExpr -- ^ initial element + -> FCode () +doNewArrayOp res_r rep info payload n init = do dflags <- getDynFlags - let info_ptr = mkLblExpr mkMAP_DIRTY_infoLabel - rep = arrPtrsRep dflags n + let info_ptr = mkLblExpr info - tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) + tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep)) (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) - let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) - - base <- allocHeapClosure rep info_ptr curCCS - [ (mkIntExpr dflags n, - hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) - , (mkIntExpr dflags (nonHdrSizeW rep), - hdr_size + oFFSET_StgMutArrPtrs_size dflags) - ] + base <- allocHeapClosure rep info_ptr curCCS payload arr <- CmmLocal `fmap` newTemp (bWord dflags) emit $ mkAssign arr base -- Initialise all elements of the the array - p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (arrPtrsHdrSize dflags) + p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep) for <- newLabelC emitLabel for let loopBody = @@ -1608,7 +1654,7 @@ doNewArrayOp res_r n init = do emit =<< mkCmmIfThen (cmmULtWord dflags (CmmReg (CmmLocal p)) (cmmOffsetW dflags (CmmReg arr) - (arrPtrsHdrSizeW dflags + n))) + (hdrSizeW dflags rep + n))) (catAGraphs loopBody) emit $ mkAssign (CmmLocal res_r) (CmmReg arr) @@ -1717,7 +1763,7 @@ emitCloneArray info_p res_r src src_off n = do (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) - let hdr_size = wordsToBytes dflags (fixedHdrSize dflags) + let hdr_size = fixedHdrSize dflags base <- allocHeapClosure rep info_ptr curCCS [ (mkIntExpr dflags n, @@ -1740,6 +1786,43 @@ emitCloneArray info_p res_r src src_off n = do emit $ mkAssign (CmmLocal res_r) (CmmReg arr) +-- | Takes an info table label, a register to return the newly +-- allocated array in, a source array, an offset in the source array, +-- and the number of elements to copy. Allocates a new array and +-- initializes it from the source array. +emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff + -> FCode () +emitCloneSmallArray info_p res_r src src_off n = do + dflags <- getDynFlags + + let info_ptr = mkLblExpr info_p + rep = smallArrPtrsRep n + + tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags)) + (mkIntExpr dflags (nonHdrSize dflags rep)) + (zeroExpr dflags) + + let hdr_size = fixedHdrSize dflags + + base <- allocHeapClosure rep info_ptr curCCS + [ (mkIntExpr dflags n, + hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags) + ] + + arr <- CmmLocal `fmap` newTemp (bWord dflags) + emit $ mkAssign arr base + + dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr) + (smallArrPtrsHdrSize dflags) + src_p <- assignTempE $ cmmOffsetExprW dflags src + (cmmAddWord dflags + (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off) + + emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n)) + (mkIntExpr dflags (wORD_SIZE dflags)) + + emit $ mkAssign (CmmLocal res_r) (CmmReg arr) + -- | Takes and offset in the destination array, the base address of -- the card table, and the number of elements affected (*not* the -- number of cards). The number of elements may not be zero. @@ -1762,6 +1845,31 @@ cardCmm :: DynFlags -> CmmExpr -> CmmExpr cardCmm dflags i = cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)) +------------------------------------------------------------------------------ +-- SmallArray PrimOp implementations + +doReadSmallPtrArrayOp :: LocalReg + -> CmmExpr + -> CmmExpr + -> FCode () +doReadSmallPtrArrayOp res addr idx = do + dflags <- getDynFlags + mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr + (gcWord dflags) idx + +doWriteSmallPtrArrayOp :: CmmExpr + -> CmmExpr + -> CmmExpr + -> FCode () +doWriteSmallPtrArrayOp addr idx val = do + dflags <- getDynFlags + let ty = cmmExprType dflags val + mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val + emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) + +------------------------------------------------------------------------------ +-- Helpers for emitting function calls + -- | Emit a call to @memcpy@. emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () emitMemcpyCall dst src n align = do |