diff options
Diffstat (limited to 'compiler/codeGen/StgCmmPrim.hs')
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 336 |
1 files changed, 281 insertions, 55 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 1ecd72f9db..266ab3a0f6 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1,4 +1,6 @@ {-# LANGUAGE CPP #-} +-- emitPrimOp is quite large +{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-} ---------------------------------------------------------------------------- -- @@ -17,6 +19,8 @@ module StgCmmPrim ( #include "HsVersions.h" +import GhcPrelude hiding ((<*>)) + import StgCmmLayout import StgCmmForeign import StgCmmEnv @@ -24,7 +28,7 @@ import StgCmmMonad import StgCmmUtils import StgCmmTicky import StgCmmHeap -import StgCmmProf ( costCentreFrom, curCCS ) +import StgCmmProf ( costCentreFrom ) import DynFlags import Platform @@ -44,10 +48,8 @@ import FastString import Outputable import Util -import Prelude hiding ((<*>)) - import Data.Bits ((.&.), bit) -import Control.Monad (liftM, when) +import Control.Monad (liftM, when, unless) ------------------------------------------------------------------------ -- Primitive operations and foreign calls @@ -192,7 +194,7 @@ shouldInlinePrimOp _ CopyMutableArrayArrayOp shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -200,7 +202,7 @@ shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -225,7 +227,7 @@ shouldInlinePrimOp _ CopySmallMutableArrayOp shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -233,7 +235,7 @@ shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmIn shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -281,7 +283,7 @@ emitPrimOp _ [res] ParOp [arg] emitCCall [(res,NoHint)] (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)] + [(baseExpr, AddrHint), (arg,AddrHint)] emitPrimOp dflags [res] SparkOp [arg] = do @@ -293,7 +295,7 @@ emitPrimOp dflags [res] SparkOp [arg] emitCCall [(tmp2,NoHint)] (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] + [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp)) emitPrimOp dflags [res] GetCCSOfOp [arg] @@ -304,7 +306,10 @@ emitPrimOp dflags [res] GetCCSOfOp [arg] | otherwise = CmmLit (zeroCLit dflags) emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg] - = emitAssign (CmmLocal res) curCCS + = emitAssign (CmmLocal res) cccsExpr + +emitPrimOp _ [res] MyThreadIdOp [] + = emitAssign (CmmLocal res) currentTSOExpr emitPrimOp dflags [res] ReadMutVarOp [mutv] = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) @@ -317,7 +322,7 @@ emitPrimOp dflags res@[] WriteMutVarOp [mutv,var] emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) - [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)] + [(baseExpr, AddrHint), (mutv,AddrHint)] -- #define sizzeofByteArrayzh(r,a) \ -- r = ((StgArrBytes *)(a))->bytes @@ -347,14 +352,6 @@ emitPrimOp dflags [res] ByteArrayContents_Char [arg] emitPrimOp dflags [res] StableNameToIntOp [arg] = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) --- #define eqStableNamezh(r,sn1,sn2) \ --- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) -emitPrimOp dflags [res] EqStableNameOp [arg1,arg2] - = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [ - cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags), - cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags) - ]) - emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2] = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2]) @@ -378,20 +375,20 @@ emitPrimOp dflags [res] DataToTagOp [arg] -- #define unsafeFreezzeArrayzh(r,a) -- { --- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info); -- r = a; -- } emitPrimOp _ [res] UnsafeFreezeArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) @@ -516,6 +513,40 @@ emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args +-- IndexWord8ArrayAsXXX + +emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args + +-- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX + +emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args + -- WriteXXXoffAddr emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args @@ -554,6 +585,23 @@ emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayO emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args +-- WriteInt8ArrayAsXXX + +emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args + -- Copying and setting byte arrays emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] = doCopyByteArrayOp src src_off dst dst_off n @@ -568,6 +616,10 @@ emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] = emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = doSetByteArrayOp ba off len c +-- Comparing byte arrays +emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] = + doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n + emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 @@ -580,6 +632,20 @@ emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags) +-- Parallel bit deposit +emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8 +emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16 +emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32 +emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64 +emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags) + +-- Parallel bit extract +emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8 +emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16 +emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32 +emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64 +emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags) + -- count leading zeros emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16 @@ -833,6 +899,11 @@ callishPrimOpSupported dflags op || llvm -> Left (MO_Add2 (wordWidth dflags)) | otherwise -> Right genericWordAdd2Op + WordAddCOp | (ncg && (x86ish + || ppc)) + || llvm -> Left (MO_AddWordC (wordWidth dflags)) + | otherwise -> Right genericWordAddCOp + WordSubCOp | (ncg && (x86ish || ppc)) || llvm -> Left (MO_SubWordC (wordWidth dflags)) @@ -969,17 +1040,64 @@ genericWordAdd2Op [res_h, res_l] [arg_x, arg_y] (bottomHalf (CmmReg (CmmLocal r1))))] genericWordAdd2Op _ _ = panic "genericWordAdd2Op" +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@: +-- +-- @ +-- c = a&b | (a|b)&~r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ +genericWordAddCOp :: GenericOp +genericWordAddCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [aa,bb], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [aa,bb], + CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)] + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] +genericWordAddCOp _ _ = panic "genericWordAddCOp" + +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@: +-- +-- @ +-- c = ~a&b | (~a|b)&r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ genericWordSubCOp :: GenericOp -genericWordSubCOp [res_r, res_c] [aa, bb] = do - dflags <- getDynFlags - emit $ catAGraphs - [ -- Put the result into 'res_r'. - mkAssign (CmmLocal res_r) $ - CmmMachOp (mo_wordSub dflags) [aa, bb] - -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise. - , mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUGt dflags) [bb, aa] - ] +genericWordSubCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmReg (CmmLocal res_r) + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] genericWordSubCOp _ _ = panic "genericWordSubCOp" genericIntAddCOp :: GenericOp @@ -1279,9 +1397,22 @@ translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags) translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags) translateOp dflags SameTVarOp = Just (mo_wordEq dflags) translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags) +-- See Note [Comparing stable names] +translateOp dflags EqStableNameOp = Just (mo_wordEq dflags) translateOp _ _ = Nothing +-- Note [Comparing stable names] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- A StableName# is actually a pointer to a stable name object (SNO) +-- containing an index into the stable name table (SNT). We +-- used to compare StableName#s by following the pointers to the +-- SNOs and checking whether they held the same SNT indices. However, +-- this is not necessary: there is a one-to-one correspondence +-- between SNOs and entries in the SNT, so simple pointer equality +-- does the trick. + -- These primops are implemented by CallishMachOps, because they sometimes -- turn into foreign calls depending on the backend. @@ -1296,6 +1427,9 @@ callishOp DoubleTanhOp = Just MO_F64_Tanh callishOp DoubleAsinOp = Just MO_F64_Asin callishOp DoubleAcosOp = Just MO_F64_Acos callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleAsinhOp = Just MO_F64_Asinh +callishOp DoubleAcoshOp = Just MO_F64_Acosh +callishOp DoubleAtanhOp = Just MO_F64_Atanh callishOp DoubleLogOp = Just MO_F64_Log callishOp DoubleExpOp = Just MO_F64_Exp callishOp DoubleSqrtOp = Just MO_F64_Sqrt @@ -1310,6 +1444,9 @@ callishOp FloatTanhOp = Just MO_F32_Tanh callishOp FloatAsinOp = Just MO_F32_Asin callishOp FloatAcosOp = Just MO_F32_Acos callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatAsinhOp = Just MO_F32_Asinh +callishOp FloatAcoshOp = Just MO_F32_Acosh +callishOp FloatAtanhOp = Just MO_F32_Atanh callishOp FloatLogOp = Just MO_F32_Log callishOp FloatExpOp = Just MO_F32_Exp callishOp FloatSqrtOp = Just MO_F32_Sqrt @@ -1712,7 +1849,7 @@ doNewByteArrayOp res_r n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgArrBytes_bytes dflags) ] @@ -1720,6 +1857,60 @@ doNewByteArrayOp res_r n = do emit $ mkAssign (CmmLocal res_r) base -- ---------------------------------------------------------------------------- +-- Comparing byte arrays + +doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr + -> FCode () +doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do + dflags <- getDynFlags + ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off + ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off + + -- short-cut in case of equal pointers avoiding a costly + -- subroutine call to the memcmp(3) routine; the Cmm logic below + -- results in assembly code being generated for + -- + -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int# + -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10# + -- + -- that looks like + -- + -- leaq 16(%r14),%rax + -- leaq 16(%rsi),%rbx + -- xorl %ecx,%ecx + -- cmpq %rbx,%rax + -- je l_ptr_eq + -- + -- ; NB: the common case (unequal pointers) falls-through + -- ; the conditional jump, and therefore matches the + -- ; usual static branch prediction convention of modern cpus + -- + -- subq $8,%rsp + -- movq %rbx,%rsi + -- movq %rax,%rdi + -- movl $10,%edx + -- xorl %eax,%eax + -- call memcmp + -- addq $8,%rsp + -- movslq %eax,%rax + -- movq %rax,%rcx + -- l_ptr_eq: + -- movq %rcx,%rbx + -- jmp *(%rbp) + + l_ptr_eq <- newBlockId + l_ptr_ne <- newBlockId + + emit (mkAssign (CmmLocal res) (zeroExpr dflags)) + emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p) + l_ptr_eq l_ptr_ne (Just False)) + + emitLabel l_ptr_ne + emitMemcmpCall res ba1_p ba2_p n 1 + + emitLabel l_ptr_eq + +-- ---------------------------------------------------------------------------- -- Copying byte arrays -- | Takes a source 'ByteArray#', an offset in the source array, a @@ -1749,10 +1940,9 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts [ - getCode $ emitMemmoveCall dst_p src_p bytes 1, - getCode $ emitMemcpyCall dst_p src_p bytes 1 - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p bytes 1) + (getCode $ emitMemcpyCall dst_p src_p bytes 1) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr @@ -1826,12 +2016,12 @@ doNewArrayOp res_r rep info payload n init = do (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) - base <- allocHeapClosure rep info_ptr curCCS payload + base <- allocHeapClosure rep info_ptr cccsExpr payload arr <- CmmLocal `fmap` newTemp (bWord dflags) emit $ mkAssign arr base - -- Initialise all elements of the the array + -- Initialise all elements of the array p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep) for <- newBlockId emitLabel for @@ -1893,12 +2083,11 @@ doCopyMutableArrayOp = emitCopyArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts [ - getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags), - getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) + (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff @@ -1956,12 +2145,11 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts - [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) + (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff @@ -2008,7 +2196,7 @@ emitCloneArray info_p res_r src src_off n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) , (mkIntExpr dflags (nonHdrSizeW rep), @@ -2047,7 +2235,7 @@ emitCloneSmallArray info_p res_r src src_off n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags) ] @@ -2213,6 +2401,30 @@ emitMemsetCall dst c n align = do (MO_Memset align) [ dst, c, n ] +emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode () +emitMemcmpCall res ptr1 ptr2 n align = do + -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all + -- code-gens currently call out to the @memcmp(3)@ C function. + -- This was easier than moving the sign-extensions into + -- all the code-gens. + dflags <- getDynFlags + let is32Bit = typeWidth (localRegType res) == W32 + + cres <- if is32Bit + then return res + else newTemp b32 + + emitPrimCall + [ cres ] + (MO_Memcmp align) + [ ptr1, ptr2, n ] + + unless is32Bit $ do + emit $ mkAssign (CmmLocal res) + (CmmMachOp + (mo_s_32ToWord dflags) + [(CmmReg (CmmLocal cres))]) + emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () emitBSwapCall res x width = do emitPrimCall @@ -2227,6 +2439,20 @@ emitPopCntCall res x width = do (MO_PopCnt width) [ x ] +emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () +emitPdepCall res x y width = do + emitPrimCall + [ res ] + (MO_Pdep width) + [ x, y ] + +emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () +emitPextCall res x y width = do + emitPrimCall + [ res ] + (MO_Pext width) + [ x, y ] + emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode () emitClzCall res x width = do emitPrimCall |