summaryrefslogtreecommitdiff
path: root/compiler/codeGen/StgCmmPrim.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/codeGen/StgCmmPrim.hs')
-rw-r--r--compiler/codeGen/StgCmmPrim.hs336
1 files changed, 281 insertions, 55 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 1ecd72f9db..266ab3a0f6 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE CPP #-}
+-- emitPrimOp is quite large
+{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
----------------------------------------------------------------------------
--
@@ -17,6 +19,8 @@ module StgCmmPrim (
#include "HsVersions.h"
+import GhcPrelude hiding ((<*>))
+
import StgCmmLayout
import StgCmmForeign
import StgCmmEnv
@@ -24,7 +28,7 @@ import StgCmmMonad
import StgCmmUtils
import StgCmmTicky
import StgCmmHeap
-import StgCmmProf ( costCentreFrom, curCCS )
+import StgCmmProf ( costCentreFrom )
import DynFlags
import Platform
@@ -44,10 +48,8 @@ import FastString
import Outputable
import Util
-import Prelude hiding ((<*>))
-
import Data.Bits ((.&.), bit)
-import Control.Monad (liftM, when)
+import Control.Monad (liftM, when, unless)
------------------------------------------------------------------------
-- Primitive operations and foreign calls
@@ -192,7 +194,7 @@ shouldInlinePrimOp _ CopyMutableArrayArrayOp
shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
+ Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
@@ -200,7 +202,7 @@ shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w
shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n)
+ Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
@@ -225,7 +227,7 @@ shouldInlinePrimOp _ CopySmallMutableArrayOp
shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
@@ -233,7 +235,7 @@ shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmIn
shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
- Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n)
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
| wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
@@ -281,7 +283,7 @@ emitPrimOp _ [res] ParOp [arg]
emitCCall
[(res,NoHint)]
(CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
- [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)]
+ [(baseExpr, AddrHint), (arg,AddrHint)]
emitPrimOp dflags [res] SparkOp [arg]
= do
@@ -293,7 +295,7 @@ emitPrimOp dflags [res] SparkOp [arg]
emitCCall
[(tmp2,NoHint)]
(CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
- [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
+ [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
emitPrimOp dflags [res] GetCCSOfOp [arg]
@@ -304,7 +306,10 @@ emitPrimOp dflags [res] GetCCSOfOp [arg]
| otherwise = CmmLit (zeroCLit dflags)
emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
- = emitAssign (CmmLocal res) curCCS
+ = emitAssign (CmmLocal res) cccsExpr
+
+emitPrimOp _ [res] MyThreadIdOp []
+ = emitAssign (CmmLocal res) currentTSOExpr
emitPrimOp dflags [res] ReadMutVarOp [mutv]
= emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
@@ -317,7 +322,7 @@ emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
emitCCall
[{-no results-}]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
- [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)]
+ [(baseExpr, AddrHint), (mutv,AddrHint)]
-- #define sizzeofByteArrayzh(r,a) \
-- r = ((StgArrBytes *)(a))->bytes
@@ -347,14 +352,6 @@ emitPrimOp dflags [res] ByteArrayContents_Char [arg]
emitPrimOp dflags [res] StableNameToIntOp [arg]
= emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
--- #define eqStableNamezh(r,sn1,sn2) \
--- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
-emitPrimOp dflags [res] EqStableNameOp [arg1,arg2]
- = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [
- cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags),
- cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags)
- ])
-
emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
= emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
@@ -378,20 +375,20 @@ emitPrimOp dflags [res] DataToTagOp [arg]
-- #define unsafeFreezzeArrayzh(r,a)
-- {
--- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
+-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
-- r = a;
-- }
emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
= emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
+ [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
mkAssign (CmmLocal res) arg ]
emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
= emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)),
+ [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
mkAssign (CmmLocal res) arg ]
emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
= emit $ catAGraphs
- [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)),
+ [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
mkAssign (CmmLocal res) arg ]
-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
@@ -516,6 +513,40 @@ emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp
emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
+-- IndexWord8ArrayAsXXX
+
+emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+
+-- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
+
+emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+
-- WriteXXXoffAddr
emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
@@ -554,6 +585,23 @@ emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayO
emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
+-- WriteInt8ArrayAsXXX
+
+emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
+
-- Copying and setting byte arrays
emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
doCopyByteArrayOp src src_off dst dst_off n
@@ -568,6 +616,10 @@ emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
doSetByteArrayOp ba off len c
+-- Comparing byte arrays
+emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
+ doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
+
emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
@@ -580,6 +632,20 @@ emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
+-- Parallel bit deposit
+emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
+emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
+emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
+emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
+emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
+
+-- Parallel bit extract
+emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
+emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
+emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
+emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
+emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
+
-- count leading zeros
emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
@@ -833,6 +899,11 @@ callishPrimOpSupported dflags op
|| llvm -> Left (MO_Add2 (wordWidth dflags))
| otherwise -> Right genericWordAdd2Op
+ WordAddCOp | (ncg && (x86ish
+ || ppc))
+ || llvm -> Left (MO_AddWordC (wordWidth dflags))
+ | otherwise -> Right genericWordAddCOp
+
WordSubCOp | (ncg && (x86ish
|| ppc))
|| llvm -> Left (MO_SubWordC (wordWidth dflags))
@@ -969,17 +1040,64 @@ genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
(bottomHalf (CmmReg (CmmLocal r1))))]
genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
+--
+-- @
+-- c = a&b | (a|b)&~r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
+genericWordAddCOp :: GenericOp
+genericWordAddCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [aa,bb],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [aa,bb],
+ CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericWordAddCOp _ _ = panic "genericWordAddCOp"
+
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
+--
+-- @
+-- c = ~a&b | (~a|b)&r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
genericWordSubCOp :: GenericOp
-genericWordSubCOp [res_r, res_c] [aa, bb] = do
- dflags <- getDynFlags
- emit $ catAGraphs
- [ -- Put the result into 'res_r'.
- mkAssign (CmmLocal res_r) $
- CmmMachOp (mo_wordSub dflags) [aa, bb]
- -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise.
- , mkAssign (CmmLocal res_c) $
- CmmMachOp (mo_wordUGt dflags) [bb, aa]
- ]
+genericWordSubCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmReg (CmmLocal res_r)
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
genericWordSubCOp _ _ = panic "genericWordSubCOp"
genericIntAddCOp :: GenericOp
@@ -1279,9 +1397,22 @@ translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags)
translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags)
translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
+-- See Note [Comparing stable names]
+translateOp dflags EqStableNameOp = Just (mo_wordEq dflags)
translateOp _ _ = Nothing
+-- Note [Comparing stable names]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- A StableName# is actually a pointer to a stable name object (SNO)
+-- containing an index into the stable name table (SNT). We
+-- used to compare StableName#s by following the pointers to the
+-- SNOs and checking whether they held the same SNT indices. However,
+-- this is not necessary: there is a one-to-one correspondence
+-- between SNOs and entries in the SNT, so simple pointer equality
+-- does the trick.
+
-- These primops are implemented by CallishMachOps, because they sometimes
-- turn into foreign calls depending on the backend.
@@ -1296,6 +1427,9 @@ callishOp DoubleTanhOp = Just MO_F64_Tanh
callishOp DoubleAsinOp = Just MO_F64_Asin
callishOp DoubleAcosOp = Just MO_F64_Acos
callishOp DoubleAtanOp = Just MO_F64_Atan
+callishOp DoubleAsinhOp = Just MO_F64_Asinh
+callishOp DoubleAcoshOp = Just MO_F64_Acosh
+callishOp DoubleAtanhOp = Just MO_F64_Atanh
callishOp DoubleLogOp = Just MO_F64_Log
callishOp DoubleExpOp = Just MO_F64_Exp
callishOp DoubleSqrtOp = Just MO_F64_Sqrt
@@ -1310,6 +1444,9 @@ callishOp FloatTanhOp = Just MO_F32_Tanh
callishOp FloatAsinOp = Just MO_F32_Asin
callishOp FloatAcosOp = Just MO_F32_Acos
callishOp FloatAtanOp = Just MO_F32_Atan
+callishOp FloatAsinhOp = Just MO_F32_Asinh
+callishOp FloatAcoshOp = Just MO_F32_Acosh
+callishOp FloatAtanhOp = Just MO_F32_Atanh
callishOp FloatLogOp = Just MO_F32_Log
callishOp FloatExpOp = Just MO_F32_Exp
callishOp FloatSqrtOp = Just MO_F32_Sqrt
@@ -1712,7 +1849,7 @@ doNewByteArrayOp res_r n = do
let hdr_size = fixedHdrSize dflags
- base <- allocHeapClosure rep info_ptr curCCS
+ base <- allocHeapClosure rep info_ptr cccsExpr
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgArrBytes_bytes dflags)
]
@@ -1720,6 +1857,60 @@ doNewByteArrayOp res_r n = do
emit $ mkAssign (CmmLocal res_r) base
-- ----------------------------------------------------------------------------
+-- Comparing byte arrays
+
+doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
+ dflags <- getDynFlags
+ ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
+ ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
+
+ -- short-cut in case of equal pointers avoiding a costly
+ -- subroutine call to the memcmp(3) routine; the Cmm logic below
+ -- results in assembly code being generated for
+ --
+ -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
+ -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
+ --
+ -- that looks like
+ --
+ -- leaq 16(%r14),%rax
+ -- leaq 16(%rsi),%rbx
+ -- xorl %ecx,%ecx
+ -- cmpq %rbx,%rax
+ -- je l_ptr_eq
+ --
+ -- ; NB: the common case (unequal pointers) falls-through
+ -- ; the conditional jump, and therefore matches the
+ -- ; usual static branch prediction convention of modern cpus
+ --
+ -- subq $8,%rsp
+ -- movq %rbx,%rsi
+ -- movq %rax,%rdi
+ -- movl $10,%edx
+ -- xorl %eax,%eax
+ -- call memcmp
+ -- addq $8,%rsp
+ -- movslq %eax,%rax
+ -- movq %rax,%rcx
+ -- l_ptr_eq:
+ -- movq %rcx,%rbx
+ -- jmp *(%rbp)
+
+ l_ptr_eq <- newBlockId
+ l_ptr_ne <- newBlockId
+
+ emit (mkAssign (CmmLocal res) (zeroExpr dflags))
+ emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
+ l_ptr_eq l_ptr_ne (Just False))
+
+ emitLabel l_ptr_ne
+ emitMemcmpCall res ba1_p ba2_p n 1
+
+ emitLabel l_ptr_eq
+
+-- ----------------------------------------------------------------------------
-- Copying byte arrays
-- | Takes a source 'ByteArray#', an offset in the source array, a
@@ -1749,10 +1940,9 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
- [moveCall, cpyCall] <- forkAlts [
- getCode $ emitMemmoveCall dst_p src_p bytes 1,
- getCode $ emitMemcpyCall dst_p src_p bytes 1
- ]
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p bytes 1)
+ (getCode $ emitMemcpyCall dst_p src_p bytes 1)
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
@@ -1826,12 +2016,12 @@ doNewArrayOp res_r rep info payload n init = do
(mkIntExpr dflags (nonHdrSize dflags rep))
(zeroExpr dflags)
- base <- allocHeapClosure rep info_ptr curCCS payload
+ base <- allocHeapClosure rep info_ptr cccsExpr payload
arr <- CmmLocal `fmap` newTemp (bWord dflags)
emit $ mkAssign arr base
- -- Initialise all elements of the the array
+ -- Initialise all elements of the array
p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
for <- newBlockId
emitLabel for
@@ -1893,12 +2083,11 @@ doCopyMutableArrayOp = emitCopyArray copy
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
- [moveCall, cpyCall] <- forkAlts [
- getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags),
- getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags)
- ]
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
+ (wORD_SIZE dflags))
+ (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wORD_SIZE dflags))
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
@@ -1956,12 +2145,11 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes = do
dflags <- getDynFlags
- [moveCall, cpyCall] <- forkAlts
- [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags)
- , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
- (wORD_SIZE dflags)
- ]
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
+ (wORD_SIZE dflags))
+ (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wORD_SIZE dflags))
emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
@@ -2008,7 +2196,7 @@ emitCloneArray info_p res_r src src_off n = do
let hdr_size = fixedHdrSize dflags
- base <- allocHeapClosure rep info_ptr curCCS
+ base <- allocHeapClosure rep info_ptr cccsExpr
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
, (mkIntExpr dflags (nonHdrSizeW rep),
@@ -2047,7 +2235,7 @@ emitCloneSmallArray info_p res_r src src_off n = do
let hdr_size = fixedHdrSize dflags
- base <- allocHeapClosure rep info_ptr curCCS
+ base <- allocHeapClosure rep info_ptr cccsExpr
[ (mkIntExpr dflags n,
hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
]
@@ -2213,6 +2401,30 @@ emitMemsetCall dst c n align = do
(MO_Memset align)
[ dst, c, n ]
+emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemcmpCall res ptr1 ptr2 n align = do
+ -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all
+ -- code-gens currently call out to the @memcmp(3)@ C function.
+ -- This was easier than moving the sign-extensions into
+ -- all the code-gens.
+ dflags <- getDynFlags
+ let is32Bit = typeWidth (localRegType res) == W32
+
+ cres <- if is32Bit
+ then return res
+ else newTemp b32
+
+ emitPrimCall
+ [ cres ]
+ (MO_Memcmp align)
+ [ ptr1, ptr2, n ]
+
+ unless is32Bit $ do
+ emit $ mkAssign (CmmLocal res)
+ (CmmMachOp
+ (mo_s_32ToWord dflags)
+ [(CmmReg (CmmLocal cres))])
+
emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitBSwapCall res x width = do
emitPrimCall
@@ -2227,6 +2439,20 @@ emitPopCntCall res x width = do
(MO_PopCnt width)
[ x ]
+emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPdepCall res x y width = do
+ emitPrimCall
+ [ res ]
+ (MO_Pdep width)
+ [ x, y ]
+
+emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPextCall res x y width = do
+ emitPrimCall
+ [ res ]
+ (MO_Pext width)
+ [ x, y ]
+
emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
emitClzCall res x width = do
emitPrimCall