summaryrefslogtreecommitdiff
path: root/compiler/GHC
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC')
-rw-r--r--compiler/GHC/ByteCode/Asm.hs63
-rw-r--r--compiler/GHC/ByteCode/Instr.hs16
-rw-r--r--compiler/GHC/ByteCode/Types.hs44
-rw-r--r--compiler/GHC/Cmm/CallConv.hs113
-rw-r--r--compiler/GHC/Cmm/Parser.y4
-rw-r--r--compiler/GHC/Cmm/Reg.hs2
-rw-r--r--compiler/GHC/StgToByteCode.hs211
-rw-r--r--compiler/GHC/StgToCmm/Foreign.hs22
8 files changed, 346 insertions, 129 deletions
diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs
index 24e2645052..391949d448 100644
--- a/compiler/GHC/ByteCode/Asm.hs
+++ b/compiler/GHC/ByteCode/Asm.hs
@@ -12,7 +12,7 @@ module GHC.ByteCode.Asm (
bcoFreeNames,
SizedSeq, sizeSS, ssElts,
iNTERP_STACK_CHECK_THRESH,
- mkTupleInfoLit
+ mkNativeCallInfoLit
) where
import GHC.Prelude
@@ -32,7 +32,6 @@ import GHC.Types.Unique.DSet
import GHC.Utils.Outputable
import GHC.Utils.Panic
-import GHC.Utils.Panic.Plain
import GHC.Core.TyCon
import GHC.Data.FastString
@@ -40,7 +39,7 @@ import GHC.Data.SizedSeq
import GHC.StgToCmm.Layout ( ArgRep(..) )
import GHC.Cmm.Expr
-import GHC.Cmm.CallConv ( tupleRegsCover )
+import GHC.Cmm.CallConv ( allArgRegsCover )
import GHC.Platform
import GHC.Platform.Profile
@@ -202,7 +201,8 @@ assembleBCO platform (ProtoBCO { protoBCOName = nm
(final_insns, final_lits, final_ptrs) <- flip execStateT initial_state $ runAsm platform long_jumps env asm
-- precomputed size should be equal to final size
- massert (n_insns == sizeSS final_insns)
+ massertPpr (n_insns == sizeSS final_insns)
+ (text "bytecode instruction count mismatch")
let asm_insns = ssElts final_insns
insns_arr = Array.listArray (0, fromIntegral n_insns - 1) asm_insns
@@ -351,7 +351,8 @@ largeArg platform w = case platformWordSize platform of
fromIntegral (w `shiftR` 32),
fromIntegral (w `shiftR` 16),
fromIntegral w]
- PW4 -> assert (w < fromIntegral (maxBound :: Word32)) $
+ PW4 -> assertPpr (w < fromIntegral (maxBound :: Word32))
+ (text "largeArg too big:" <+> ppr w) $
[fromIntegral (w `shiftR` 16),
fromIntegral w]
@@ -388,14 +389,14 @@ assembleI platform i = case i of
-> do let ul_bco = assembleBCO platform proto
p <- ioptr (liftM BCOPtrBCO ul_bco)
emit (push_alts pk) [Op p]
- PUSH_ALTS_TUPLE proto tuple_info tuple_proto
+ PUSH_ALTS_TUPLE proto call_info tuple_proto
-> do let ul_bco = assembleBCO platform proto
ul_tuple_bco = assembleBCO platform
tuple_proto
p <- ioptr (liftM BCOPtrBCO ul_bco)
p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco)
info <- int (fromIntegral $
- mkTupleInfoSig platform tuple_info)
+ mkNativeCallInfoSig platform call_info)
emit bci_PUSH_ALTS_T
[Op p, Op info, Op p_tup]
PUSH_PAD8 -> emit bci_PUSH_PAD8 []
@@ -491,6 +492,7 @@ assembleI platform i = case i of
RETURN_TUPLE -> emit bci_RETURN_T []
CCALL off m_addr i -> do np <- addr m_addr
emit bci_CCALL [SmallOp off, Op np, SmallOp i]
+ PRIMCALL -> emit bci_PRIMCALL []
BRK_FUN index uniq cc -> do p1 <- ptr BCOPtrBreakArray
q <- int (getKey uniq)
np <- addr cc
@@ -580,41 +582,44 @@ return_unlifted V64 = error "return_unlifted: vector"
maximum number of tuple elements may be larger. Elements can also
take multiple words on the stack (for example Double# on a 32 bit
platform).
-
-}
-maxTupleNativeStackSize :: WordOff
-maxTupleNativeStackSize = 62
+maxTupleReturnNativeStackSize :: WordOff
+maxTupleReturnNativeStackSize = 62
{-
- Construct the tuple_info word that stg_ctoi_t and stg_ret_t use
- to convert a tuple between the native calling convention and the
+ Construct the call_info word that stg_ctoi_t, stg_ret_t and stg_primcall
+ use to convert arguments between the native calling convention and the
interpreter.
- See Note [GHCi tuple layout] for more information.
+ See Note [GHCi and native call registers] for more information.
-}
-mkTupleInfoSig :: Platform -> TupleInfo -> Word32
-mkTupleInfoSig platform TupleInfo{..}
- | tupleNativeStackSize > maxTupleNativeStackSize
- = pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler"
- (ppr tupleNativeStackSize <+> text "stack words." <+>
+mkNativeCallInfoSig :: Platform -> NativeCallInfo -> Word32
+mkNativeCallInfoSig platform NativeCallInfo{..}
+ | nativeCallType == NativeTupleReturn && nativeCallStackSpillSize > maxTupleReturnNativeStackSize
+ = pprPanic "mkNativeCallInfoSig: tuple too big for the bytecode compiler"
+ (ppr nativeCallStackSpillSize <+> text "stack words." <+>
text "Use -fobject-code to get around this limit"
)
| otherwise
- = assert (length regs <= 24) {- 24 bits for bitmap -}
- assert (tupleNativeStackSize < 255) {- 8 bits for stack size -}
- assert (all (`elem` regs) (regSetToList tupleRegs)) {- all regs accounted for -}
- foldl' reg_bit 0 (zip regs [0..]) .|.
- (fromIntegral tupleNativeStackSize `shiftL` 24)
+ = assertPpr (length regs <= 24) (text "too many registers for bitmap:" <+> ppr (length regs)) {- 24 bits for register bitmap -}
+ assertPpr (cont_offset < 255) (text "continuation offset too large:" <+> ppr cont_offset) {- 8 bits for continuation offset (only for NativeTupleReturn) -}
+ assertPpr (all (`elem` regs) (regSetToList nativeCallRegs)) (text "not all registers accounted for") {- all regs accounted for -}
+ foldl' reg_bit 0 (zip regs [0..]) .|. (cont_offset `shiftL` 24)
where
+ cont_offset :: Word32
+ cont_offset
+ | nativeCallType == NativeTupleReturn = fromIntegral nativeCallStackSpillSize
+ | otherwise = 0 -- there is no continuation for primcalls
+
reg_bit :: Word32 -> (GlobalReg, Int) -> Word32
reg_bit x (r, n)
- | r `elemRegSet` tupleRegs = x .|. 1 `shiftL` n
- | otherwise = x
- regs = tupleRegsCover platform
+ | r `elemRegSet` nativeCallRegs = x .|. 1 `shiftL` n
+ | otherwise = x
+ regs = allArgRegsCover platform
-mkTupleInfoLit :: Platform -> TupleInfo -> Literal
-mkTupleInfoLit platform tuple_info =
- mkLitWord platform . fromIntegral $ mkTupleInfoSig platform tuple_info
+mkNativeCallInfoLit :: Platform -> NativeCallInfo -> Literal
+mkNativeCallInfoLit platform call_info =
+ mkLitWord platform . fromIntegral $ mkNativeCallInfoSig platform call_info
-- Make lists of host-sized words for literals, so that when the
-- words are placed in memory at increasing addresses, the
diff --git a/compiler/GHC/ByteCode/Instr.hs b/compiler/GHC/ByteCode/Instr.hs
index 498152c471..34baa57d40 100644
--- a/compiler/GHC/ByteCode/Instr.hs
+++ b/compiler/GHC/ByteCode/Instr.hs
@@ -90,7 +90,7 @@ data BCInstr
| PUSH_ALTS (ProtoBCO Name)
| PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep
| PUSH_ALTS_TUPLE (ProtoBCO Name) -- continuation
- !TupleInfo
+ !NativeCallInfo
(ProtoBCO Name) -- tuple return BCO
-- Pushing 8, 16 and 32 bits of padding (for constructors).
@@ -184,6 +184,8 @@ data BCInstr
-- (XXX: inefficient, but I don't know
-- what the alignment constraints are.)
+ | PRIMCALL
+
-- For doing magic ByteArray passing to foreign calls
| SWIZZLE Word16 -- to the ptr N words down the stack,
Word16 -- add M (interpreted as a signed 16-bit entity)
@@ -269,8 +271,8 @@ instance Outputable BCInstr where
ppr (PUSH_ALTS bco) = hang (text "PUSH_ALTS") 2 (ppr bco)
ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco)
- ppr (PUSH_ALTS_TUPLE bco tuple_info tuple_bco) =
- hang (text "PUSH_ALTS_TUPLE" <+> ppr tuple_info)
+ ppr (PUSH_ALTS_TUPLE bco call_info tuple_bco) =
+ hang (text "PUSH_ALTS_TUPLE" <+> ppr call_info)
2
(ppr tuple_bco $+$ ppr bco)
@@ -340,6 +342,7 @@ instance Outputable BCInstr where
0x1 -> text "(interruptible)"
0x2 -> text "(unsafe)"
_ -> empty)
+ ppr PRIMCALL = text "PRIMCALL"
ppr (SWIZZLE stkoff n) = text "SWIZZLE " <+> text "stkoff" <+> ppr stkoff
<+> text "by" <+> ppr n
ppr ENTER = text "ENTER"
@@ -382,11 +385,11 @@ bciStackUse (PUSH_ALTS bco) = 2 {- profiling only, restore CCCS -} +
bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 {- profiling only, restore CCCS -} +
4 + protoBCOStackUse bco
bciStackUse (PUSH_ALTS_TUPLE bco info _) =
- -- (tuple_bco, tuple_info word, cont_bco, stg_ctoi_t)
+ -- (tuple_bco, call_info word, cont_bco, stg_ctoi_t)
-- tuple
- -- (tuple_info, tuple_bco, stg_ret_t)
+ -- (call_info, tuple_bco, stg_ret_t)
1 {- profiling only -} +
- 7 + fromIntegral (tupleSize info) + protoBCOStackUse bco
+ 7 + fromIntegral (nativeCallSize info) + protoBCOStackUse bco
bciStackUse (PUSH_PAD8) = 1 -- overapproximation
bciStackUse (PUSH_PAD16) = 1 -- overapproximation
bciStackUse (PUSH_PAD32) = 1 -- overapproximation on 64bit arch
@@ -443,6 +446,7 @@ bciStackUse RETURN{} = 0
bciStackUse RETURN_UNLIFTED{} = 1 -- pushes stg_ret_X for some X
bciStackUse RETURN_TUPLE{} = 1 -- pushes stg_ret_t header
bciStackUse CCALL{} = 0
+bciStackUse PRIMCALL{} = 1 -- pushes stg_primcall
bciStackUse SWIZZLE{} = 0
bciStackUse BRK_FUN{} = 0
diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs
index 830b60a4ca..a4b025ce92 100644
--- a/compiler/GHC/ByteCode/Types.hs
+++ b/compiler/GHC/ByteCode/Types.hs
@@ -10,7 +10,7 @@ module GHC.ByteCode.Types
( CompiledByteCode(..), seqCompiledByteCode
, FFIInfo(..)
, RegBitmap(..)
- , TupleInfo(..), voidTupleInfo
+ , NativeCallType(..), NativeCallInfo(..), voidTupleReturnInfo, voidPrimCallInfo
, ByteOff(..), WordOff(..)
, UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
, ItblEnv, ItblPtr(..)
@@ -105,22 +105,32 @@ newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 }
See GHC.StgToByteCode.layoutTuple for more details.
-}
-data TupleInfo = TupleInfo
- { tupleSize :: !WordOff -- total size of tuple in words
- , tupleRegs :: !GlobalRegSet
- , tupleNativeStackSize :: !WordOff {- words spilled on the stack by
- GHCs native calling convention -}
- } deriving (Show)
-
-instance Outputable TupleInfo where
- ppr TupleInfo{..} = text "<size" <+> ppr tupleSize <+>
- text "stack" <+> ppr tupleNativeStackSize <+>
- text "regs" <+>
- ppr (map (text @SDoc . show) $ regSetToList tupleRegs) <>
- char '>'
-
-voidTupleInfo :: TupleInfo
-voidTupleInfo = TupleInfo 0 emptyRegSet 0
+
+data NativeCallType = NativePrimCall
+ | NativeTupleReturn
+ deriving (Eq)
+
+data NativeCallInfo = NativeCallInfo
+ { nativeCallType :: !NativeCallType
+ , nativeCallSize :: !WordOff -- total size of arguments in words
+ , nativeCallRegs :: !GlobalRegSet
+ , nativeCallStackSpillSize :: !WordOff {- words spilled on the stack by
+ GHCs native calling convention -}
+ }
+
+instance Outputable NativeCallInfo where
+ ppr NativeCallInfo{..} = text "<arg_size" <+> ppr nativeCallSize <+>
+ text "stack" <+> ppr nativeCallStackSpillSize <+>
+ text "regs" <+>
+ ppr (map (text @SDoc . show) $ regSetToList nativeCallRegs) <>
+ char '>'
+
+
+voidTupleReturnInfo :: NativeCallInfo
+voidTupleReturnInfo = NativeCallInfo NativeTupleReturn 0 emptyRegSet 0
+
+voidPrimCallInfo :: NativeCallInfo
+voidPrimCallInfo = NativeCallInfo NativePrimCall 0 emptyRegSet 0
type ItblEnv = NameEnv (Name, ItblPtr)
-- We need the Name in the range so we know which
diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs
index a0fee0e5c6..97cebf99e6 100644
--- a/compiler/GHC/Cmm/CallConv.hs
+++ b/compiler/GHC/Cmm/CallConv.hs
@@ -3,7 +3,7 @@ module GHC.Cmm.CallConv (
assignArgumentsPos,
assignStack,
realArgRegsCover,
- tupleRegsCover
+ allArgRegsCover
) where
import GHC.Prelude
@@ -220,12 +220,109 @@ realArgRegsCover platform
realLongRegs platform
-- we don't save XMM registers if they are not used for parameter passing
--- Like realArgRegsCover but always includes the node. This covers the real
--- and virtual registers used for unboxed tuples.
---
--- Note: if anything changes in how registers for unboxed tuples overlap,
--- make sure to also update GHC.StgToByteCode.layoutTuple.
-tupleRegsCover :: Platform -> [GlobalReg]
-tupleRegsCover platform =
+{-
+
+ Note [GHCi and native call registers]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The GHCi bytecode interpreter does not have access to the STG registers
+ that the native calling convention uses for passing arguments. It uses
+ helper stack frames to move values between the stack and registers.
+
+ If only a single register needs to be moved, GHCi uses a specific stack
+ frame. For example stg_ctoi_R1p saves a heap pointer value from STG register
+ R1 and stg_ctoi_D1 saves a double precision floating point value from D1.
+ In the other direction, helpers stg_ret_p and stg_ret_d move a value from
+ the stack to the R1 and D1 registers, respectively.
+
+ When GHCi needs to move more than one register it cannot use a specific
+ helper frame. It would simply be impossible to create a helper for all
+ possible combinations of register values. Instead, there are generic helper
+ stack frames that use a call_info word that describes the active registers
+ and the number of stack words used by the arguments of a call.
+
+ These helper stack frames are currently:
+
+ - stg_ret_t: return a tuple to the continuation at the top of
+ the stack
+ - stg_ctoi_t: convert a tuple return value to be used in
+ bytecode
+ - stg_primcall: call a function
+
+
+ The call_info word contains a bitmap of the active registers
+ for the call and a stack offset. The layout is as follows:
+
+ - bit 0-23: Bitmap of active registers for the call, the
+ order corresponds to the list returned by
+ allArgRegsCover. For example if bit 0 (the least
+ significant bit) is set, the first register in the
+ allArgRegsCover list is active. Bit 1 for the
+ second register in the list and so on.
+
+ - bit 24-31: Unsigned byte indicating the stack offset
+ of the continuation in words. For tuple returns
+ this is the number of words returned on the
+ stack. For primcalls this field is unused, since
+ we don't jump to a continuation.
+
+ The upper 32 bits on 64 bit platforms are currently unused.
+
+ If a register is smaller than a word on the stack (for example a
+ single precision float on a 64 bit system), then the stack slot
+ is padded to a whole word.
+
+ Example:
+
+ If a tuple is returned in three registers and an additional two
+ words on the stack, then three bits in the register bitmap
+ (bits 0-23) would be set. And bits 24-31 would be
+ 00000010 (two in binary).
+
+ The values on the stack before a call to POP_ARG_REGS would
+ be as follows:
+
+ ...
+ continuation
+ stack_arg_1
+ stack_arg_2
+ register_arg_3
+ register_arg_2
+ register_arg_1 <- Sp
+
+ A call to POP_ARG_REGS(call_info) would move register_arg_1
+ to the register corresponding to the lowest set bit in the
+ call_info word. register_arg_2 would be moved to the register
+ corresponding to the second lowest set bit, and so on.
+
+ After POP_ARG_REGS(call_info), the stack pointer Sp points
+ to the topmost stack argument, so the stack looks as follows:
+
+ ...
+ continuation
+ stack_arg_1
+ stack_arg_2 <- Sp
+
+ At this point all the arguments are in place and we are ready
+ to jump to the continuation, the location (offset from Sp) of
+ which is found by inspecting the value of bits 24-31. In this
+ case the offset is two words.
+
+ On x86_64, the double precision (Dn) and single precision
+ floating (Fn) point registers overlap, e.g. D1 uses the same
+ physical register as F1. On this platform, the list returned
+ by allArgRegsCover contains only entries for the double
+ precision registers. If an argument is passed in register
+ Fn, the bit corresponding to Dn should be set.
+
+ Note: if anything changes in how registers for native calls overlap,
+ make sure to also update GHC.StgToByteCode.layoutNativeCall
+ -}
+
+-- Like realArgRegsCover but always includes the node. This covers all real
+-- and virtual registers actually used for passing arguments.
+
+allArgRegsCover :: Platform -> [GlobalReg]
+allArgRegsCover platform =
nub (VanillaReg 1 VGcPtr : realArgRegsCover platform)
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index dbb2e47030..35d8e4c40f 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -1233,8 +1233,8 @@ stmtMacros = listToUFM [
( fsLit "SAVE_REGS", \[] -> emitSaveRegs ),
( fsLit "RESTORE_REGS", \[] -> emitRestoreRegs ),
- ( fsLit "PUSH_TUPLE_REGS", \[live_regs] -> emitPushTupleRegs live_regs ),
- ( fsLit "POP_TUPLE_REGS", \[live_regs] -> emitPopTupleRegs live_regs ),
+ ( fsLit "PUSH_ARG_REGS", \[live_regs] -> emitPushArgRegs live_regs ),
+ ( fsLit "POP_ARG_REGS", \[live_regs] -> emitPopArgRegs live_regs ),
( fsLit "LDV_ENTER", \[e] -> ldvEnter e ),
( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ),
diff --git a/compiler/GHC/Cmm/Reg.hs b/compiler/GHC/Cmm/Reg.hs
index a9b3fce101..104702f312 100644
--- a/compiler/GHC/Cmm/Reg.hs
+++ b/compiler/GHC/Cmm/Reg.hs
@@ -223,7 +223,7 @@ instance Eq GlobalReg where
_r1 == _r2 = False
-- NOTE: this Ord instance affects the tuple layout in GHCi, see
--- Note [GHCi tuple layout]
+-- Note [GHCi and native call registers]
instance Ord GlobalReg where
compare (VanillaReg i _) (VanillaReg j _) = compare i j
-- Ignore type when seeking clashes
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index b59cbfe779..de37d987cb 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -58,7 +58,7 @@ import GHC.Data.FastString
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.Exception (evaluate)
-import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, nonVoidIds )
+import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, nonVoidIds, argPrimRep )
import GHC.StgToCmm.Layout
import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes)
import GHC.Data.Bitmap
@@ -464,10 +464,10 @@ returnUnliftedReps d s szb reps = do
[rep] -> return (unitOL $ RETURN_UNLIFTED (toArgRep platform rep))
-- otherwise use RETURN_TUPLE with a tuple descriptor
nv_reps -> do
- let (tuple_info, args_offsets) = layoutTuple profile 0 (primRepCmmType platform) nv_reps
+ let (call_info, args_offsets) = layoutNativeCall profile NativeTupleReturn 0 (primRepCmmType platform) nv_reps
args_ptrs = map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off)) args_offsets
- tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs)
- return $ PUSH_UBX (mkTupleInfoLit platform tuple_info) 1 `consOL`
+ tuple_bco <- emitBc (tupleBCO platform call_info args_ptrs)
+ return $ PUSH_UBX (mkNativeCallInfoLit platform call_info) 1 `consOL`
PUSH_BCO tuple_bco `consOL`
unitOL RETURN_TUPLE
return ( mkSlideB platform szb (d - s) -- clear to sequel
@@ -484,7 +484,11 @@ returnUnboxedTuple d s p es = do
profile <- getProfile
let platform = profilePlatform profile
arg_ty e = primRepCmmType platform (atomPrimRep e)
- (tuple_info, tuple_components) = layoutTuple profile d arg_ty es
+ (call_info, tuple_components) = layoutNativeCall profile
+ NativeTupleReturn
+ d
+ arg_ty
+ es
go _ pushes [] = return (reverse pushes)
go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a
massert (off == dd + szb)
@@ -492,7 +496,7 @@ returnUnboxedTuple d s p es = do
pushes <- go d [] tuple_components
ret <- returnUnliftedReps d
s
- (wordsToBytes platform $ tupleSize tuple_info)
+ (wordsToBytes platform $ nativeCallSize call_info)
(map atomPrimRep es)
return (mconcat pushes `appOL` ret)
@@ -648,14 +652,14 @@ schemeT d s p app
-- Case 1
schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty)
= if isSupportedCConv ccall_spec
- then generateCCall d s p ccall_spec result_ty (reverse args)
+ then generateCCall d s p ccall_spec result_ty args
else unsupportedCConvException
schemeT d s p (StgOpApp (StgPrimOp op) args _ty)
= doTailCall d s p (primOpId op) (reverse args)
-schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty)
- = unsupportedCConvException
+schemeT d s p (StgOpApp (StgPrimCallOp (PrimCall label unit)) args result_ty)
+ = generatePrimCall d s p label (Just unit) result_ty args
-- Case 2: Unboxed tuple
schemeT d s p (StgConApp con _cn args _tys)
@@ -840,18 +844,18 @@ doCase d s p scrut bndr alts
| ubx_frame = wordSize platform
| otherwise = 0
- (bndr_size, tuple_info, args_offsets)
+ (bndr_size, call_info, args_offsets)
| ubx_tuple_frame =
let bndr_ty = primRepCmmType platform
bndr_reps = filter (not.isVoidRep) (bcIdPrimReps bndr)
- (tuple_info, args_offsets) =
- layoutTuple profile 0 bndr_ty bndr_reps
- in ( wordsToBytes platform (tupleSize tuple_info)
- , tuple_info
+ (call_info, args_offsets) =
+ layoutNativeCall profile NativeTupleReturn 0 bndr_ty bndr_reps
+ in ( wordsToBytes platform (nativeCallSize call_info)
+ , call_info
, args_offsets
)
| otherwise = ( wordsToBytes platform (idSizeW platform bndr)
- , voidTupleInfo
+ , voidTupleReturnInfo
, []
)
@@ -885,17 +889,18 @@ doCase d s p scrut bndr alts
| isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty =
let bndr_ty = primRepCmmType platform . bcIdPrimRep
tuple_start = d_bndr
- (tuple_info, args_offsets) =
- layoutTuple profile
- 0
- bndr_ty
- bndrs
+ (call_info, args_offsets) =
+ layoutNativeCall profile
+ NativeTupleReturn
+ 0
+ bndr_ty
+ bndrs
stack_bot = d_alts
p' = Map.insertList
[ (arg, tuple_start -
- wordsToBytes platform (tupleSize tuple_info) +
+ wordsToBytes platform (nativeCallSize call_info) +
offset)
| (arg, offset) <- args_offsets
, not (isVoidRep $ bcIdPrimRep arg)]
@@ -981,8 +986,8 @@ doCase d s p scrut bndr alts
-- unboxed tuples get two more words, the second is a pointer (tuple_bco)
(extra_pointers, extra_slots)
- | ubx_tuple_frame && profiling = ([1], 3) -- tuple_info, tuple_BCO, CCCS
- | ubx_tuple_frame = ([1], 2) -- tuple_info, tuple_BCO
+ | ubx_tuple_frame && profiling = ([1], 3) -- call_info, tuple_BCO, CCCS
+ | ubx_tuple_frame = ([1], 2) -- call_info, tuple_BCO
| otherwise = ([], 0)
bitmap_size = trunc16W $ fromIntegral extra_slots +
@@ -1028,8 +1033,8 @@ doCase d s p scrut bndr alts
let args_ptrs =
map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off))
args_offsets
- tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs)
- return (PUSH_ALTS_TUPLE alt_bco' tuple_info tuple_bco
+ tuple_bco <- emitBc (tupleBCO platform call_info args_ptrs)
+ return (PUSH_ALTS_TUPLE alt_bco' call_info tuple_bco
`consOL` scrut_code)
else let push_alts
| not ubx_frame
@@ -1050,14 +1055,15 @@ doCase d s p scrut bndr alts
-- The native calling convention uses registers for tuples, but in the
-- bytecode interpreter, all values live on the stack.
-layoutTuple :: Profile
- -> ByteOff
- -> (a -> CmmType)
- -> [a]
- -> ( TupleInfo -- See Note [GHCi TupleInfo]
- , [(a, ByteOff)] -- argument, offset on stack
- )
-layoutTuple profile start_off arg_ty reps =
+layoutNativeCall :: Profile
+ -> NativeCallType
+ -> ByteOff
+ -> (a -> CmmType)
+ -> [a]
+ -> ( NativeCallInfo -- See Note [GHCi TupleInfo]
+ , [(a, ByteOff)] -- argument, offset on stack
+ )
+layoutNativeCall profile call_type start_off arg_ty reps =
let platform = profilePlatform profile
(orig_stk_bytes, pos) = assignArgumentsPos profile
0
@@ -1070,7 +1076,7 @@ layoutTuple profile start_off arg_ty reps =
-- sort the register parameters by register and add them to the stack
regs_order :: Map.Map GlobalReg Int
- regs_order = Map.fromList $ zip (tupleRegsCover platform) [0..]
+ regs_order = Map.fromList $ zip (allArgRegsCover platform) [0..]
reg_order :: GlobalReg -> (Int, GlobalReg)
reg_order reg | Just n <- Map.lookup reg regs_order = (n, reg)
@@ -1099,10 +1105,11 @@ layoutTuple profile start_off arg_ty reps =
get_byte_off _ =
panic "GHC.StgToByteCode.layoutTuple get_byte_off"
- in ( TupleInfo
- { tupleSize = bytesToWords platform (ByteOff new_stk_bytes)
- , tupleRegs = regs_set
- , tupleNativeStackSize = bytesToWords platform
+ in ( NativeCallInfo
+ { nativeCallType = call_type
+ , nativeCallSize = bytesToWords platform (ByteOff new_stk_bytes)
+ , nativeCallRegs = regs_set
+ , nativeCallStackSpillSize = bytesToWords platform
(ByteOff orig_stk_bytes)
}
, sortBy (comparing snd) $
@@ -1127,7 +1134,7 @@ usePlainReturn t
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We have the bytecode instructions RETURN_TUPLE and PUSH_ALTS_TUPLE to
return and receive arbitrary unboxed tuples, respectively. These
- instructions use the helper data tuple_BCO and tuple_info.
+ instructions use the helper data tuple_BCO and call_info.
The helper data is used to convert tuples between GHCs native calling
convention (object code), which uses stack and registers, and the bytecode
@@ -1139,7 +1146,7 @@ usePlainReturn t
=================
Bytecode that returns a tuple first pushes all the tuple fields followed
- by the appropriate tuple_info and tuple_BCO onto the stack. It then
+ by the appropriate call_info and tuple_BCO onto the stack. It then
executes the RETURN_TUPLE instruction, which causes the interpreter
to push stg_ret_t_info to the top of the stack. The stack (growing down)
then looks as follows:
@@ -1150,14 +1157,14 @@ usePlainReturn t
tuple_field_2
...
tuple_field_n
- tuple_info
+ call_info
tuple_BCO
stg_ret_t_info <- Sp
If next_frame is bytecode, the interpreter will start executing it. If
it's object code, the interpreter jumps back to the scheduler, which in
turn jumps to stg_ret_t. stg_ret_t converts the tuple to the native
- calling convention using the description in tuple_info, and then jumps
+ calling convention using the description in call_info, and then jumps
to next_frame.
@@ -1169,13 +1176,13 @@ usePlainReturn t
tuple. The PUSH_ALTS_TUPLE instruction contains three pieces of data:
* cont_BCO: the continuation that receives the tuple
- * tuple_info: see below
+ * call_info: see below
* tuple_BCO: see below
The interpreter pushes these onto the stack when the PUSH_ALTS_TUPLE
instruction is executed, followed by stg_ctoi_tN_info, with N depending
on the number of stack words used by the tuple in the GHC native calling
- convention. N is derived from tuple_info.
+ convention. N is derived from call_info.
For example if we expect a tuple with three words on the stack, the stack
looks as follows after PUSH_ALTS_TUPLE:
@@ -1186,7 +1193,7 @@ usePlainReturn t
cont_free_var_2
...
cont_free_var_n
- tuple_info
+ call_info
tuple_BCO
cont_BCO
stg_ctoi_t3_info <- Sp
@@ -1206,20 +1213,20 @@ usePlainReturn t
that is already on the stack.
- The tuple_info word
+ The call_info word
===================
- The tuple_info word describes the stack and STG register (e.g. R1..R6,
- D1..D6) usage for the tuple. tuple_info contains enough information to
+ The call_info word describes the stack and STG register (e.g. R1..R6,
+ D1..D6) usage for the tuple. call_info contains enough information to
convert the tuple between the stack-only bytecode and stack+registers
GHC native calling conventions.
- See Note [GHCi tuple layout] for more details of how the data is packed
- in a single word.
+ See Note [GHCi and native call registers] for more details of how the
+ data is packed in a single word.
-}
-tupleBCO :: Platform -> TupleInfo -> [(Bool, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
+tupleBCO :: Platform -> NativeCallInfo -> [(Bool, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
tupleBCO platform info pointers =
mkProtoBCO platform invented_name body_code (Left [])
0{-no arity-} bitmap_size bitmap False{-is alts-}
@@ -1233,15 +1240,103 @@ tupleBCO platform info pointers =
-}
invented_name = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "tuple")
- -- the first word in the frame is the tuple_info word,
+ -- the first word in the frame is the call_info word,
-- which is not a pointer
- bitmap_size = trunc16W $ 1 + tupleSize info
+ bitmap_size = trunc16W $ 1 + nativeCallSize info
bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) $
map ((+1) . fromIntegral . bytesToWords platform . snd)
(filter fst pointers)
body_code = mkSlideW 0 1 -- pop frame header
`snocOL` RETURN_TUPLE -- and add it again
+primCallBCO :: Platform -> NativeCallInfo -> [(Bool, ByteOff)] -> [FFIInfo] -> ProtoBCO Name
+primCallBCO platform args_info pointers =
+ mkProtoBCO platform invented_name body_code (Left [])
+ 0{-no arity-} bitmap_size bitmap False{-is alts-}
+
+ where
+ {-
+ The primcall BCO is never referred to by name, so we can get away
+ with using a fake name here. We will need to change this if we want
+ to save some memory by sharing the BCO between places that have
+ the same tuple shape
+ -}
+ invented_name = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "primcall")
+
+ -- the first three words in the frame are the BCO describing the
+ -- pointers in the frame, the call_info word and the pointer
+ -- to the Cmm function being called. None of these is a pointer that
+ -- should be followed by the garbage collector
+ bitmap_size = trunc16W $ 2 + nativeCallSize args_info
+ bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) $
+ map ((+2) . fromIntegral . bytesToWords platform . snd)
+ (filter fst pointers)
+ -- if the primcall BCO is ever run it's a bug, since the BCO should only
+ -- be pushed immediately before running the PRIMCALL bytecode instruction,
+ -- which immediately leaves the interpreter to jump to the stg_primcall_info
+ -- Cmm function
+ body_code = unitOL CASEFAIL
+
+-- -----------------------------------------------------------------------------
+-- Deal with a primitive call to native code.
+
+generatePrimCall
+ :: StackDepth
+ -> Sequel
+ -> BCEnv
+ -> CLabelString -- where to call
+ -> Maybe Unit
+ -> Type
+ -> [StgArg] -- args (atoms)
+ -> BcM BCInstrList
+generatePrimCall d s p target _mb_unit _result_ty args
+ = do
+ profile <- getProfile
+ let
+ platform = profilePlatform profile
+
+ non_void VoidRep = False
+ non_void _ = True
+
+ nv_args :: [StgArg]
+ nv_args = filter (non_void . argPrimRep) args
+
+ (args_info, args_offsets) =
+ layoutNativeCall profile
+ NativePrimCall
+ d
+ (primRepCmmType platform . argPrimRep)
+ nv_args
+
+ args_ptrs :: [(Bool, ByteOff)]
+ args_ptrs =
+ map (\(r, off) ->
+ (isFollowableArg (toArgRep platform . argPrimRep $ r), off))
+ args_offsets
+
+ push_target = PUSH_UBX (LitLabel target Nothing IsFunction) 1
+ push_info = PUSH_UBX (mkNativeCallInfoLit platform args_info) 1
+ {-
+ compute size to move payload (without stg_primcall_info header)
+
+ size of arguments plus three words for:
+ - function pointer to the target
+ - call_info word
+ - BCO to describe the stack frame
+ -}
+ szb = wordsToBytes platform (nativeCallSize args_info + 3)
+ go _ pushes [] = return (reverse pushes)
+ go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a
+ massert (off == dd + szb)
+ go (dd + szb) (push:pushes) cs
+ push_args <- go d [] args_offsets
+ args_bco <- emitBc (primCallBCO platform args_info args_ptrs)
+ return $ mconcat push_args `appOL`
+ (push_target `consOL`
+ push_info `consOL`
+ PUSH_BCO args_bco `consOL`
+ (mkSlideB platform szb (d - s) `appOL` unitOL PRIMCALL))
+
-- -----------------------------------------------------------------------------
-- Deal with a CCall.
@@ -1259,11 +1354,17 @@ generateCCall
-> Type
-> [StgArg] -- args (atoms)
-> BcM BCInstrList
-generateCCall d0 s p (CCallSpec target cconv safety) result_ty args_r_to_l
+generateCCall d0 s p (CCallSpec target PrimCallConv _) result_ty args
+ | (StaticTarget _ label mb_unit _) <- target
+ = generatePrimCall d0 s p label mb_unit result_ty args
+ | otherwise
+ = panic "GHC.StgToByteCode.generateCCall: primcall convention only supports static targets"
+generateCCall d0 s p (CCallSpec target cconv safety) result_ty args
= do
profile <- getProfile
let
+ args_r_to_l = reverse args
platform = profilePlatform profile
-- useful constants
addr_size_b :: ByteOff
@@ -2007,7 +2108,7 @@ isSupportedCConv :: CCallSpec -> Bool
isSupportedCConv (CCallSpec _ cconv _) = case cconv of
CCallConv -> True -- we explicitly pattern match on every
StdCallConv -> True -- convention to ensure that a warning
- PrimCallConv -> False -- is triggered when a new one is added
+ PrimCallConv -> True -- is triggered when a new one is added
JavaScriptCallConv -> False
CApiConv -> True
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
index e71c418530..95b7d1c5fd 100644
--- a/compiler/GHC/StgToCmm/Foreign.hs
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -15,8 +15,8 @@ module GHC.StgToCmm.Foreign (
emitLoadThreadState,
emitSaveRegs,
emitRestoreRegs,
- emitPushTupleRegs,
- emitPopTupleRegs,
+ emitPushArgRegs,
+ emitPopArgRegs,
loadThreadState,
emitOpenNursery,
emitCloseNursery,
@@ -349,7 +349,7 @@ emitRestoreRegs = do
-- bytecode interpreter.
--
-- The "live registers" bitmap corresponds to the list of registers given by
--- 'tupleRegsCover', with the least significant bit indicating liveness of
+-- 'allArgRegsCover', with the least significant bit indicating liveness of
-- the first register in the list.
--
-- Each register is saved to a stack slot of one or more machine words, even
@@ -362,12 +362,12 @@ emitRestoreRegs = do
-- if((mask & 2) != 0) { Sp_adj(-1); Sp(0) = R2; }
-- if((mask & 1) != 0) { Sp_adj(-1); Sp(0) = R1; }
--
--- See Note [GHCi tuple layout]
+-- See Note [GHCi and native call registers]
-emitPushTupleRegs :: CmmExpr -> FCode ()
-emitPushTupleRegs regs_live = do
+emitPushArgRegs :: CmmExpr -> FCode ()
+emitPushArgRegs regs_live = do
platform <- getPlatform
- let regs = zip (tupleRegsCover platform) [0..]
+ let regs = zip (allArgRegsCover platform) [0..]
save_arg (reg, n) =
let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
live = cmmAndWord platform regs_live mask
@@ -381,11 +381,11 @@ emitPushTupleRegs regs_live = do
in mkCmmIfThen cond $ catAGraphs [adj_sp, save_reg]
emit . catAGraphs =<< mapM save_arg (reverse regs)
--- | Pop a subset of STG registers from the stack (see 'emitPushTupleRegs')
-emitPopTupleRegs :: CmmExpr -> FCode ()
-emitPopTupleRegs regs_live = do
+-- | Pop a subset of STG registers from the stack (see 'emitPushArgRegs')
+emitPopArgRegs :: CmmExpr -> FCode ()
+emitPopArgRegs regs_live = do
platform <- getPlatform
- let regs = zip (tupleRegsCover platform) [0..]
+ let regs = zip (allArgRegsCover platform) [0..]
save_arg (reg, n) =
let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform))
live = cmmAndWord platform regs_live mask