From 28e0dca2e93dabee88f28ce38282dbcb8c62ab99 Mon Sep 17 00:00:00 2001 From: Luite Stegeman Date: Tue, 30 Mar 2021 04:06:59 +0200 Subject: Work around LLVM backend overlapping register limitations The stg_ctoi_t and stg_ret_t procedures which convert unboxed tuples between the bytecode an native calling convention were causing a panic when using the LLVM backend. Fixes #19591 --- compiler/GHC/ByteCode/Asm.hs | 72 +++++++++++----------------------------- compiler/GHC/ByteCode/Types.hs | 13 +++----- compiler/GHC/Cmm/CallConv.hs | 14 +++++++- compiler/GHC/Cmm/Parser.y | 3 ++ compiler/GHC/StgToByteCode.hs | 36 ++++++++++---------- compiler/GHC/StgToCmm/Foreign.hs | 59 ++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 80 deletions(-) (limited to 'compiler') diff --git a/compiler/GHC/ByteCode/Asm.hs b/compiler/GHC/ByteCode/Asm.hs index 24b055fe6c..6b9d4a9223 100644 --- a/compiler/GHC/ByteCode/Asm.hs +++ b/compiler/GHC/ByteCode/Asm.hs @@ -39,6 +39,8 @@ import GHC.Data.FastString import GHC.Data.SizedSeq import GHC.StgToCmm.Layout ( ArgRep(..) ) +import GHC.Cmm.Expr +import GHC.Cmm.CallConv ( tupleRegsCover ) import GHC.Platform import GHC.Platform.Profile @@ -392,7 +394,7 @@ assembleI platform i = case i of p <- ioptr (liftM BCOPtrBCO ul_bco) p_tup <- ioptr (liftM BCOPtrBCO ul_tuple_bco) info <- int (fromIntegral $ - mkTupleInfoSig tuple_info) + mkTupleInfoSig platform tuple_info) emit bci_PUSH_ALTS_T [Op p, Op info, Op p_tup] PUSH_PAD8 -> emit bci_PUSH_PAD8 [] @@ -550,22 +552,6 @@ return_ubx V64 = error "return_ubx: vector" maxTupleNativeStackSize :: WordOff maxTupleNativeStackSize = 62 -{- - Maximum number of supported registers for returning tuples. - - If GHC uses more more than these (because of a change in the calling - convention or a new platform) mkTupleInfoSig will panic. - - You can raise the limits after modifying stg_ctoi_t and stg_ret_t - (StgMiscClosures.cmm) to save and restore the additional registers. - -} -maxTupleVanillaRegs, maxTupleFloatRegs, maxTupleDoubleRegs, - maxTupleLongRegs :: Int -maxTupleVanillaRegs = 6 -maxTupleFloatRegs = 6 -maxTupleDoubleRegs = 6 -maxTupleLongRegs = 1 - {- Construct the tuple_info word that stg_ctoi_t and stg_ret_t use to convert a tuple between the native calling convention and the @@ -573,47 +559,29 @@ maxTupleLongRegs = 1 See Note [GHCi tuple layout] for more information. -} -mkTupleInfoSig :: TupleInfo -> Word32 -mkTupleInfoSig ti@TupleInfo{..} - | tupleNativeStackSize > maxTupleNativeStackSize = - pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler" +mkTupleInfoSig :: Platform -> TupleInfo -> Word32 +mkTupleInfoSig platform TupleInfo{..} + | tupleNativeStackSize > maxTupleNativeStackSize + = pprPanic "mkTupleInfoSig: tuple too big for the bytecode compiler" (ppr tupleNativeStackSize <+> text "stack words." <+> text "Use -fobject-code to get around this limit" ) - | tupleVanillaRegs `shiftR` maxTupleVanillaRegs /= 0 = - pprPanic "mkTupleInfoSig: too many vanilla registers" (ppr tupleVanillaRegs) - | tupleLongRegs `shiftR` maxTupleLongRegs /= 0 = - pprPanic "mkTupleInfoSig: too many long registers" (ppr tupleLongRegs) - | tupleFloatRegs `shiftR` maxTupleFloatRegs /= 0 = - pprPanic "mkTupleInfoSig: too many float registers" (ppr tupleFloatRegs) - | tupleDoubleRegs `shiftR` maxTupleDoubleRegs /= 0 = - pprPanic "mkTupleInfoSig: too many double registers" (ppr tupleDoubleRegs) - {- - Check that we can pack the register counts/bitmaps and stack size - in the information word. In particular we check that each component - fits in the bits we have reserved for it. - - This overlaps with some of the above checks. It's likely that if the - number of registers changes, the number of bits will also need to be - updated. - -} - | tupleNativeStackSize < 16384 && -- 14 bits stack usage - tupleDoubleRegs < 64 && -- 6 bit bitmap (these can be shared with float) - tupleFloatRegs < 64 && -- 6 bit bitmap (these can be shared with double) - tupleLongRegs < 4 && -- 2 bit bitmap - tupleVanillaRegs < 65536 && -- 4 bit count (tupleVanillaRegs is still a bitmap) - -- check that there are no "holes", i.e. that R1..Rn are all in use - tupleVanillaRegs .&. (tupleVanillaRegs + 1) == 0 - = fromIntegral tupleNativeStackSize .|. - unRegBitmap (tupleLongRegs `shiftL` 14) .|. - unRegBitmap (tupleDoubleRegs `shiftL` 16) .|. - unRegBitmap (tupleFloatRegs `shiftL` 22) .|. - fromIntegral (countTrailingZeros (1 + tupleVanillaRegs) `shiftL` 28) - | otherwise = pprPanic "mkTupleInfoSig: unsupported tuple shape" (ppr ti) + | otherwise + = assert (length regs <= 24) {- 24 bits for bitmap -} + assert (tupleNativeStackSize < 255) {- 8 bits for stack size -} + assert (all (`elem` regs) (regSetToList tupleRegs)) {- all regs accounted for -} + foldl' reg_bit 0 (zip regs [0..]) .|. + (fromIntegral tupleNativeStackSize `shiftL` 24) + where + reg_bit :: Word32 -> (GlobalReg, Int) -> Word32 + reg_bit x (r, n) + | r `elemRegSet` tupleRegs = x .|. 1 `shiftL` n + | otherwise = x + regs = tupleRegsCover platform mkTupleInfoLit :: Platform -> TupleInfo -> Literal mkTupleInfoLit platform tuple_info = - mkLitWord platform . fromIntegral $ mkTupleInfoSig tuple_info + mkLitWord platform . fromIntegral $ mkTupleInfoSig platform tuple_info -- Make lists of host-sized words for literals, so that when the -- words are placed in memory at increasing addresses, the diff --git a/compiler/GHC/ByteCode/Types.hs b/compiler/GHC/ByteCode/Types.hs index 02c117d716..673838654d 100644 --- a/compiler/GHC/ByteCode/Types.hs +++ b/compiler/GHC/ByteCode/Types.hs @@ -43,6 +43,7 @@ import qualified Data.IntMap as IntMap import Data.Maybe (catMaybes) import qualified GHC.Exts.Heap as Heap import GHC.Stack.CCS +import GHC.Cmm.Expr ( GlobalRegSet, emptyRegSet, regSetToList ) -- ----------------------------------------------------------------------------- -- Compiled Byte Code @@ -106,10 +107,7 @@ newtype RegBitmap = RegBitmap { unRegBitmap :: Word32 } -} data TupleInfo = TupleInfo { tupleSize :: !WordOff -- total size of tuple in words - , tupleVanillaRegs :: !RegBitmap -- vanilla registers used - , tupleLongRegs :: !RegBitmap -- long registers used - , tupleFloatRegs :: !RegBitmap -- float registers used - , tupleDoubleRegs :: !RegBitmap -- double registers used + , tupleRegs :: !GlobalRegSet , tupleNativeStackSize :: !WordOff {- words spilled on the stack by GHCs native calling convention -} } deriving (Show) @@ -118,14 +116,11 @@ instance Outputable TupleInfo where ppr TupleInfo{..} = text " ppr tupleSize <+> text "stack" <+> ppr tupleNativeStackSize <+> text "regs" <+> - char 'R' <> ppr tupleVanillaRegs <+> - char 'L' <> ppr tupleLongRegs <+> - char 'F' <> ppr tupleFloatRegs <+> - char 'D' <> ppr tupleDoubleRegs <> + ppr (map (text.show) $ regSetToList tupleRegs) <> char '>' voidTupleInfo :: TupleInfo -voidTupleInfo = TupleInfo 0 0 0 0 0 0 +voidTupleInfo = TupleInfo 0 emptyRegSet 0 type ItblEnv = NameEnv (Name, ItblPtr) -- We need the Name in the range so we know which diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs index f4b46a03f2..f376e598bf 100644 --- a/compiler/GHC/Cmm/CallConv.hs +++ b/compiler/GHC/Cmm/CallConv.hs @@ -2,10 +2,12 @@ module GHC.Cmm.CallConv ( ParamLocation(..), assignArgumentsPos, assignStack, - realArgRegsCover + realArgRegsCover, + tupleRegsCover ) where import GHC.Prelude +import Data.List (nub) import GHC.Cmm.Expr import GHC.Runtime.Heap.Layout @@ -219,3 +221,13 @@ realArgRegsCover platform realDoubleRegs platform ++ realLongRegs platform -- we don't save XMM registers if they are not used for parameter passing + +-- Like realArgRegsCover but always includes the node. This covers the real +-- and virtual registers used for unboxed tuples. +-- +-- Note: if anything changes in how registers for unboxed tuples overlap, +-- make sure to also update GHC.StgToByteCode.layoutTuple. + +tupleRegsCover :: Platform -> [GlobalReg] +tupleRegsCover platform = + nub (VanillaReg 1 VGcPtr : realArgRegsCover platform) diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index d182a6f714..490a3c4976 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1151,6 +1151,9 @@ stmtMacros = listToUFM [ ( fsLit "SAVE_REGS", \[] -> emitSaveRegs ), ( fsLit "RESTORE_REGS", \[] -> emitRestoreRegs ), + ( fsLit "PUSH_TUPLE_REGS", \[live_regs] -> emitPushTupleRegs live_regs ), + ( fsLit "POP_TUPLE_REGS", \[live_regs] -> emitPopTupleRegs live_regs ), + ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs index e2f48390e5..f7bb270e16 100644 --- a/compiler/GHC/StgToByteCode.hs +++ b/compiler/GHC/StgToByteCode.hs @@ -1262,28 +1262,31 @@ layoutTuple profile start_off arg_ty reps = orig_stk_params = [(x, fromIntegral off) | (x, StackParam off) <- pos] -- sort the register parameters by register and add them to the stack + regs_order :: Map.Map GlobalReg Int + regs_order = Map.fromList $ zip (tupleRegsCover platform) [0..] + + reg_order :: GlobalReg -> (Int, GlobalReg) + reg_order reg | Just n <- Map.lookup reg regs_order = (n, reg) + -- a VanillaReg goes to the same place regardless of whether it + -- contains a pointer + reg_order (VanillaReg n VNonGcPtr) = reg_order (VanillaReg n VGcPtr) + -- if we don't have a position for a FloatReg then they must be passed + -- in the equivalent DoubleReg + reg_order (FloatReg n) = reg_order (DoubleReg n) + -- one-tuples can be passed in other registers, but then we don't need + -- to care about the order + reg_order reg = (0, reg) + (regs, reg_params) = unzip $ sortBy (comparing fst) - [(reg, x) | (x, RegisterParam reg) <- pos] + [(reg_order reg, x) | (x, RegisterParam reg) <- pos] (new_stk_bytes, new_stk_params) = assignStack platform orig_stk_bytes arg_ty reg_params - -- make live register bitmaps - bmp_reg r ~(v, f, d, l) - = case r of VanillaReg n _ -> (a v n, f, d, l ) - FloatReg n -> (v, a f n, d, l ) - DoubleReg n -> (v, f, a d n, l ) - LongReg n -> (v, f, d, a l n) - _ -> - pprPanic "GHC.StgToByteCode.layoutTuple unsupported register type" - (ppr r) - where a bmp n = bmp .|. (1 `shiftL` (n-1)) - - (vanilla_regs, float_regs, double_regs, long_regs) - = foldr bmp_reg (0, 0, 0, 0) regs + regs_set = mkRegSet (map snd regs) get_byte_off (x, StackParam y) = (x, fromIntegral y) get_byte_off _ = @@ -1291,10 +1294,7 @@ layoutTuple profile start_off arg_ty reps = in ( TupleInfo { tupleSize = bytesToWords platform (ByteOff new_stk_bytes) - , tupleVanillaRegs = vanilla_regs - , tupleLongRegs = long_regs - , tupleFloatRegs = float_regs - , tupleDoubleRegs = double_regs + , tupleRegs = regs_set , tupleNativeStackSize = bytesToWords platform (ByteOff orig_stk_bytes) } diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 95fa21d648..39f25a7b86 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -15,6 +15,8 @@ module GHC.StgToCmm.Foreign ( emitLoadThreadState, emitSaveRegs, emitRestoreRegs, + emitPushTupleRegs, + emitPopTupleRegs, loadThreadState, emitOpenNursery, emitCloseNursery, @@ -340,6 +342,63 @@ emitRestoreRegs = do restore = catAGraphs (map (callerRestoreGlobalReg platform) regs) emit restore +-- | Push a subset of STG registers onto the stack, specified by the bitmap +-- +-- Sometimes, a "live" subset of the STG registers needs to be saved on the +-- stack, for example when storing an unboxed tuple to be used in the GHCi +-- bytecode interpreter. +-- +-- The "live registers" bitmap corresponds to the list of registers given by +-- 'tupleRegsCover', with the least significant bit indicating liveness of +-- the first register in the list. +-- +-- Each register is saved to a stack slot of one or more machine words, even +-- if the register size itself is smaller. +-- +-- The resulting Cmm code looks like this, with a line for each real or +-- virtual register used for returning tuples: +-- +-- ... +-- if((mask & 2) != 0) { Sp_adj(-1); Sp(0) = R2; } +-- if((mask & 1) != 0) { Sp_adj(-1); Sp(0) = R1; } +-- +-- See Note [GHCi tuple layout] + +emitPushTupleRegs :: CmmExpr -> FCode () +emitPushTupleRegs regs_live = do + platform <- getPlatform + let regs = zip (tupleRegsCover platform) [0..] + save_arg (reg, n) = + let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform)) + live = cmmAndWord platform regs_live mask + cond = cmmNeWord platform live (zeroExpr platform) + reg_ty = cmmRegType platform (CmmGlobal reg) + width = roundUpToWords platform + (widthInBytes $ typeWidth reg_ty) + adj_sp = mkAssign spReg + (cmmOffset platform spExpr (negate width)) + save_reg = mkStore spExpr (CmmReg $ CmmGlobal reg) + in mkCmmIfThen cond $ catAGraphs [adj_sp, save_reg] + emit . catAGraphs =<< mapM save_arg (reverse regs) + +-- | Pop a subset of STG registers from the stack (see 'emitPushTupleRegs') +emitPopTupleRegs :: CmmExpr -> FCode () +emitPopTupleRegs regs_live = do + platform <- getPlatform + let regs = zip (tupleRegsCover platform) [0..] + save_arg (reg, n) = + let mask = CmmLit (CmmInt (1 `shiftL` n) (wordWidth platform)) + live = cmmAndWord platform regs_live mask + cond = cmmNeWord platform live (zeroExpr platform) + reg_ty = cmmRegType platform (CmmGlobal reg) + width = roundUpToWords platform + (widthInBytes $ typeWidth reg_ty) + adj_sp = mkAssign spReg + (cmmOffset platform spExpr width) + restore_reg = mkAssign (CmmGlobal reg) (CmmLoad spExpr reg_ty) + in mkCmmIfThen cond $ catAGraphs [restore_reg, adj_sp] + emit . catAGraphs =<< mapM save_arg regs + emitCloseNursery :: FCode () emitCloseNursery = do -- cgit v1.2.1