diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
---|---|---|
committer | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
commit | 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch) | |
tree | caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/codeGen | |
parent | 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff) | |
parent | e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff) | |
download | haskell-wip/T13904.tar.gz |
update to current master againwip/T13904
Diffstat (limited to 'compiler/codeGen')
27 files changed, 573 insertions, 304 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 7184153f10..6a2840294a 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs #-} +{-# LANGUAGE GADTs #-} ----------------------------------------------------------------------------- -- @@ -10,7 +10,7 @@ module CgUtils ( fixStgRegisters ) where -#include "HsVersions.h" +import GhcPrelude import CodeGen.Platform import Cmm @@ -116,7 +116,7 @@ regTableOffset dflags n = get_Regtable_addr_from_offset :: DynFlags -> CmmType -> Int -> CmmExpr get_Regtable_addr_from_offset dflags _ offset = if haveRegBase (targetPlatform dflags) - then CmmRegOff (CmmGlobal BaseReg) offset + then CmmRegOff baseReg offset else regTableOffset dflags offset -- | Fixup global registers so that they assign to locations within the diff --git a/compiler/codeGen/CodeGen/Platform.hs b/compiler/codeGen/CodeGen/Platform.hs index 80452d0585..3014a0596f 100644 --- a/compiler/codeGen/CodeGen/Platform.hs +++ b/compiler/codeGen/CodeGen/Platform.hs @@ -3,6 +3,8 @@ module CodeGen.Platform (callerSaves, activeStgRegs, haveRegBase, globalRegMaybe, freeReg) where +import GhcPrelude + import CmmExpr import Platform import Reg diff --git a/compiler/codeGen/CodeGen/Platform/ARM.hs b/compiler/codeGen/CodeGen/Platform/ARM.hs index 5d1148496c..a2cb476e04 100644 --- a/compiler/codeGen/CodeGen/Platform/ARM.hs +++ b/compiler/codeGen/CodeGen/Platform/ARM.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.ARM where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_arm 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/ARM64.hs b/compiler/codeGen/CodeGen/Platform/ARM64.hs index c3ebeda6bf..6ace181356 100644 --- a/compiler/codeGen/CodeGen/Platform/ARM64.hs +++ b/compiler/codeGen/CodeGen/Platform/ARM64.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.ARM64 where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_aarch64 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/NoRegs.hs b/compiler/codeGen/CodeGen/Platform/NoRegs.hs index 0c85ffbda7..4c074ee313 100644 --- a/compiler/codeGen/CodeGen/Platform/NoRegs.hs +++ b/compiler/codeGen/CodeGen/Platform/NoRegs.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.NoRegs where +import GhcPrelude + #define MACHREGS_NO_REGS 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/PPC.hs b/compiler/codeGen/CodeGen/Platform/PPC.hs index 76a2b020ac..f7eae6b4ca 100644 --- a/compiler/codeGen/CodeGen/Platform/PPC.hs +++ b/compiler/codeGen/CodeGen/Platform/PPC.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.PPC where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_powerpc 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/PPC_Darwin.hs b/compiler/codeGen/CodeGen/Platform/PPC_Darwin.hs index a98e558cc1..91923fd453 100644 --- a/compiler/codeGen/CodeGen/Platform/PPC_Darwin.hs +++ b/compiler/codeGen/CodeGen/Platform/PPC_Darwin.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.PPC_Darwin where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_powerpc 1 #define MACHREGS_darwin 1 diff --git a/compiler/codeGen/CodeGen/Platform/SPARC.hs b/compiler/codeGen/CodeGen/Platform/SPARC.hs index 991f515eaf..5d8dbb1da9 100644 --- a/compiler/codeGen/CodeGen/Platform/SPARC.hs +++ b/compiler/codeGen/CodeGen/Platform/SPARC.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.SPARC where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_sparc 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/X86.hs b/compiler/codeGen/CodeGen/Platform/X86.hs index e74807ff88..84d52c1585 100644 --- a/compiler/codeGen/CodeGen/Platform/X86.hs +++ b/compiler/codeGen/CodeGen/Platform/X86.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.X86 where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_i386 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/CodeGen/Platform/X86_64.hs b/compiler/codeGen/CodeGen/Platform/X86_64.hs index 102132d679..1b2b5549ac 100644 --- a/compiler/codeGen/CodeGen/Platform/X86_64.hs +++ b/compiler/codeGen/CodeGen/Platform/X86_64.hs @@ -2,6 +2,8 @@ module CodeGen.Platform.X86_64 where +import GhcPrelude + #define MACHREGS_NO_REGS 0 #define MACHREGS_x86_64 1 #include "../../../../includes/CodeGen.Platform.hs" diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index d92b410a7f..60be1ca01b 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -12,6 +12,8 @@ module StgCmm ( codeGen ) where #include "HsVersions.h" +import GhcPrelude as Prelude + import StgCmmProf (initCostCentres, ldvEnter) import StgCmmMonad import StgCmmEnv @@ -233,8 +235,8 @@ maybeExternaliseId dflags id | gopt Opt_SplitObjs dflags, -- See Note [Externalise when splitting] -- in StgCmmMonad isInternalName name = do { mod <- getModuleName - ; returnFC (setIdName id (externalise mod)) } - | otherwise = returnFC id + ; return (setIdName id (externalise mod)) } + | otherwise = return id where externalise mod = mkExternalName uniq mod new_occ loc name = idName id diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs index 969e14f79e..2ea04079d0 100644 --- a/compiler/codeGen/StgCmmArgRep.hs +++ b/compiler/codeGen/StgCmmArgRep.hs @@ -15,6 +15,8 @@ module StgCmmArgRep ( ) where +import GhcPrelude + import StgCmmClosure ( idPrimRep ) import SMRep ( WordOff ) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 31775d6624..aa2b954a95 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Stg to C-- code generation: bindings @@ -15,14 +13,14 @@ module StgCmmBind ( pushUpdateFrame, emitUpdateFrame ) where -#include "HsVersions.h" +import GhcPrelude hiding ((<*>)) import StgCmmExpr import StgCmmMonad import StgCmmEnv import StgCmmCon import StgCmmHeap -import StgCmmProf (curCCS, ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk, +import StgCmmProf (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk, initUpdFrameProf) import StgCmmTicky import StgCmmLayout @@ -53,8 +51,6 @@ import DynFlags import Control.Monad -import Prelude hiding ((<*>)) - ------------------------------------------------------------------------ -- Top-level bindings ------------------------------------------------------------------------ @@ -99,21 +95,20 @@ cgTopRhsClosure dflags rec id ccs _ upd_flag args body = emitDataLits closure_label closure_rep return () - gen_code dflags lf_info closure_label - = do { -- LAY OUT THE OBJECT - let name = idName id + gen_code dflags lf_info _closure_label + = do { let name = idName id ; mod_name <- getModuleName ; let descr = closureDescription dflags mod_name name closure_info = mkClosureInfo dflags True id lf_info 0 0 descr - caffy = idCafInfo id - info_tbl = mkCmmInfo closure_info -- XXX short-cut - closure_rep = mkStaticClosureFields dflags info_tbl ccs caffy [] + -- We don't generate the static closure here, because we might + -- want to add references to static closures to it later. The + -- static closure is generated by CmmBuildInfoTables.updInfoSRTs, + -- See Note [SRTs], specifically the [FUN] optimisation. - -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) - ; emitDataLits closure_label closure_rep - ; let fv_details :: [(NonVoid Id, VirtualHpOffset)] - (_, _, fv_details) = mkVirtHeapOffsets dflags (isLFThunk lf_info) [] + ; let fv_details :: [(NonVoid Id, ByteOff)] + header = if isLFThunk lf_info then ThunkHeader else StdHeader + (_, _, fv_details) = mkVirtHeapOffsets dflags header [] -- Don't drop the non-void args until the closure info has been made ; forkClosureBody (closureCodeBody True id closure_info ccs (nonVoidIds args) (length args) body fv_details) @@ -350,9 +345,9 @@ mkRhsClosure dflags bndr cc _ fvs upd_flag args body ; let name = idName bndr descr = closureDescription dflags mod_name name fv_details :: [(NonVoid Id, ByteOff)] + header = if isLFThunk lf_info then ThunkHeader else StdHeader (tot_wds, ptr_wds, fv_details) - = mkVirtHeapOffsets dflags (isLFThunk lf_info) - (addIdReps reduced_fvs) + = mkVirtHeapOffsets dflags header (addIdReps reduced_fvs) closure_info = mkClosureInfo dflags False -- Not static bndr lf_info tot_wds ptr_wds descr @@ -367,10 +362,10 @@ mkRhsClosure dflags bndr cc _ fvs upd_flag args body -- BUILD THE OBJECT -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body - ; let use_cc = curCCS; blame_cc = curCCS + ; let use_cc = cccsExpr; blame_cc = cccsExpr ; emit (mkComment $ mkFastString "calling allocDynClosure") ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) - ; let info_tbl = mkCmmInfo closure_info + ; let info_tbl = mkCmmInfo closure_info bndr currentCCS ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc (map toVarArg fv_details) @@ -395,9 +390,10 @@ cgRhsStdThunk bndr lf_info payload { -- LAY OUT THE OBJECT mod_name <- getModuleName ; dflags <- getDynFlags - ; let (tot_wds, ptr_wds, payload_w_offsets) - = mkVirtHeapOffsets dflags (isLFThunk lf_info) - (addArgReps (nonVoidStgArgs payload)) + ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader + (tot_wds, ptr_wds, payload_w_offsets) + = mkVirtHeapOffsets dflags header + (addArgReps (nonVoidStgArgs payload)) descr = closureDescription dflags mod_name (idName bndr) closure_info = mkClosureInfo dflags False -- Not static @@ -405,11 +401,11 @@ cgRhsStdThunk bndr lf_info payload descr -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body - ; let use_cc = curCCS; blame_cc = curCCS + ; let use_cc = cccsExpr; blame_cc = cccsExpr -- BUILD THE OBJECT - ; let info_tbl = mkCmmInfo closure_info + ; let info_tbl = mkCmmInfo closure_info bndr currentCCS ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc payload_w_offsets @@ -465,7 +461,7 @@ closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details \(_, node, _) -> thunkCode cl_info fv_details cc node arity body where lf_info = closureLFInfo cl_info - info_tbl = mkCmmInfo cl_info + info_tbl = mkCmmInfo cl_info bndr cc closureCodeBody top_lvl bndr cl_info cc args arity body fv_details = -- Note: args may be [], if all args are Void @@ -476,7 +472,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details ; let lf_info = closureLFInfo cl_info - info_tbl = mkCmmInfo cl_info + info_tbl = mkCmmInfo cl_info bndr cc -- Emit the main entry code ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $ @@ -632,8 +628,7 @@ emitBlackHoleCode node = do -- work with profiling. when eager_blackholing $ do - emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) - (CmmReg (CmmGlobal CurrentTSO)) + emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr emitPrimCall [] MO_WriteBarrier [] emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo)) @@ -718,7 +713,7 @@ link_caf node _is_upd = do ForeignLabelInExternalPackage IsFunction ; bh <- newTemp (bWord dflags) ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl - [ (CmmReg (CmmGlobal BaseReg), AddrHint), + [ (baseExpr, AddrHint), (CmmReg (CmmLocal node), AddrHint) ] False diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 8eaee795a5..6f0feaa557 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -27,7 +27,7 @@ module StgCmmClosure ( mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, mkLFStringLit, lfDynTag, - maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable, + isLFThunk, isLFReEntrant, lfUpdatable, -- * Used by other modules CgLoc(..), SelfLoopInfo, CallMethod(..), @@ -66,11 +66,14 @@ module StgCmmClosure ( #include "HsVersions.h" +import GhcPrelude + import StgSyn import SMRep import Cmm import PprCmmExpr() +import CostCentre import BlockId import CLabel import Id @@ -384,11 +387,6 @@ lfDynTag _ _other = 0 -- Observing LambdaFormInfo ----------------------------------------------------------------------------- -------------- -maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon -maybeIsLFCon (LFCon con) = Just con -maybeIsLFCon _ = Nothing - ------------ isLFThunk :: LambdaFormInfo -> Bool isLFThunk (LFThunk {}) = True @@ -748,12 +746,15 @@ data ClosureInfo } -- | Convert from 'ClosureInfo' to 'CmmInfoTable'. -mkCmmInfo :: ClosureInfo -> CmmInfoTable -mkCmmInfo ClosureInfo {..} +mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable +mkCmmInfo ClosureInfo {..} id ccs = CmmInfoTable { cit_lbl = closureInfoLabel , cit_rep = closureSMRep , cit_prof = closureProf - , cit_srt = NoC_SRT } + , cit_srt = Nothing + , cit_clo = if isStaticRep closureSMRep + then Just (id,ccs) + else Nothing } -------------------------------------- -- Building ClosureInfos @@ -1038,7 +1039,8 @@ mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds = CmmInfoTable { cit_lbl = info_lbl , cit_rep = sm_rep , cit_prof = prof - , cit_srt = NoC_SRT } + , cit_srt = Nothing + , cit_clo = Nothing } where name = dataConName data_con info_lbl = mkConInfoTableLabel name NoCafRefs @@ -1061,14 +1063,16 @@ cafBlackHoleInfoTable = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel , cit_rep = blackHoleRep , cit_prof = NoProfilingInfo - , cit_srt = NoC_SRT } + , cit_srt = Nothing + , cit_clo = Nothing } indStaticInfoTable :: CmmInfoTable indStaticInfoTable = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel , cit_rep = indStaticRep , cit_prof = NoProfilingInfo - , cit_srt = NoC_SRT } + , cit_srt = Nothing + , cit_clo = Nothing } staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool -- A static closure needs a link field to aid the GC when traversing @@ -1079,4 +1083,4 @@ staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool -- of the SRT. staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep } | isConRep smrep = not (isStaticNoCafCon smrep) - | otherwise = has_srt -- needsSRT (cit_srt info_tbl) + | otherwise = has_srt diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a76b8cc0a0..a8ec300157 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -17,6 +17,8 @@ module StgCmmCon ( #include "HsVersions.h" +import GhcPrelude + import StgSyn import CoreSyn ( AltCon(..) ) @@ -26,9 +28,9 @@ import StgCmmHeap import StgCmmLayout import StgCmmUtils import StgCmmClosure -import StgCmmProf ( curCCS ) import CmmExpr +import CmmUtils import CLabel import MkGraph import SMRep @@ -79,7 +81,15 @@ cgTopRhsCon dflags id con args = ; let (tot_wds, -- #ptr_wds + #nonptr_wds ptr_wds, -- #ptr_wds - nv_args_w_offsets) = mkVirtConstrOffsets dflags (addArgReps args) + nv_args_w_offsets) = + mkVirtHeapOffsetsWithPadding dflags StdHeader (addArgReps args) + + mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len)) + mk_payload (FieldOff arg _) = do + amode <- getArgAmode arg + case amode of + CmmLit lit -> return lit + _ -> panic "StgCmmCon.cgTopRhsCon" nonptr_wds = tot_wds - ptr_wds @@ -88,10 +98,8 @@ cgTopRhsCon dflags id con args = -- needs to poke around inside it. info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds - get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg - ; return lit } - ; payload <- mapM get_lit nv_args_w_offsets + ; payload <- mapM mk_payload nv_args_w_offsets -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs -- NB2: all the amodes should be Lits! -- TODO (osa): Why? @@ -191,8 +199,8 @@ because they don't support cross package data references well. buildDynCon' dflags platform binder _ _cc con [arg] | maybeIntLikeCon con - , platformOS platform /= OSMinGW32 || not (gopt Opt_PIC dflags) - , NonVoid (StgLitArg (MachInt val)) <- arg + , platformOS platform /= OSMinGW32 || not (positionIndependent dflags) + , NonVoid (StgLitArg (LitNumber LitNumInt val _)) <- arg , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer! , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto... = do { let intlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_INTLIKE") @@ -205,7 +213,7 @@ buildDynCon' dflags platform binder _ _cc con [arg] buildDynCon' dflags platform binder _ _cc con [arg] | maybeCharLikeCon con - , platformOS platform /= OSMinGW32 || not (gopt Opt_PIC dflags) + , platformOS platform /= OSMinGW32 || not (positionIndependent dflags) , NonVoid (StgLitArg (MachChar val)) <- arg , let val_int = ord val :: Int , val_int <= mAX_CHARLIKE dflags @@ -239,7 +247,7 @@ buildDynCon' dflags _ binder actually_bound ccs con args ; return (mkRhsInit dflags reg lf_info hp_plus_n) } where use_cc -- cost-centre to stick in the object - | isCurrentCCS ccs = curCCS + | isCurrentCCS ccs = cccsExpr | otherwise = panic "buildDynCon: non-current CCS not implemented" blame_cc = use_cc -- cost-centre on which to blame the alloc (same) @@ -262,7 +270,7 @@ bindConArgs (DataAlt con) base args -- The binding below forces the masking out of the tag bits -- when accessing the constructor field. - bind_arg :: (NonVoid Id, VirtualHpOffset) -> FCode (Maybe LocalReg) + bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg) bind_arg (arg@(NonVoid b), offset) | isDeadBinder b = -- Do not load unused fields from objects to local variables. diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index 3061fb351b..f27728189f 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -24,6 +24,8 @@ module StgCmmEnv ( #include "HsVersions.h" +import GhcPrelude + import TyCon import StgCmmMonad import StgCmmUtils diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 6e6ad7e9d7..22fcfaf412 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# OPTIONS_GHC -fno-warn-unused-do-bind #-} ----------------------------------------------------------------------------- -- @@ -13,6 +12,8 @@ module StgCmmExpr ( cgExpr ) where #include "HsVersions.h" +import GhcPrelude hiding ((<*>)) + import {-# SOURCE #-} StgCmmBind ( cgBind ) import StgCmmMonad @@ -51,8 +52,6 @@ import Control.Monad (unless,void) import Control.Arrow (first) import Data.Function ( on ) -import Prelude hiding ((<*>)) - ------------------------------------------------------------------------ -- cgExpr: the main function ------------------------------------------------------------------------ @@ -61,7 +60,8 @@ cgExpr :: StgExpr -> FCode ReturnKind cgExpr (StgApp fun args) = cgIdApp fun args -{- seq# a s ==> a -} +-- seq# a s ==> a +-- See Note [seq# magic] in PrelRules cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = cgIdApp a [] @@ -409,7 +409,8 @@ cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts ; v_info <- getCgIdInfo v ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) (idInfoToAmode v_info) - ; bindArgToReg (NonVoid bndr) + -- Add bndr to the environment + ; _ <- bindArgToReg (NonVoid bndr) ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts } where reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr @@ -435,7 +436,8 @@ it would be better to invoke some kind of panic function here. cgCase scrut@(StgApp v []) _ (PrimAlt _) _ = do { dflags <- getDynFlags ; mb_cc <- maybeSaveCostCentre True - ; withSequel (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut) + ; _ <- withSequel + (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc ; emitComment $ mkFastString "should be unreachable code" ; l <- newBlockId @@ -446,13 +448,14 @@ cgCase scrut@(StgApp v []) _ (PrimAlt _) _ {- Note [Handle seq#] ~~~~~~~~~~~~~~~~~~~~~ -case seq# a s of v - (# s', a' #) -> e +See Note [seq# magic] in PrelRules. +The special case for seq# in cgCase does this: + case seq# a s of v + (# s', a' #) -> e ==> - -case a of v - (# s', a' #) -> e + case a of v + (# s', a' #) -> e (taking advantage of the fact that the return convention for (# State#, a #) is the same as the return convention for just 'a') @@ -460,6 +463,7 @@ is the same as the return convention for just 'a') cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts = -- Note [Handle seq#] + -- And see Note [seq# magic] in PrelRules -- Use the same return convention as vanilla 'a'. cgCase (StgApp a []) bndr alt_type alts @@ -616,13 +620,12 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts branches' = [(tag+1,branch) | (tag,branch) <- branches] emitSwitch tag_expr branches' mb_deflt 1 fam_sz - else -- No, get tag from info table - do dflags <- getDynFlags - let -- Note that ptr _always_ has tag 1 - -- when the family size is big enough - untagged_ptr = cmmRegOffB bndr_reg (-1) - tag_expr = getConstrTag dflags (untagged_ptr) - emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + else -- No, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB bndr_reg (-1) + tag_expr = getConstrTag dflags (untagged_ptr) + in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) ; return AssignedDirectly } diff --git a/compiler/codeGen/StgCmmExtCode.hs b/compiler/codeGen/StgCmmExtCode.hs index f12ada242b..551535d758 100644 --- a/compiler/codeGen/StgCmmExtCode.hs +++ b/compiler/codeGen/StgCmmExtCode.hs @@ -36,6 +36,8 @@ module StgCmmExtCode ( where +import GhcPrelude + import qualified StgCmmMonad as F import StgCmmMonad (FCode, newUnique) diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 2e3ed39a37..c1103e7d77 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Code generation for foreign calls. @@ -20,10 +18,10 @@ module StgCmmForeign ( emitCloseNursery, ) where -#include "HsVersions.h" +import GhcPrelude hiding( succ, (<*>) ) import StgSyn -import StgCmmProf (storeCurCCS, ccsType, curCCS) +import StgCmmProf (storeCurCCS, ccsType) import StgCmmEnv import StgCmmMonad import StgCmmUtils @@ -48,8 +46,6 @@ import BasicTypes import Control.Monad -import Prelude hiding( succ, (<*>) ) - ----------------------------------------------------------------------------- -- Code generation for Foreign Calls ----------------------------------------------------------------------------- @@ -287,7 +283,7 @@ saveThreadState dflags = do close_nursery <- closeNursery dflags tso pure $ catAGraphs [ -- tso = CurrentTSO; - mkAssign (CmmLocal tso) stgCurrentTSO, + mkAssign (CmmLocal tso) currentTSOExpr, -- tso->stackobj->sp = Sp; mkStore (cmmOffset dflags (CmmLoad (cmmOffset dflags @@ -295,11 +291,11 @@ saveThreadState dflags = do (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) - stgSp, + spExpr, close_nursery, -- and save the current cost centre stack in the TSO when profiling: if gopt Opt_SccProfilingOn dflags then - mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) curCCS + mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) cccsExpr else mkNop ] @@ -308,7 +304,7 @@ emitCloseNursery = do dflags <- getDynFlags tso <- newTemp (bWord dflags) code <- closeNursery dflags tso - emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code + emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code {- | @closeNursery dflags tso@ produces code to close the nursery. @@ -336,14 +332,14 @@ closeNursery df tso = do let tsoreg = CmmLocal tso cnreg <- CmmLocal <$> newTemp (bWord df) pure $ catAGraphs [ - mkAssign cnreg stgCurrentNursery, + mkAssign cnreg currentNurseryExpr, -- CurrentNursery->free = Hp+1; - mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df stgHp 1), + mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1), let alloc = CmmMachOp (mo_wordSub df) - [ cmmOffsetW df stgHp 1 + [ cmmOffsetW df hpExpr 1 , CmmLoad (nursery_bdescr_start df cnreg) (bWord df) ] @@ -370,18 +366,18 @@ loadThreadState dflags = do open_nursery <- openNursery dflags tso pure $ catAGraphs [ -- tso = CurrentTSO; - mkAssign (CmmLocal tso) stgCurrentTSO, + mkAssign (CmmLocal tso) currentTSOExpr, -- stack = tso->stackobj; mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), -- Sp = stack->sp; - mkAssign sp (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), + mkAssign spReg (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), -- SpLim = stack->stack + RESERVED_STACK_WORDS; - mkAssign spLim (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) + mkAssign spLimReg (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) (rESERVED_STACK_WORDS dflags)), -- HpAlloc = 0; -- HpAlloc is assumed to be set to non-zero only by a failed -- a heap check, see HeapStackCheck.cmm:GC_GENERIC - mkAssign hpAlloc (zeroExpr dflags), + mkAssign hpAllocReg (zeroExpr dflags), open_nursery, -- and load the current cost centre stack from the TSO when profiling: if gopt Opt_SccProfilingOn dflags @@ -397,7 +393,7 @@ emitOpenNursery = do dflags <- getDynFlags tso <- newTemp (bWord dflags) code <- openNursery dflags tso - emit $ mkAssign (CmmLocal tso) stgCurrentTSO <*> code + emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code {- | @openNursery dflags tso@ produces code to open the nursery. A local register @@ -408,8 +404,8 @@ Opening the nursery corresponds to the following code: @ tso = CurrentTSO; cn = CurrentNursery; - bdfree = CurrentNuresry->free; - bdstart = CurrentNuresry->start; + bdfree = CurrentNursery->free; + bdstart = CurrentNursery->start; // We *add* the currently occupied portion of the nursery block to // the allocation limit, because we will subtract it again in @@ -439,17 +435,17 @@ openNursery df tso = do -- what code we generate, look at the assembly for -- stg_returnToStackTop in rts/StgStartup.cmm. pure $ catAGraphs [ - mkAssign cnreg stgCurrentNursery, + mkAssign cnreg currentNurseryExpr, mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)), -- Hp = CurrentNursery->free - 1; - mkAssign hp (cmmOffsetW df (CmmReg bdfreereg) (-1)), + mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)), mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; - mkAssign hpLim + mkAssign hpLimReg (cmmOffsetExpr df (CmmReg bdstartreg) (cmmOffset df @@ -496,21 +492,6 @@ stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags) closureField :: DynFlags -> ByteOff -> ByteOff closureField dflags off = off + fixedHdrSize dflags -stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr -stgSp = CmmReg sp -stgHp = CmmReg hp -stgCurrentTSO = CmmReg currentTSO -stgCurrentNursery = CmmReg currentNursery - -sp, spLim, hp, hpLim, currentTSO, currentNursery, hpAlloc :: CmmReg -sp = CmmGlobal Sp -spLim = CmmGlobal SpLim -hp = CmmGlobal Hp -hpLim = CmmGlobal HpLim -currentTSO = CmmGlobal CurrentTSO -currentNursery = CmmGlobal CurrentNursery -hpAlloc = CmmGlobal HpAlloc - -- ----------------------------------------------------------------------------- -- For certain types passed to foreign calls, we adjust the actual -- value passed to the call. For ByteArray#/Array# we pass the diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index db62985e3c..3be35b35fa 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Stg to C--: heap management functions @@ -22,7 +20,7 @@ module StgCmmHeap ( emitSetDynHdr ) where -#include "HsVersions.h" +import GhcPrelude hiding ((<*>)) import StgSyn import CLabel @@ -49,8 +47,6 @@ import DynFlags import FastString( mkFastString, fsLit ) import Panic( sorry ) -import Prelude hiding ((<*>)) - import Control.Monad (when) import Data.Maybe (isJust) @@ -149,7 +145,7 @@ emitSetDynHdr base info_ptr ccs where header :: DynFlags -> [CmmExpr] header dflags = [info_ptr] ++ dynProfHdr dflags ccs - -- ToDof: Parallel stuff + -- ToDo: Parallel stuff -- No ticky header -- Store the item (expr,off) in base[off] @@ -221,24 +217,11 @@ mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit] mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field = [CmmLabel info_lbl] ++ staticProfHdr dflags ccs - ++ concatMap (padLitToWord dflags) payload + ++ payload ++ padding ++ static_link_field ++ saved_info_field --- JD: Simon had ellided this padding, but without it the C back end asserts --- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary? -padLitToWord :: DynFlags -> CmmLit -> [CmmLit] -padLitToWord dflags lit = lit : padding pad_length - where width = typeWidth (cmmLitType dflags lit) - pad_length = wORD_SIZE dflags - widthInBytes width :: Int - - padding n | n <= 0 = [] - | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1) - | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2) - | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4) - | otherwise = CmmInt 0 W64 : padding (n-8) - ----------------------------------------------------------- -- Heap overflow checking ----------------------------------------------------------- @@ -616,7 +599,7 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do let Just alloc_lit = mb_alloc_lit - bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit + bump_hp = cmmOffsetExprB dflags hpExpr alloc_lit -- Sp overflow if ((old + 0) - CmmHighStack < SpLim) -- At the beginning of a function old + 0 = Sp @@ -630,10 +613,9 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do -- Hp overflow if (Hp > HpLim) -- (Hp has been incremented by now) -- HpLim points to the LAST WORD of valid allocation space. - hp_oflo = CmmMachOp (mo_wordUGt dflags) - [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] + hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr] - alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit + alloc_n = mkAssign hpAllocReg alloc_lit case mb_stk_hwm of Nothing -> return () @@ -658,7 +640,7 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do when (checkYield && not (gopt Opt_OmitYields dflags)) $ do -- Yielding if HpLim == 0 let yielding = CmmMachOp (mo_wordEq dflags) - [CmmReg (CmmGlobal HpLim), + [CmmReg hpLimReg, CmmLit (zeroCLit dflags)] emit =<< mkCmmIfGoto' yielding gc_id (Just False) diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs index c8e65ad126..8e9676bd33 100644 --- a/compiler/codeGen/StgCmmHpc.hs +++ b/compiler/codeGen/StgCmmHpc.hs @@ -8,6 +8,8 @@ module StgCmmHpc ( initHpc, mkTickBox ) where +import GhcPrelude + import StgCmmMonad import MkGraph diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index b123420d58..78a7cf3f85 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} ----------------------------------------------------------------------------- @@ -17,7 +18,13 @@ module StgCmmLayout ( slowCall, directCall, - mkVirtHeapOffsets, mkVirtConstrOffsets, mkVirtConstrSizes, getHpRelOffset, + FieldOffOrPadding(..), + ClosureHeader(..), + mkVirtHeapOffsets, + mkVirtHeapOffsetsWithPadding, + mkVirtConstrOffsets, + mkVirtConstrSizes, + getHpRelOffset, ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep ) where @@ -25,7 +32,7 @@ module StgCmmLayout ( #include "HsVersions.h" -import Prelude hiding ((<*>)) +import GhcPrelude hiding ((<*>)) import StgCmmClosure import StgCmmEnv @@ -33,7 +40,6 @@ import StgCmmArgRep -- notably: ( slowCallPattern ) import StgCmmTicky import StgCmmMonad import StgCmmUtils -import StgCmmProf (curCCS) import MkGraph import SMRep @@ -44,7 +50,7 @@ import CmmInfo import CLabel import StgSyn import Id -import TyCon ( PrimRep(..) ) +import TyCon ( PrimRep(..), primRepSizeB ) import BasicTypes ( RepArity ) import DynFlags import Module @@ -367,7 +373,7 @@ slowArgs dflags args -- careful: reps contains voids (V), but args does not stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args - save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)] + save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)] save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs") ------------------------------------------------------------------------- @@ -387,30 +393,47 @@ getHpRelOffset virtual_offset hp_usg <- getHpUsage return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset)) -mkVirtHeapOffsets +data FieldOffOrPadding a + = FieldOff (NonVoid a) -- Something that needs an offset. + ByteOff -- Offset in bytes. + | Padding ByteOff -- Length of padding in bytes. + ByteOff -- Offset in bytes. + +-- | Used to tell the various @mkVirtHeapOffsets@ functions what kind +-- of header the object has. This will be accounted for in the +-- offsets of the fields returned. +data ClosureHeader + = NoHeader + | StdHeader + | ThunkHeader + +mkVirtHeapOffsetsWithPadding :: DynFlags - -> Bool -- True <=> is a thunk - -> [NonVoid (PrimRep,a)] -- Things to make offsets for - -> (WordOff, -- _Total_ number of words allocated - WordOff, -- Number of words allocated for *pointers* - [(NonVoid a, ByteOff)]) + -> ClosureHeader -- What kind of header to account for + -> [NonVoid (PrimRep, a)] -- Things to make offsets for + -> ( WordOff -- Total number of words allocated + , WordOff -- Number of words allocated for *pointers* + , [FieldOffOrPadding a] -- Either an offset or padding. + ) -- Things with their offsets from start of object in order of -- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER -- First in list gets lowest offset, which is initial offset + 1. -- --- mkVirtHeapOffsets always returns boxed things with smaller offsets +-- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets -- than the unboxed things -mkVirtHeapOffsets dflags is_thunk things - = ASSERT(not (any (isVoidRep . fst . fromNonVoid) things)) - ( bytesToWordsRoundUp dflags tot_bytes +mkVirtHeapOffsetsWithPadding dflags header things = + ASSERT(not (any (isVoidRep . fst . fromNonVoid) things)) + ( tot_wds , bytesToWordsRoundUp dflags bytes_of_ptrs - , ptrs_w_offsets ++ non_ptrs_w_offsets + , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad ) where - hdr_words | is_thunk = thunkHdrSize dflags - | otherwise = fixedHdrSizeW dflags + hdr_words = case header of + NoHeader -> 0 + StdHeader -> fixedHdrSizeW dflags + ThunkHeader -> thunkHdrSize dflags hdr_bytes = wordsToBytes dflags hdr_words (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things @@ -420,16 +443,64 @@ mkVirtHeapOffsets dflags is_thunk things (tot_bytes, non_ptrs_w_offsets) = mapAccumL computeOffset bytes_of_ptrs non_ptrs - computeOffset bytes_so_far nv_thing - = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)), - (NonVoid thing, hdr_bytes + bytes_so_far)) - where (rep,thing) = fromNonVoid nv_thing + tot_wds = bytesToWordsRoundUp dflags tot_bytes + + final_pad_size = tot_wds * word_size - tot_bytes + final_pad + | final_pad_size > 0 = [(Padding final_pad_size + (hdr_bytes + tot_bytes))] + | otherwise = [] + + word_size = wORD_SIZE dflags + + computeOffset bytes_so_far nv_thing = + (new_bytes_so_far, with_padding field_off) + where + (rep, thing) = fromNonVoid nv_thing + + -- Size of the field in bytes. + !sizeB = primRepSizeB dflags rep + + -- Align the start offset (eg, 2-byte value should be 2-byte aligned). + -- But not more than to a word. + !align = min word_size sizeB + !start = roundUpTo bytes_so_far align + !padding = start - bytes_so_far + + -- Final offset is: + -- size of header + bytes_so_far + padding + !final_offset = hdr_bytes + bytes_so_far + padding + !new_bytes_so_far = start + sizeB + field_off = FieldOff (NonVoid thing) final_offset + + with_padding field_off + | padding == 0 = [field_off] + | otherwise = [ Padding padding (hdr_bytes + bytes_so_far) + , field_off + ] + + +mkVirtHeapOffsets + :: DynFlags + -> ClosureHeader -- What kind of header to account for + -> [NonVoid (PrimRep,a)] -- Things to make offsets for + -> (WordOff, -- _Total_ number of words allocated + WordOff, -- Number of words allocated for *pointers* + [(NonVoid a, ByteOff)]) +mkVirtHeapOffsets dflags header things = + ( tot_wds + , ptr_wds + , [ (field, offset) | (FieldOff field offset) <- things_offsets ] + ) + where + (tot_wds, ptr_wds, things_offsets) = + mkVirtHeapOffsetsWithPadding dflags header things -- | Just like mkVirtHeapOffsets, but for constructors mkVirtConstrOffsets :: DynFlags -> [NonVoid (PrimRep, a)] -> (WordOff, WordOff, [(NonVoid a, ByteOff)]) -mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags False +mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader -- | Just like mkVirtConstrOffsets, but used when we don't have the actual -- arguments. Useful when e.g. generating info tables; we just need to know diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 5e62183fb5..9ddd8a3985 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, GADTs, UnboxedTuples #-} +{-# LANGUAGE GADTs, UnboxedTuples #-} ----------------------------------------------------------------------------- -- @@ -11,9 +11,8 @@ module StgCmmMonad ( FCode, -- type - initC, runC, thenC, thenFC, listCs, - returnFC, fixC, - newUnique, newUniqSupply, + initC, runC, fixC, + newUnique, emitLabel, @@ -30,7 +29,7 @@ module StgCmmMonad ( mkCall, mkCmmCall, - forkClosureBody, forkLneBody, forkAlts, codeOnly, + forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly, ConTagZ, @@ -59,13 +58,12 @@ module StgCmmMonad ( CgInfoDownwards(..), CgState(..) -- non-abstract ) where -#include "HsVersions.h" +import GhcPrelude hiding( sequence, succ ) import Cmm import StgCmmClosure import DynFlags import Hoopl.Collections -import Maybes import MkGraph import BlockId import CLabel @@ -79,13 +77,11 @@ import Unique import UniqSupply import FastString import Outputable +import Util import Control.Monad import Data.List -import Prelude hiding( sequence, succ ) -infixr 9 `thenC` -- Right-associative! -infixr 9 `thenFC` -------------------------------------------------------- @@ -114,27 +110,30 @@ infixr 9 `thenFC` -------------------------------------------------------- -newtype FCode a = FCode (CgInfoDownwards -> CgState -> (# a, CgState #)) +newtype FCode a = FCode { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) } instance Functor FCode where - fmap f (FCode g) = FCode $ \i s -> case g i s of (# a, s' #) -> (# f a, s' #) + fmap f (FCode g) = FCode $ \i s -> case g i s of (a, s') -> (f a, s') instance Applicative FCode where - pure = returnFC - (<*>) = ap + pure val = FCode (\_info_down state -> (val, state)) + {-# INLINE pure #-} + (<*>) = ap instance Monad FCode where - (>>=) = thenFC - -{-# INLINE thenC #-} -{-# INLINE thenFC #-} -{-# INLINE returnFC #-} + FCode m >>= k = FCode $ + \info_down state -> + case m info_down state of + (m_result, new_state) -> + case k m_result of + FCode kcode -> kcode info_down new_state + {-# INLINE (>>=) #-} instance MonadUnique FCode where getUniqueSupplyM = cgs_uniqs <$> getState getUniqueM = FCode $ \_ st -> let (u, us') = takeUniqFromSupply (cgs_uniqs st) - in (# u, st { cgs_uniqs = us' } #) + in (u, st { cgs_uniqs = us' }) initC :: IO CgState initC = do { uniqs <- mkSplitUniqSupply 'c' @@ -143,36 +142,10 @@ initC = do { uniqs <- mkSplitUniqSupply 'c' runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st -returnFC :: a -> FCode a -returnFC val = FCode (\_info_down state -> (# val, state #)) - -thenC :: FCode () -> FCode a -> FCode a -thenC (FCode m) (FCode k) = - FCode $ \info_down state -> case m info_down state of - (# _,new_state #) -> k info_down new_state - -listCs :: [FCode ()] -> FCode () -listCs [] = return () -listCs (fc:fcs) = do - fc - listCs fcs - -thenFC :: FCode a -> (a -> FCode c) -> FCode c -thenFC (FCode m) k = FCode $ - \info_down state -> - case m info_down state of - (# m_result, new_state #) -> - case k m_result of - FCode kcode -> kcode info_down new_state - fixC :: (a -> FCode a) -> FCode a -fixC fcode = FCode ( - \info_down state -> - let - (v,s) = doFCode (fcode v) info_down state - in - (# v, s #) - ) +fixC fcode = FCode $ + \info_down state -> let (v, s) = doFCode (fcode v) info_down state + in (v, s) -------------------------------------------------------- -- The code generator environment @@ -432,10 +405,10 @@ hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } -------------------------------------------------------- getState :: FCode CgState -getState = FCode $ \_info_down state -> (# state, state #) +getState = FCode $ \_info_down state -> (state, state) setState :: CgState -> FCode () -setState state = FCode $ \_info_down _ -> (# (), state #) +setState state = FCode $ \_info_down _ -> ((), state) getHpUsage :: FCode HeapUsage getHpUsage = do @@ -475,7 +448,7 @@ setBinds new_binds = do withState :: FCode a -> CgState -> FCode (a,CgState) withState (FCode fcode) newstate = FCode $ \info_down state -> case fcode info_down newstate of - (# retval, state2 #) -> (# (retval,state2), state #) + (retval, state2) -> ((retval,state2), state) newUniqSupply :: FCode UniqSupply newUniqSupply = do @@ -493,7 +466,7 @@ newUnique = do ------------------ getInfoDown :: FCode CgInfoDownwards -getInfoDown = FCode $ \info_down state -> (# info_down,state #) +getInfoDown = FCode $ \info_down state -> (info_down,state) getSelfLoop :: FCode (Maybe SelfLoopInfo) getSelfLoop = do @@ -514,11 +487,6 @@ getThisPackage = liftM thisPackage getDynFlags withInfoDown :: FCode a -> CgInfoDownwards -> FCode a withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state -doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) -doFCode (FCode fcode) info_down state = - case fcode info_down state of - (# a, s #) -> ( a, s ) - -- ---------------------------------------------------------------------------- -- Get the current module name @@ -664,10 +632,19 @@ forkAlts branch_fcodes , cgs_hp_usg = cgs_hp_usg state } (_us, results) = mapAccumL compile us branch_fcodes (branch_results, branch_out_states) = unzip results - ; setState $ foldl stateIncUsage state branch_out_states + ; setState $ foldl' stateIncUsage state branch_out_states -- NB foldl. state is the *left* argument to stateIncUsage ; return branch_results } +forkAltPair :: FCode a -> FCode a -> FCode (a,a) +-- Most common use of 'forkAlts'; having this helper function avoids +-- accidental use of failible pattern-matches in @do@-notation +forkAltPair x y = do + xy' <- forkAlts [x,y] + case xy' of + [x',y'] -> return (x',y') + _ -> panic "forkAltPair" + -- collect the code emitted by an FCode computation getCodeR :: FCode a -> FCode (a, CmmAGraph) getCodeR fcode @@ -727,11 +704,9 @@ emitLabel id = do tscope <- getTickScope emitCgStmt (CgLabel id tscope) emitComment :: FastString -> FCode () -#if 0 /* def DEBUG */ -emitComment s = emitCgStmt (CgStmt (CmmComment s)) -#else -emitComment _ = return () -#endif +emitComment s + | debugIsOn = emitCgStmt (CgStmt (CmmComment s)) + | otherwise = return () emitTick :: CmmTickish -> FCode () emitTick = emitCgStmt . CgStmt . CmmTick diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 1ecd72f9db..266ab3a0f6 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1,4 +1,6 @@ {-# LANGUAGE CPP #-} +-- emitPrimOp is quite large +{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-} ---------------------------------------------------------------------------- -- @@ -17,6 +19,8 @@ module StgCmmPrim ( #include "HsVersions.h" +import GhcPrelude hiding ((<*>)) + import StgCmmLayout import StgCmmForeign import StgCmmEnv @@ -24,7 +28,7 @@ import StgCmmMonad import StgCmmUtils import StgCmmTicky import StgCmmHeap -import StgCmmProf ( costCentreFrom, curCCS ) +import StgCmmProf ( costCentreFrom ) import DynFlags import Platform @@ -44,10 +48,8 @@ import FastString import Outputable import Util -import Prelude hiding ((<*>)) - import Data.Bits ((.&.), bit) -import Control.Monad (liftM, when) +import Control.Monad (liftM, when, unless) ------------------------------------------------------------------------ -- Primitive operations and foreign calls @@ -192,7 +194,7 @@ shouldInlinePrimOp _ CopyMutableArrayArrayOp shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -200,7 +202,7 @@ shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -225,7 +227,7 @@ shouldInlinePrimOp _ CopySmallMutableArrayOp shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -233,7 +235,7 @@ shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmIn shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_infoLabel res src src_off (fromInteger n) + Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = @@ -281,7 +283,7 @@ emitPrimOp _ [res] ParOp [arg] emitCCall [(res,NoHint)] (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)] + [(baseExpr, AddrHint), (arg,AddrHint)] emitPrimOp dflags [res] SparkOp [arg] = do @@ -293,7 +295,7 @@ emitPrimOp dflags [res] SparkOp [arg] emitCCall [(tmp2,NoHint)] (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] + [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp)) emitPrimOp dflags [res] GetCCSOfOp [arg] @@ -304,7 +306,10 @@ emitPrimOp dflags [res] GetCCSOfOp [arg] | otherwise = CmmLit (zeroCLit dflags) emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg] - = emitAssign (CmmLocal res) curCCS + = emitAssign (CmmLocal res) cccsExpr + +emitPrimOp _ [res] MyThreadIdOp [] + = emitAssign (CmmLocal res) currentTSOExpr emitPrimOp dflags [res] ReadMutVarOp [mutv] = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) @@ -317,7 +322,7 @@ emitPrimOp dflags res@[] WriteMutVarOp [mutv,var] emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) - [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)] + [(baseExpr, AddrHint), (mutv,AddrHint)] -- #define sizzeofByteArrayzh(r,a) \ -- r = ((StgArrBytes *)(a))->bytes @@ -347,14 +352,6 @@ emitPrimOp dflags [res] ByteArrayContents_Char [arg] emitPrimOp dflags [res] StableNameToIntOp [arg] = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) --- #define eqStableNamezh(r,sn1,sn2) \ --- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) -emitPrimOp dflags [res] EqStableNameOp [arg1,arg2] - = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [ - cmmLoadIndexW dflags arg1 (fixedHdrSizeW dflags) (bWord dflags), - cmmLoadIndexW dflags arg2 (fixedHdrSizeW dflags) (bWord dflags) - ]) - emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2] = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2]) @@ -378,20 +375,20 @@ emitPrimOp dflags [res] DataToTagOp [arg] -- #define unsafeFreezzeArrayzh(r,a) -- { --- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info); -- r = a; -- } emitPrimOp _ [res] UnsafeFreezeArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg] = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN0_infoLabel)), + [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)), mkAssign (CmmLocal res) arg ] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) @@ -516,6 +513,40 @@ emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args +-- IndexWord8ArrayAsXXX + +emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args + +-- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX + +emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args +emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args +emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args + -- WriteXXXoffAddr emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args @@ -554,6 +585,23 @@ emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayO emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args +-- WriteInt8ArrayAsXXX + +emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args +emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args +emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args + -- Copying and setting byte arrays emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] = doCopyByteArrayOp src src_off dst dst_off n @@ -568,6 +616,10 @@ emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] = emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = doSetByteArrayOp ba off len c +-- Comparing byte arrays +emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] = + doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n + emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 @@ -580,6 +632,20 @@ emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32 emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64 emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags) +-- Parallel bit deposit +emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8 +emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16 +emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32 +emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64 +emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags) + +-- Parallel bit extract +emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8 +emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16 +emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32 +emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64 +emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags) + -- count leading zeros emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8 emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16 @@ -833,6 +899,11 @@ callishPrimOpSupported dflags op || llvm -> Left (MO_Add2 (wordWidth dflags)) | otherwise -> Right genericWordAdd2Op + WordAddCOp | (ncg && (x86ish + || ppc)) + || llvm -> Left (MO_AddWordC (wordWidth dflags)) + | otherwise -> Right genericWordAddCOp + WordSubCOp | (ncg && (x86ish || ppc)) || llvm -> Left (MO_SubWordC (wordWidth dflags)) @@ -969,17 +1040,64 @@ genericWordAdd2Op [res_h, res_l] [arg_x, arg_y] (bottomHalf (CmmReg (CmmLocal r1))))] genericWordAdd2Op _ _ = panic "genericWordAdd2Op" +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@: +-- +-- @ +-- c = a&b | (a|b)&~r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ +genericWordAddCOp :: GenericOp +genericWordAddCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [aa,bb], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [aa,bb], + CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)] + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] +genericWordAddCOp _ _ = panic "genericWordAddCOp" + +-- | Implements branchless recovery of the carry flag @c@ by checking the +-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@: +-- +-- @ +-- c = ~a&b | (~a|b)&r +-- @ +-- +-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ genericWordSubCOp :: GenericOp -genericWordSubCOp [res_r, res_c] [aa, bb] = do - dflags <- getDynFlags - emit $ catAGraphs - [ -- Put the result into 'res_r'. - mkAssign (CmmLocal res_r) $ - CmmMachOp (mo_wordSub dflags) [aa, bb] - -- Set 'res_c' to 1 if 'bb > aa' and to 0 otherwise. - , mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUGt dflags) [bb, aa] - ] +genericWordSubCOp [res_r, res_c] [aa, bb] + = do dflags <- getDynFlags + emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp (mo_wordUShr dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmMachOp (mo_wordAnd dflags) [ + CmmMachOp (mo_wordOr dflags) [ + CmmMachOp (mo_wordNot dflags) [aa], + bb + ], + CmmReg (CmmLocal res_r) + ] + ], + mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) + ] + ] genericWordSubCOp _ _ = panic "genericWordSubCOp" genericIntAddCOp :: GenericOp @@ -1279,9 +1397,22 @@ translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags) translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags) translateOp dflags SameTVarOp = Just (mo_wordEq dflags) translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags) +-- See Note [Comparing stable names] +translateOp dflags EqStableNameOp = Just (mo_wordEq dflags) translateOp _ _ = Nothing +-- Note [Comparing stable names] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- A StableName# is actually a pointer to a stable name object (SNO) +-- containing an index into the stable name table (SNT). We +-- used to compare StableName#s by following the pointers to the +-- SNOs and checking whether they held the same SNT indices. However, +-- this is not necessary: there is a one-to-one correspondence +-- between SNOs and entries in the SNT, so simple pointer equality +-- does the trick. + -- These primops are implemented by CallishMachOps, because they sometimes -- turn into foreign calls depending on the backend. @@ -1296,6 +1427,9 @@ callishOp DoubleTanhOp = Just MO_F64_Tanh callishOp DoubleAsinOp = Just MO_F64_Asin callishOp DoubleAcosOp = Just MO_F64_Acos callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleAsinhOp = Just MO_F64_Asinh +callishOp DoubleAcoshOp = Just MO_F64_Acosh +callishOp DoubleAtanhOp = Just MO_F64_Atanh callishOp DoubleLogOp = Just MO_F64_Log callishOp DoubleExpOp = Just MO_F64_Exp callishOp DoubleSqrtOp = Just MO_F64_Sqrt @@ -1310,6 +1444,9 @@ callishOp FloatTanhOp = Just MO_F32_Tanh callishOp FloatAsinOp = Just MO_F32_Asin callishOp FloatAcosOp = Just MO_F32_Acos callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatAsinhOp = Just MO_F32_Asinh +callishOp FloatAcoshOp = Just MO_F32_Acosh +callishOp FloatAtanhOp = Just MO_F32_Atanh callishOp FloatLogOp = Just MO_F32_Log callishOp FloatExpOp = Just MO_F32_Exp callishOp FloatSqrtOp = Just MO_F32_Sqrt @@ -1712,7 +1849,7 @@ doNewByteArrayOp res_r n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgArrBytes_bytes dflags) ] @@ -1720,6 +1857,60 @@ doNewByteArrayOp res_r n = do emit $ mkAssign (CmmLocal res_r) base -- ---------------------------------------------------------------------------- +-- Comparing byte arrays + +doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr + -> FCode () +doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do + dflags <- getDynFlags + ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off + ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off + + -- short-cut in case of equal pointers avoiding a costly + -- subroutine call to the memcmp(3) routine; the Cmm logic below + -- results in assembly code being generated for + -- + -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int# + -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10# + -- + -- that looks like + -- + -- leaq 16(%r14),%rax + -- leaq 16(%rsi),%rbx + -- xorl %ecx,%ecx + -- cmpq %rbx,%rax + -- je l_ptr_eq + -- + -- ; NB: the common case (unequal pointers) falls-through + -- ; the conditional jump, and therefore matches the + -- ; usual static branch prediction convention of modern cpus + -- + -- subq $8,%rsp + -- movq %rbx,%rsi + -- movq %rax,%rdi + -- movl $10,%edx + -- xorl %eax,%eax + -- call memcmp + -- addq $8,%rsp + -- movslq %eax,%rax + -- movq %rax,%rcx + -- l_ptr_eq: + -- movq %rcx,%rbx + -- jmp *(%rbp) + + l_ptr_eq <- newBlockId + l_ptr_ne <- newBlockId + + emit (mkAssign (CmmLocal res) (zeroExpr dflags)) + emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p) + l_ptr_eq l_ptr_ne (Just False)) + + emitLabel l_ptr_ne + emitMemcmpCall res ba1_p ba2_p n 1 + + emitLabel l_ptr_eq + +-- ---------------------------------------------------------------------------- -- Copying byte arrays -- | Takes a source 'ByteArray#', an offset in the source array, a @@ -1749,10 +1940,9 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts [ - getCode $ emitMemmoveCall dst_p src_p bytes 1, - getCode $ emitMemcpyCall dst_p src_p bytes 1 - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p bytes 1) + (getCode $ emitMemcpyCall dst_p src_p bytes 1) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr @@ -1826,12 +2016,12 @@ doNewArrayOp res_r rep info payload n init = do (mkIntExpr dflags (nonHdrSize dflags rep)) (zeroExpr dflags) - base <- allocHeapClosure rep info_ptr curCCS payload + base <- allocHeapClosure rep info_ptr cccsExpr payload arr <- CmmLocal `fmap` newTemp (bWord dflags) emit $ mkAssign arr base - -- Initialise all elements of the the array + -- Initialise all elements of the array p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep) for <- newBlockId emitLabel for @@ -1893,12 +2083,11 @@ doCopyMutableArrayOp = emitCopyArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts [ - getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags), - getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) + (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff @@ -1956,12 +2145,11 @@ doCopySmallMutableArrayOp = emitCopySmallArray copy -- TODO: Optimize branch for common case of no aliasing. copy src dst dst_p src_p bytes = do dflags <- getDynFlags - [moveCall, cpyCall] <- forkAlts - [ getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - , getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wORD_SIZE dflags) - ] + (moveCall, cpyCall) <- forkAltPair + (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) + (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) + (wORD_SIZE dflags)) emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff @@ -2008,7 +2196,7 @@ emitCloneArray info_p res_r src src_off n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) , (mkIntExpr dflags (nonHdrSizeW rep), @@ -2047,7 +2235,7 @@ emitCloneSmallArray info_p res_r src src_off n = do let hdr_size = fixedHdrSize dflags - base <- allocHeapClosure rep info_ptr curCCS + base <- allocHeapClosure rep info_ptr cccsExpr [ (mkIntExpr dflags n, hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags) ] @@ -2213,6 +2401,30 @@ emitMemsetCall dst c n align = do (MO_Memset align) [ dst, c, n ] +emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode () +emitMemcmpCall res ptr1 ptr2 n align = do + -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all + -- code-gens currently call out to the @memcmp(3)@ C function. + -- This was easier than moving the sign-extensions into + -- all the code-gens. + dflags <- getDynFlags + let is32Bit = typeWidth (localRegType res) == W32 + + cres <- if is32Bit + then return res + else newTemp b32 + + emitPrimCall + [ cres ] + (MO_Memcmp align) + [ ptr1, ptr2, n ] + + unless is32Bit $ do + emit $ mkAssign (CmmLocal res) + (CmmMachOp + (mo_s_32ToWord dflags) + [(CmmReg (CmmLocal cres))]) + emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () emitBSwapCall res x width = do emitPrimCall @@ -2227,6 +2439,20 @@ emitPopCntCall res x width = do (MO_PopCnt width) [ x ] +emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () +emitPdepCall res x y width = do + emitPrimCall + [ res ] + (MO_Pdep width) + [ x, y ] + +emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () +emitPextCall res x y width = do + emitPrimCall + [ res ] + (MO_Pext width) + [ x, y ] + emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode () emitClzCall res x width = do emitPrimCall diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 434d7b50de..15c31ca59c 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE CPP #-} - ----------------------------------------------------------------------------- -- -- Code generation for profiling @@ -16,7 +14,7 @@ module StgCmmProf ( dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, enterCostCentreThunk, enterCostCentreFun, costCentreFrom, - curCCS, storeCurCCS, + storeCurCCS, emitSetCCC, saveCurrentCostCentre, restoreCurrentCostCentre, @@ -25,7 +23,7 @@ module StgCmmProf ( ldvEnter, ldvEnterClosure, ldvRecordCreate ) where -#include "HsVersions.h" +import GhcPrelude import StgCmmClosure import StgCmmUtils @@ -60,11 +58,8 @@ ccsType = bWord ccType :: DynFlags -> CmmType -- Type of a cost centre ccType = bWord -curCCS :: CmmExpr -curCCS = CmmReg (CmmGlobal CCCS) - storeCurCCS :: CmmExpr -> CmmAGraph -storeCurCCS e = mkAssign (CmmGlobal CCCS) e +storeCurCCS e = mkAssign cccsReg e mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -91,7 +86,7 @@ initUpdFrameProf :: CmmExpr -> FCode () initUpdFrameProf frame = ifProfiling $ -- frame->header.prof.ccs = CCCS do dflags <- getDynFlags - emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) curCCS + emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) cccsExpr -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) -- is unnecessary because it is not used anyhow. @@ -131,7 +126,7 @@ saveCurrentCostCentre if not (gopt Opt_SccProfilingOn dflags) then return Nothing else do local_cc <- newTemp (ccType dflags) - emitAssign (CmmLocal local_cc) curCCS + emitAssign (CmmLocal local_cc) cccsExpr return (Just local_cc) restoreCurrentCostCentre :: Maybe LocalReg -> FCode () @@ -184,7 +179,7 @@ enterCostCentreFun ccs closure = if isCurrentCCS ccs then do dflags <- getDynFlags emitRtsCall rtsUnitId (fsLit "enterFunCCS") - [(CmmReg (CmmGlobal BaseReg), AddrHint), + [(baseExpr, AddrHint), (costCentreFrom dflags closure, AddrHint)] False else return () -- top-level function, nothing to do @@ -207,7 +202,7 @@ ifProfilingL dflags xs initCostCentres :: CollectedCCs -> FCode () -- Emit the declarations -initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) +initCostCentres (local_CCs, singleton_CCSs) = do dflags <- getDynFlags when (gopt Opt_SccProfilingOn dflags) $ do mapM_ emitCostCentreDecl local_CCs @@ -278,7 +273,7 @@ emitSetCCC cc tick push if not (gopt Opt_SccProfilingOn dflags) then return () else do tmp <- newTemp (ccsType dflags) - pushCostCentre tmp curCCS cc + pushCostCentre tmp cccsExpr cc when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp))) when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index 8d86e37ddf..8f3074856a 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns, CPP #-} +{-# LANGUAGE BangPatterns #-} ----------------------------------------------------------------------------- -- @@ -104,7 +104,7 @@ module StgCmmTicky ( tickySlowCall, tickySlowCallPat, ) where -#include "HsVersions.h" +import GhcPrelude import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString ) import StgCmmClosure diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 237520877f..99fa550b83 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -43,6 +43,8 @@ module StgCmmUtils ( #include "HsVersions.h" +import GhcPrelude + import StgCmmMonad import StgCmmClosure import Cmm @@ -92,10 +94,10 @@ cgLit other_lit = do dflags <- getDynFlags mkSimpleLit :: DynFlags -> Literal -> CmmLit mkSimpleLit dflags (MachChar c) = CmmInt (fromIntegral (ord c)) (wordWidth dflags) mkSimpleLit dflags MachNullAddr = zeroCLit dflags -mkSimpleLit dflags (MachInt i) = CmmInt i (wordWidth dflags) -mkSimpleLit _ (MachInt64 i) = CmmInt i W64 -mkSimpleLit dflags (MachWord i) = CmmInt i (wordWidth dflags) -mkSimpleLit _ (MachWord64 i) = CmmInt i W64 +mkSimpleLit dflags (LitNumber LitNumInt i _) = CmmInt i (wordWidth dflags) +mkSimpleLit _ (LitNumber LitNumInt64 i _) = CmmInt i W64 +mkSimpleLit dflags (LitNumber LitNumWord i _) = CmmInt i (wordWidth dflags) +mkSimpleLit _ (LitNumber LitNumWord64 i _) = CmmInt i W64 mkSimpleLit _ (MachFloat r) = CmmFloat r W32 mkSimpleLit _ (MachDouble r) = CmmFloat r W64 mkSimpleLit _ (MachLabel fs ms fod) @@ -278,7 +280,7 @@ regTableOffset dflags n = get_Regtable_addr_from_offset :: DynFlags -> CmmType -> Int -> CmmExpr get_Regtable_addr_from_offset dflags _rep offset = if haveRegBase (targetPlatform dflags) - then CmmRegOff (CmmGlobal BaseReg) offset + then CmmRegOff baseReg offset else regTableOffset dflags offset @@ -527,8 +529,7 @@ emitCmmLitSwitch scrut branches deflt = do -- We find the necessary type information in the literals in the branches let signed = case head branches of - (MachInt _, _) -> True - (MachInt64 _, _) -> True + (LitNumber nt _ _, _) -> litNumIsSigned nt _ -> False let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags) @@ -583,7 +584,7 @@ mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches -------------- label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId) label_default _ Nothing - = return Nothing + = return Nothing label_default join_lbl (Just code) = do lbl <- label_code join_lbl code return (Just lbl) |