diff options
-rw-r--r-- | compiler/GHC/Cmm.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/Cmm/CLabel.hs | 21 | ||||
-rw-r--r-- | compiler/GHC/Cmm/Info/Build.hs | 65 | ||||
-rw-r--r-- | compiler/GHC/Cmm/Parser.y | 4 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Bind.hs | 40 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Heap.hs | 18 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Utils.hs | 2 | ||||
-rw-r--r-- | rts/Prelude.h | 2 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 5 | ||||
-rw-r--r-- | rts/StgStdThunks.cmm | 100 | ||||
-rw-r--r-- | rts/include/stg/MiscClosures.h | 4 |
11 files changed, 231 insertions, 37 deletions
diff --git a/compiler/GHC/Cmm.hs b/compiler/GHC/Cmm.hs index 3533dc2389..b764e80281 100644 --- a/compiler/GHC/Cmm.hs +++ b/compiler/GHC/Cmm.hs @@ -301,6 +301,9 @@ data GenCmmStatics (rawOnly :: Bool) where -> CmmInfoTable -> CostCentreStack -> [CmmLit] -- Payload + -> [CmmLit] -- Non-pointers that go to the end of the closure + -- This is used by stg_unpack_cstring closures. + -- See Note [unpack_cstring closures] in StgStdThunks.cmm. -> GenCmmStatics 'False -- | Static data, after SRTs are generated @@ -432,8 +435,8 @@ pprInfoTable platform (CmmInfoTable { cit_lbl = lbl, cit_rep = rep -- pprStatics :: Platform -> GenCmmStatics a -> SDoc -pprStatics platform (CmmStatics lbl itbl ccs payload) = - pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload +pprStatics platform (CmmStatics lbl itbl ccs payload extras) = + pdoc platform lbl <> colon <+> pdoc platform itbl <+> ppr ccs <+> pdoc platform payload <+> ppr extras pprStatics platform (CmmStaticsRaw lbl ds) = vcat ((pdoc platform lbl <> colon) : map (pprStatic platform) ds) pprStatic :: Platform -> CmmStatic -> SDoc diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs index 9edccdccf5..6d4397e62b 100644 --- a/compiler/GHC/Cmm/CLabel.hs +++ b/compiler/GHC/Cmm/CLabel.hs @@ -72,6 +72,8 @@ module GHC.Cmm.CLabel ( mkCAFBlackHoleInfoTableLabel, mkRtsPrimOpLabel, mkRtsSlowFastTickyCtrLabel, + mkRtsUnpackCStringLabel, + mkRtsUnpackCStringUtf8Label, mkSelectorInfoLabel, mkSelectorEntryLabel, @@ -562,6 +564,8 @@ data RtsLabelInfo | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks | RtsApEntry Bool{-updatable-} Int{-arity-} + | RtsUnpackCStringInfoTable + | RtsUnpackCStringUtf8InfoTable | RtsPrimOp PrimOp | RtsApFast NonDetFastString -- ^ _fast versions of generic apply | RtsSlowFastTickyCtr String @@ -734,7 +738,6 @@ mkApEntryLabel platform upd arity = assert (arity > 0 && arity <= pc_MAX_SPEC_AP_SIZE (platformConstants platform)) $ RtsLabel (RtsApEntry upd arity) - -- A call to some primitive hand written Cmm code mkPrimCallLabel :: PrimCall -> CLabel mkPrimCallLabel (PrimCall str pkg) @@ -852,6 +855,11 @@ mkRtsApFastLabel str = RtsLabel (RtsApFast (NonDetFastString str)) mkRtsSlowFastTickyCtrLabel :: String -> CLabel mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) +-- | A standard string unpacking thunk. See Note [unpack_cstring closures] in +-- StgStdThunks.cmm. +mkRtsUnpackCStringLabel, mkRtsUnpackCStringUtf8Label :: CLabel +mkRtsUnpackCStringLabel = RtsLabel RtsUnpackCStringInfoTable +mkRtsUnpackCStringUtf8Label = RtsLabel RtsUnpackCStringUtf8InfoTable -- Constructing Code Coverage Labels mkHpcTicksLabel :: Module -> CLabel @@ -958,6 +966,9 @@ hasIdLabelInfo _ = Nothing hasCAF :: CLabel -> Bool hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE] hasCAF (IdLabel _ MayHaveCafRefs _) = True +hasCAF (RtsLabel RtsUnpackCStringInfoTable) = True +hasCAF (RtsLabel RtsUnpackCStringUtf8InfoTable) = True + -- The info table stg_MK_STRING_info is for thunks hasCAF _ = False -- Note [ticky for LNE] @@ -1195,6 +1206,9 @@ labelType (CmmLabel _ _ _ CmmRet) = CodeLabel labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApFast _)) = CodeLabel +labelType (RtsLabel RtsUnpackCStringInfoTable) = CodeLabel +labelType (RtsLabel RtsUnpackCStringUtf8InfoTable) + = CodeLabel labelType (RtsLabel _) = DataLabel labelType (LocalBlockLabel _) = CodeLabel labelType (SRTLabel _) = DataLabel @@ -1525,6 +1539,11 @@ pprCLabel !platform !sty lbl = -- see Note [Bangs in CLabel] RtsLabel (RtsSlowFastTickyCtr pat) -> maybe_underscore $ text "SLOW_CALL_fast_" <> text pat <> text "_ctr" + RtsLabel RtsUnpackCStringInfoTable + -> maybe_underscore $ text "stg_unpack_cstring_info" + RtsLabel RtsUnpackCStringUtf8InfoTable + -> maybe_underscore $ text "stg_unpack_cstring_utf8_info" + LargeBitmapLabel u -> maybe_underscore $ tempLabelPrefixOrUnderscore <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs index 4b9294020d..e363eb879d 100644 --- a/compiler/GHC/Cmm/Info/Build.hs +++ b/compiler/GHC/Cmm/Info/Build.hs @@ -576,7 +576,7 @@ cafAnalData -> CAFSet cafAnalData platform st = case st of CmmStaticsRaw _lbl _data -> Set.empty - CmmStatics _lbl _itbl _ccs payload -> + CmmStatics _lbl _itbl _ccs payload _extras -> foldl' analyzeStatic Set.empty payload where analyzeStatic s lit = @@ -741,7 +741,9 @@ getBlockLabels = mapMaybe getBlockLabel getLabelledBlocks :: Platform -> CmmDecl -> [(SomeLabel, CAFfyLabel)] getLabelledBlocks platform decl = case decl of CmmData _ (CmmStaticsRaw _ _) -> [] - CmmData _ (CmmStatics lbl _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl) ] + CmmData _ (CmmStatics lbl info _ _ _) -> [ (DeclLabel lbl, mkCAFfyLabel platform lbl) + | not (isThunkRep (cit_rep info)) + ] CmmProc top_info _ _ _ -> [ (BlockLabel blockId, caf_lbl) | (blockId, info) <- mapToList (info_tbls top_info) , let rep = cit_rep info @@ -786,28 +788,48 @@ depAnalSRTs platform cafEnv cafEnv_static decls = graph :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)] graph = stronglyConnCompFromEdgedVerticesOrd nodes --- | Get @(Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block. --- The @Set CafLabel@ represents the set of CAFfy things which this CAF's code +-- | Get @(Maybe Label, CAFfyLabel, Set CAFfyLabel)@ for each CAF block. +-- The @Set CAFfyLabel@ represents the set of CAFfy things which this CAF's code -- depends upon. -- --- CAFs are treated differently from other labelled blocks: +-- - The 'Label' represents the entry code of the closure. This may be +-- 'Nothing' if it is a standard closure type (e.g. @stg_unpack_cstring@; see +-- Note [unpack_cstring closures] in StgStdThunks.cmm). +-- - The 'CAFLabel' is the label of the CAF closure. +-- - The @Set CAFLabel@ is the set of CAFfy closures which should be included +-- in the closure's SRT. +-- +-- Note that CAFs are treated differently from other labelled blocks: -- -- - we never shortcut a reference to a CAF to the contents of its -- SRT, since the point of SRTs is to keep CAFs alive. -- -- - CAFs therefore don't take part in the dependency analysis in depAnalSRTs. -- instead we generate their SRTs after everything else. --- -getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Label, CAFfyLabel, Set CAFfyLabel)] -getCAFs platform cafEnv decls = - [ (g_entry g, mkCAFfyLabel platform topLbl, cafs) - | CmmProc top_info topLbl _ g <- decls - , Just info <- [mapLookup (g_entry g) (info_tbls top_info)] - , let rep = cit_rep info - , isStaticRep rep && isThunkRep rep - , Just cafs <- [mapLookup (g_entry g) cafEnv] - ] +getCAFs :: Platform -> CAFEnv -> [CmmDecl] -> [(Maybe Label, CAFfyLabel, Set CAFfyLabel)] +getCAFs platform cafEnv = mapMaybe getCAFLabel + where + getCAFLabel :: CmmDecl -> Maybe (Maybe Label, CAFfyLabel, Set CAFfyLabel) + + getCAFLabel (CmmProc top_info top_lbl _ g) + | Just info <- mapLookup (g_entry g) (info_tbls top_info) + , let rep = cit_rep info + , isStaticRep rep && isThunkRep rep + , Just cafs <- mapLookup (g_entry g) cafEnv + = Just (Just (g_entry g), mkCAFfyLabel platform top_lbl, cafs) + + | otherwise + = Nothing + + getCAFLabel (CmmData _ (CmmStatics top_lbl info _ccs _payload _extras)) + | isThunkRep (cit_rep info) + = Just (Nothing, mkCAFfyLabel platform top_lbl, Set.empty) + + | otherwise + = Nothing + getCAFLabel (CmmData _ (CmmStaticsRaw _lbl _payload)) + = Nothing -- | Get the list of blocks that correspond to the entry points for -- @FUN_STATIC@ closures. These are the blocks for which if we have an @@ -882,7 +904,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do pprPanic "doSRTs" (text "Proc in static data list:" <+> pdoc platform decl) CmmData _ static -> case static of - CmmStatics lbl _ _ _ -> (lbl, set) + CmmStatics lbl _ _ _ _ -> (lbl, set) CmmStaticsRaw lbl _ -> (lbl, set) (proc_envs, procss) = unzip procs @@ -902,7 +924,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do sccs :: [SCC (SomeLabel, CAFfyLabel, Set CAFfyLabel)] sccs = {-# SCC depAnalSRTs #-} depAnalSRTs platform cafEnv static_data_env decls - cafsWithSRTs :: [(Label, CAFfyLabel, Set CAFfyLabel)] + cafsWithSRTs :: [(Maybe Label, CAFfyLabel, Set CAFfyLabel)] cafsWithSRTs = getCAFs platform cafEnv decls srtTraceM "doSRTs" (text "data:" <+> pdoc platform data_ $$ @@ -925,7 +947,7 @@ doSRTs cfg moduleSRTInfo procs data_ = do flip runStateT moduleSRTInfo $ do nonCAFs <- mapM (doSCC cfg staticFuns static_data_env) sccs cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> - oneSRT cfg staticFuns [BlockLabel l] [cafLbl] + oneSRT cfg staticFuns (map BlockLabel (maybeToList l)) [cafLbl] True{-is a CAF-} cafs static_data_env return (nonCAFs ++ cAFs) @@ -1248,6 +1270,7 @@ buildSRT profile refs = do [] -- no padding [mkIntCLit platform 0] -- link field [] -- no saved info + [] -- no extras return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) -- | Update info tables with references to their SRTs. Also generate @@ -1263,10 +1286,10 @@ updInfoSRTs updInfoSRTs _ _ _ _ (CmmData s (CmmStaticsRaw lbl statics)) = [CmmData s (CmmStaticsRaw lbl statics)] -updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload)) +updInfoSRTs profile _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload extras)) = [CmmData s (CmmStaticsRaw lbl (map CmmStaticLit field_lits))] where - field_lits = mkStaticClosureFields profile itbl ccs caffy payload + field_lits = mkStaticClosureFields profile itbl ccs caffy payload extras updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g) | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] @@ -1296,7 +1319,7 @@ updInfoSRTs profile srt_env funSRTEnv caffy (CmmProc top_info top_l live g) Just srtEntries -> srtTrace "maybeStaticFun" (pdoc (profilePlatform profile) res) (info_tbl { cit_rep = new_rep }, res) where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] - fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries + fields = mkStaticClosureFields profile info_tbl ccs caffy srtEntries [] new_rep = case cit_rep of HeapRep sta ptrs nptrs ty -> HeapRep sta (ptrs + length srtEntries) nptrs ty diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index ae6e126b68..656de66848 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -435,7 +435,7 @@ static :: { CmmParse [CmmStatic] } mkStaticClosure profile (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) -- mkForeignLabel because these are only used -- for CHARLIKE and INTLIKE closures in the RTS. - dontCareCCS (map getLit lits) [] [] [] } } + dontCareCCS (map getLit lits) [] [] [] [] } } -- arrays of closures required for the CHARLIKE & INTLIKE arrays lits :: { [CmmParse CmmExpr] } @@ -1248,7 +1248,7 @@ profilingInfo profile desc_str ty_str staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse () staticClosure pkg cl_label info payload = do profile <- getProfile - let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] + let lits = mkStaticClosure profile (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] [] code $ emitDataLits (mkCmmDataLabel pkg (NeedExternDecl True) cl_label) lits foreignCall diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 177c3f2912..3e9f5a52d5 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -25,6 +25,8 @@ import GHC.Stg.Syntax import GHC.Platform import GHC.Platform.Profile +import GHC.Builtin.Names (unpackCStringName, unpackCStringUtf8Name) + import GHC.StgToCmm.Config import GHC.StgToCmm.Expr import GHC.StgToCmm.Monad @@ -87,6 +89,9 @@ cgTopRhsClosure platform rec id ccs upd_flag args body = lf_info = mkClosureLFInfo platform id TopLevel [] upd_flag args in (cg_id_info, gen_code lf_info closure_label) where + + gen_code :: LambdaFormInfo -> CLabel -> FCode () + -- special case for a indirection (f = g). We create an IND_STATIC -- closure pointing directly to the indirectee. This is exactly -- what the CAF will eventually evaluate to anyway, we're just @@ -101,11 +106,44 @@ cgTopRhsClosure platform rec id ccs upd_flag args body = -- concurrent/should_run/4030 fails, for instance. -- gen_code _ closure_label - | StgApp f [] <- body, null args, isNonRec rec + | StgApp f [] <- body + , null args + , isNonRec rec = do cg_info <- getCgIdInfo f emitDataCon closure_label indStaticInfoTable ccs [unLit (idInfoToAmode cg_info)] + -- Emit standard stg_unpack_cstring closures for top-level unpackCString# thunks. + -- + -- Note that we do not do this for thunks enclosured in code ticks (e.g. hpc + -- ticks) since we want to ensure that these ticks are not lost (e.g. + -- resulting in Strings being reported by hpc as uncovered). However, we + -- don't worry about standard profiling ticks since unpackCString tends not + -- be terribly interesting in profiles. See Note [unpack_cstring closures] in + -- StgStdThunks.cmm. + gen_code _ closure_label + | null args + , StgApp f [arg] <- stripStgTicksTopE (not . tickishIsCode) body + , Just unpack <- is_string_unpack_op f + = do arg' <- getArgAmode (NonVoid arg) + case arg' of + CmmLit lit -> do + let info = CmmInfoTable + { cit_lbl = unpack + , cit_rep = HeapRep True 0 1 Thunk + , cit_prof = NoProfilingInfo + , cit_srt = Nothing + , cit_clo = Nothing + } + emitDecl $ CmmData (Section Data closure_label) $ + CmmStatics closure_label info ccs [] [lit] + _ -> panic "cgTopRhsClosure.gen_code" + where + is_string_unpack_op f + | idName f == unpackCStringName = Just mkRtsUnpackCStringLabel + | idName f == unpackCStringUtf8Name = Just mkRtsUnpackCStringUtf8Label + | otherwise = Nothing + gen_code lf_info _closure_label = do { profile <- getProfile ; let name = idName id diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs index a7e7f23e9d..9e493dff3e 100644 --- a/compiler/GHC/StgToCmm/Heap.hs +++ b/compiler/GHC/StgToCmm/Heap.hs @@ -161,19 +161,20 @@ hpStore base vals = do -- Layout of static closures ----------------------------------------------------------- --- Make a static closure, adding on any extra padding needed for CAFs, --- and adding a static link field if necessary. - +-- | Make a static closure, adding on any extra padding needed for CAFs, and +-- adding a static link field if necessary. mkStaticClosureFields :: Profile -> CmmInfoTable -> CostCentreStack -> CafInfo - -> [CmmLit] -- Payload + -> [CmmLit] -- ^ Payload + -> [CmmLit] -- ^ Extra non-pointers that go to the end of the closure. + -- See Note [unpack_cstring closures] in StgStdThunks.cmm. -> [CmmLit] -- The full closure -mkStaticClosureFields profile info_tbl ccs caf_refs payload +mkStaticClosureFields profile info_tbl ccs caf_refs payload extras = mkStaticClosure profile info_lbl ccs payload padding - static_link_field saved_info_field + static_link_field saved_info_field extras where platform = profilePlatform profile info_lbl = cit_lbl info_tbl @@ -218,14 +219,15 @@ mkStaticClosureFields profile info_tbl ccs caf_refs payload -- in rts/sm/Storage.h mkStaticClosure :: Profile -> CLabel -> CostCentreStack -> [CmmLit] - -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure profile info_lbl ccs payload padding static_link_field saved_info_field extras = [CmmLabel info_lbl] ++ staticProfHdr profile ccs ++ payload ++ padding ++ static_link_field ++ saved_info_field + ++ extras ----------------------------------------------------------- -- Heap overflow checking diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index cd80cd51ff..704b09d1d4 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -266,7 +266,7 @@ emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits) emitDataCon :: CLabel -> CmmInfoTable -> CostCentreStack -> [CmmLit] -> FCode () emitDataCon lbl itbl ccs payload = - emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload)) + emitDecl (CmmData (Section Data lbl) (CmmStatics lbl itbl ccs payload [])) ------------------------------------------------------------------------- -- diff --git a/rts/Prelude.h b/rts/Prelude.h index 3db5546ad0..b52e1b38fd 100644 --- a/rts/Prelude.h +++ b/rts/Prelude.h @@ -33,6 +33,7 @@ PRELUDE_CLOSURE(ghczmprim_GHCziTupleziPrim_Z0T_closure); PRELUDE_CLOSURE(ghczmprim_GHCziTypes_True_closure); PRELUDE_CLOSURE(ghczmprim_GHCziTypes_False_closure); PRELUDE_CLOSURE(base_GHCziPack_unpackCString_closure); +PRELUDE_CLOSURE(base_GHCziPack_unpackCStringUtf8_closure); PRELUDE_CLOSURE(base_GHCziWeak_runFinalizzerBatch_closure); PRELUDE_CLOSURE(base_GHCziWeakziFinalizze_runFinalizzerBatch_closure); @@ -70,6 +71,7 @@ PRELUDE_CLOSURE(base_GHCziEventziWindows_processRemoteCompletion_closure); PRELUDE_CLOSURE(base_GHCziTopHandler_flushStdHandles_closure); PRELUDE_CLOSURE(base_GHCziTopHandler_runMainIO_closure); +PRELUDE_INFO(ghczmprim_GHCziCString_unpackCStringzh_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Czh_con_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Izh_con_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Fzh_con_info); diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 317b284158..097b8a1df4 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -9,6 +9,7 @@ #include "ghcplatform.h" #include "Rts.h" #include "RtsSymbols.h" + #include "TopHandler.h" #include "HsFFI.h" #include "CloneStack.h" @@ -713,7 +714,7 @@ extern char **environ; SymI_HasProto(defaultRtsConfig) \ SymI_HasProto(initLinker) \ SymI_HasProto(initLinker_) \ - SymI_HasDataProto(stg_unpackClosurezh) \ + SymI_HasDataProto(stg_unpackClosurezh) \ SymI_HasDataProto(stg_closureSizzezh) \ SymI_HasDataProto(stg_whereFromzh) \ SymI_HasDataProto(stg_getApStackValzh) \ @@ -976,6 +977,8 @@ extern char **environ; SymI_HasDataProto(stg_sel_13_noupd_info) \ SymI_HasDataProto(stg_sel_14_noupd_info) \ SymI_HasDataProto(stg_sel_15_noupd_info) \ + SymI_HasDataProto(stg_unpack_cstring_info) \ + SymI_HasDataProto(stg_unpack_cstring_utf8_info) \ SymI_HasDataProto(stg_upd_frame_info) \ SymI_HasDataProto(stg_bh_upd_frame_info) \ SymI_HasProto(suspendThread) \ diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index 3c528f662f..c4f5d25881 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -13,6 +13,9 @@ #include "Cmm.h" #include "Updates.h" +import ghczmprim_GHCziCString_unpackCStringzh_info; +import ghczmprim_GHCziCString_unpackCStringUtf8zh_info; + /* ----------------------------------------------------------------------------- The code for a thunk that simply extracts a field from a single-constructor datatype depends only on the offset of the field @@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info") StgThunk_payload(node,6)); } } + +/* ----------------------------------------------------------------------------- + Making strings + -------------------------------------------------------------------------- */ + +/* + * Note [unpack_cstring closures] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Strings are extremely common. In Core they will typically manifest as the + * a pair of top-level bindings: + * + * s :: String + * s = unpackCString# s# + * + * s# :: Addr# + * s# = "hello world"# + * + * It turns out that `s` is a non-trivial amount of code which is duplicated + * for every `String` literal. To avoid this duplicate, we have a standard + * string-unpacking closure, unpack_cstring. Note that currently we only do + * this for ASCII strings; strings mentioning non-ASCII characters are + * represented by CAF applications of unpackCStringUtf8# as before. + * + * unpack_cstring closures are similar to standard THUNK_STATIC closures but + * with a non-GC pointer to a C-string at the end (the "extra" pointer). + * We must place this extra pointer at the end of the closure to ensure that + * it has a similar layout to a normal THUNK_STATIC closure, which has no space + * for free variables (since these would be contained in the thunk's code and SRT). + * + * When it is evaluated, an stg_unpack_cstring closure is updated to be an + * indirection to the resulting [Char], just as a normal unpackCString# thunk + * would be. + * + * Closure layout: + * + * ┌───────────────────┐ ┌──► ┌──────────────────────────┐ + * │ stg_unpack_cstring│ │ │ "hello world ..." │ + * ├───────────────────┤ │ └──────────────────────────┘ + * │ indirectee │ │ + * ├───────────────────┤ │ + * │ static_link │ │ + * ├───────────────────┤ │ + * │ saved_info │ │ + * ├───────────────────┤ │ + * │ the_string ─┼───────┘ + * └───────────────────┘ + * + */ + +stg_do_unpack_cstring(P_ node, P_ newCAF_ret) { + STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret); + W_ str; + str = StgThunk_payload(node, 2); + push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) { + jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str); + } +} + +INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring") + (P_ node) +{ + W_ newCAF_ret; + (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr"); + + if (newCAF_ret == 0) { + // We raced with another thread to evaluate the CAF and they won; + // `node` should now be an indirection. + ENTER(node); + } else { + jump stg_do_unpack_cstring(node, newCAF_ret); + } +} + +stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) { + STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret); + W_ str; + str = StgThunk_payload(node, 2); + push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) { + jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str); + } +} + +INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8") + (P_ node) +{ + W_ newCAF_ret; + (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr"); + + if (newCAF_ret == 0) { + // We raced with another thread to evaluate the CAF and they won; + // `node` should now be an indirection. + ENTER(node); + } else { + jump stg_do_unpack_cstring_utf8(node, newCAF_ret); + } +} + diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h index e6b4957e17..257d59a607 100644 --- a/rts/include/stg/MiscClosures.h +++ b/rts/include/stg/MiscClosures.h @@ -318,6 +318,10 @@ RTS_THUNK(stg_ap_5_upd); RTS_THUNK(stg_ap_6_upd); RTS_THUNK(stg_ap_7_upd); +// Standard entry for `unpackCString# str` thunks +RTS_ENTRY(stg_unpack_cstring); +RTS_ENTRY(stg_unpack_cstring_utf8); + /* standard application routines (see also utils/genapply, * and GHC.StgToCmm.ArgRep). */ |