diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2023-01-30 21:59:47 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2023-02-14 11:31:04 -0500 |
commit | d6411d6cddb8c94c74e5834f0199370d189d31a2 (patch) | |
tree | 1e6910e1417c37c40dceb7e4b1b99bfcb01f1184 | |
parent | 759304244f15992a098a924ebd93f295971da422 (diff) | |
download | haskell-d6411d6cddb8c94c74e5834f0199370d189d31a2.tar.gz |
Fix some correctness issues around tag inference when targeting the bytecode generator.
* Let binders are now always assumed untagged for bytecode.
* Imported referenced are now always assumed to be untagged for bytecode.
Fixes #22840
-rw-r--r-- | compiler/GHC/Driver/Config/Stg/Pipeline.hs | 1 | ||||
-rw-r--r-- | compiler/GHC/Stg/InferTags.hs | 72 | ||||
-rw-r--r-- | compiler/GHC/Stg/InferTags/Types.hs | 25 | ||||
-rw-r--r-- | compiler/GHC/Stg/Pipeline.hs | 3 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_compile/T22840.hs | 14 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_compile/T22840.stderr | 6 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_compile/T22840A.hs | 9 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_compile/T22840B.hs | 8 | ||||
-rw-r--r-- | testsuite/tests/simplStg/should_compile/all.T | 4 |
9 files changed, 115 insertions, 27 deletions
diff --git a/compiler/GHC/Driver/Config/Stg/Pipeline.hs b/compiler/GHC/Driver/Config/Stg/Pipeline.hs index 50e7be0913..8dc689792b 100644 --- a/compiler/GHC/Driver/Config/Stg/Pipeline.hs +++ b/compiler/GHC/Driver/Config/Stg/Pipeline.hs @@ -22,6 +22,7 @@ initStgPipelineOpts dflags for_bytecode = StgPipelineOpts , stgPipeline_pprOpts = initStgPprOpts dflags , stgPipeline_phases = getStgToDo for_bytecode dflags , stgPlatform = targetPlatform dflags + , stgPipeline_forBytecode = for_bytecode } -- | Which Stg-to-Stg passes to run. Depends on flags, ways etc. diff --git a/compiler/GHC/Stg/InferTags.hs b/compiler/GHC/Stg/InferTags.hs index c9835d5787..e4316beab5 100644 --- a/compiler/GHC/Stg/InferTags.hs +++ b/compiler/GHC/Stg/InferTags.hs @@ -204,6 +204,33 @@ a different StgPass! To handle this a large part of the analysis is polymorphic over the exact StgPass we are using. Which allows us to run the analysis on the output of itself. +Note [Tag inference for interpreted code] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The bytecode interpreter has a different behaviour when it comes +to the tagging of binders in certain situations than the StgToCmm code generator. + +a) Tags for let-bindings: + + When compiling a binding for a constructor like `let x = Just True` + Whether `x` will be properly tagged depends on the backend. + For the interpreter x points to a BCO which once + evaluated returns a properly tagged pointer to the heap object. + In the Cmm backend for the same binding we would allocate the constructor right + away and x will immediately be represented by a tagged pointer. + This means for interpreted code we can not assume let bound constructors are + properly tagged. Hence we distinguish between targeting bytecode and native in + the analysis. + We make this differentiation in `mkLetSig` where we simply never assume + lets are tagged when targeting bytecode. + +b) When referencing ids from other modules the Cmm backend will try to put a + proper tag on these references through various means. When doing analysis we + usually predict these cases to improve precision of the analysis. + But to my knowledge the bytecode generator makes no such attempts so we must + not infer imported bindings as tagged. + This is handled in GHC.Stg.InferTags.Types.lookupInfo + + -} {- ********************************************************************* @@ -212,20 +239,12 @@ the output of itself. * * ********************************************************************* -} --- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon] --- -> CollectedCCs --- -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs --- -> HpcInfo --- -> IO (Stream IO CmmGroupSRTs CmmCgInfos) --- -- Note we produce a 'Stream' of CmmGroups, so that the --- -- backend can be run incrementally. Otherwise it generates all --- -- the C-- up front, which has a significant space cost. -inferTags :: StgPprOpts -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig) -inferTags ppr_opts logger this_mod stg_binds = do - +inferTags :: StgPprOpts -> Bool -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig) +inferTags ppr_opts !for_bytecode logger this_mod stg_binds = do + -- pprTraceM "inferTags for " (ppr this_mod <> text " bytecode:" <> ppr for_bytecode) -- Annotate binders with tag information. let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-} - inferTagsAnal stg_binds + inferTagsAnal for_bytecode stg_binds putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings ppr_opts stg_binds_w_tags) let export_tag_info = collectExportInfo stg_binds_w_tags @@ -254,10 +273,10 @@ type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders) -inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders] -inferTagsAnal binds = +inferTagsAnal :: Bool -> [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders] +inferTagsAnal for_bytecode binds = -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $ - snd (mapAccumL inferTagTopBind initEnv binds) + snd (mapAccumL inferTagTopBind (initEnv for_bytecode) binds) ----------------------- inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen @@ -420,11 +439,12 @@ inferTagBind in_env (StgNonRec bndr rhs) -- ppr bndr $$ -- ppr (isDeadEndId id) $$ -- ppr sig) - (env', StgNonRec (id, sig) rhs') + (env', StgNonRec (id, out_sig) rhs') where id = getBinderId in_env bndr - env' = extendSigEnv in_env [(id, sig)] - (sig,rhs') = inferTagRhs id in_env rhs + (in_sig,rhs') = inferTagRhs id in_env rhs + out_sig = mkLetSig in_env in_sig + env' = extendSigEnv in_env [(id, out_sig)] inferTagBind in_env (StgRec pairs) = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $ @@ -443,14 +463,17 @@ inferTagBind in_env (StgRec pairs) | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss') | otherwise = go env' out_sigs rhss' where - out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive in_bndrs = in_ids `zip` in_sigs + out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive rhs_env = extendSigEnv go_env in_bndrs (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss) env' = makeTagged go_env anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders) - anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs + anaRhs bnd rhs = + let (sig_rhs,rhs') = inferTagRhs bnd rhs_env rhs + in (mkLetSig go_env sig_rhs, rhs') + updateBndr :: (Id,TagSig) -> (Id,TagSig) updateBndr (v,sig) = (setIdTagSig v sig, sig) @@ -536,6 +559,15 @@ inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args) = --pprTrace "inferTagRhsCon" (ppr grp_ids) $ (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args) +-- Adjust let semantics to the targeted backend. +-- See Note [Tag inference for interpreted code] +mkLetSig :: TagEnv p -> TagSig -> TagSig +mkLetSig env in_sig + | for_bytecode = TagSig TagDunno + | otherwise = in_sig + where + for_bytecode = te_bytecode env + {- Note [Constructor TagSigs] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor diff --git a/compiler/GHC/Stg/InferTags/Types.hs b/compiler/GHC/Stg/InferTags/Types.hs index e6ee76e0e4..11ac33cc5b 100644 --- a/compiler/GHC/Stg/InferTags/Types.hs +++ b/compiler/GHC/Stg/InferTags/Types.hs @@ -49,24 +49,30 @@ combineAltInfo ti TagTagged = ti type TagSigEnv = IdEnv TagSig data TagEnv p = TE { te_env :: TagSigEnv , te_get :: BinderP p -> Id + , te_bytecode :: !Bool } instance Outputable (TagEnv p) where - ppr te = ppr (te_env te) - + ppr te = for_txt <+> ppr (te_env te) + where + for_txt = if te_bytecode te + then text "for_bytecode" + else text "for_native" getBinderId :: TagEnv p -> BinderP p -> Id getBinderId = te_get -initEnv :: TagEnv 'CodeGen -initEnv = TE { te_env = emptyVarEnv - , te_get = \x -> x} +initEnv :: Bool -> TagEnv 'CodeGen +initEnv for_bytecode = TE { te_env = emptyVarEnv + , te_get = \x -> x + , te_bytecode = for_bytecode } -- | Simple convert env to a env of the 'InferTaggedBinders pass -- with no other changes. makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders makeTagged env = TE { te_env = te_env env - , te_get = fst } + , te_get = fst + , te_bytecode = te_bytecode env } noSig :: TagEnv p -> BinderP p -> (Id, TagSig) noSig env bndr @@ -75,14 +81,18 @@ noSig env bndr where var = getBinderId env bndr +-- | Look up a sig in the given env lookupSig :: TagEnv p -> Id -> Maybe TagSig lookupSig env fun = lookupVarEnv (te_env env) fun +-- | Look up a sig in the env or derive it from information +-- in the arg itself. lookupInfo :: TagEnv p -> StgArg -> TagInfo lookupInfo env (StgVarArg var) -- Nullary data constructors like True, False | Just dc <- isDataConWorkId_maybe var , isNullaryRepDataCon dc + , not for_bytecode = TagProper | isUnliftedType (idType var) @@ -93,6 +103,7 @@ lookupInfo env (StgVarArg var) = info | Just lf_info <- idLFInfo_maybe var + , not for_bytecode = case lf_info of -- Function, tagged (with arity) LFReEntrant {} @@ -112,6 +123,8 @@ lookupInfo env (StgVarArg var) | otherwise = TagDunno + where + for_bytecode = te_bytecode env lookupInfo _ (StgLitArg {}) = TagProper diff --git a/compiler/GHC/Stg/Pipeline.hs b/compiler/GHC/Stg/Pipeline.hs index 9e20010cf7..348d8e0e8f 100644 --- a/compiler/GHC/Stg/Pipeline.hs +++ b/compiler/GHC/Stg/Pipeline.hs @@ -50,6 +50,7 @@ data StgPipelineOpts = StgPipelineOpts -- ^ Should we lint the STG at various stages of the pipeline? , stgPipeline_pprOpts :: !StgPprOpts , stgPlatform :: !Platform + , stgPipeline_forBytecode :: !Bool } newtype StgM a = StgM { _unStgM :: ReaderT Char IO a } @@ -89,7 +90,7 @@ stg2stg logger extra_vars opts this_mod binds -- annotations (which is used by code generator to compute offsets into closures) ; let binds_sorted_with_fvs = depSortWithAnnotStgPgm this_mod binds' -- See Note [Tag inference for interactive contexts] - ; inferTags (stgPipeline_pprOpts opts) logger this_mod binds_sorted_with_fvs + ; inferTags (stgPipeline_pprOpts opts) (stgPipeline_forBytecode opts) logger this_mod binds_sorted_with_fvs } where diff --git a/testsuite/tests/simplStg/should_compile/T22840.hs b/testsuite/tests/simplStg/should_compile/T22840.hs new file mode 100644 index 0000000000..f7e4e59bc0 --- /dev/null +++ b/testsuite/tests/simplStg/should_compile/T22840.hs @@ -0,0 +1,14 @@ +{-# OPTIONS_GHC -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code #-} +{-# LANGUAGE TemplateHaskell #-} + +module C where + +import T22840A +import T22840B +import Control.Monad.IO.Class + +$(liftIO $ do + putStrLn "start" + putStrLn (disp theT) + putStrLn "end" + return []) diff --git a/testsuite/tests/simplStg/should_compile/T22840.stderr b/testsuite/tests/simplStg/should_compile/T22840.stderr new file mode 100644 index 0000000000..cdcd8ad478 --- /dev/null +++ b/testsuite/tests/simplStg/should_compile/T22840.stderr @@ -0,0 +1,6 @@ +[1 of 3] Compiling T22840A ( T22840A.hs, T22840A.o, T22840A.dyn_o ) +[2 of 3] Compiling T22840B ( T22840B.hs, T22840B.o, T22840B.dyn_o, interpreted ) +[3 of 3] Compiling C ( T22840.hs, T22840.o, T22840.dyn_o, interpreted ) +start +Just +end diff --git a/testsuite/tests/simplStg/should_compile/T22840A.hs b/testsuite/tests/simplStg/should_compile/T22840A.hs new file mode 100644 index 0000000000..706aae9462 --- /dev/null +++ b/testsuite/tests/simplStg/should_compile/T22840A.hs @@ -0,0 +1,9 @@ +module T22840A where + +data T = MkT !(Maybe Bool) + +disp :: T -> String +disp (MkT b) = + case b of + Nothing -> "Nothing" + Just _ -> "Just" diff --git a/testsuite/tests/simplStg/should_compile/T22840B.hs b/testsuite/tests/simplStg/should_compile/T22840B.hs new file mode 100644 index 0000000000..903c76c1e9 --- /dev/null +++ b/testsuite/tests/simplStg/should_compile/T22840B.hs @@ -0,0 +1,8 @@ +{-# OPTIONS_GHC -fwrite-if-simplified-core -fbyte-code-and-object-code -fprefer-byte-code #-} + +module T22840B where + +import T22840A + +theT :: T +theT = MkT (Just True) diff --git a/testsuite/tests/simplStg/should_compile/all.T b/testsuite/tests/simplStg/should_compile/all.T index d8bb1dfa5b..8df780ec23 100644 --- a/testsuite/tests/simplStg/should_compile/all.T +++ b/testsuite/tests/simplStg/should_compile/all.T @@ -14,3 +14,7 @@ test('T19717', normal, compile, ['-ddump-stg-final -dsuppress-uniques -dno-typea test('inferTags002', [ only_ways(['optasm']), grep_errmsg(r'(call stg\_ap\_0)', [1])], compile, ['-ddump-cmm -dsuppress-uniques -dno-typeable-binds -O']) test('T22212', normal, compile, ['-O']) +test('T22840', [extra_files( + [ 'T22840A.hs' + , 'T22840B.hs' + ]), when(not(have_dynamic()),skip)], multimod_compile, ['T22840', '-dynamic-too -dtag-inference-checks']) |