From b77da25ef0d95e776a43779bbb4843eb01d33552 Mon Sep 17 00:00:00 2001 From: Manuel M T Chakravarty Date: Wed, 5 Dec 2012 15:28:19 +1100 Subject: Rewrote vectorisation avoidance (based on the HS paper) * Vectorisation avoidance is now the default * Types and values from unvectorised modules are permitted in scalar code * Simplified the VECTORISE pragmas (see http://hackage.haskell.org/trac/ghc/wiki/DataParallel/VectPragma for the spec) * Vectorisation information is now included in the annotated Core AST --- compiler/vectorise/Vectorise.hs | 396 ++++------ compiler/vectorise/Vectorise/Convert.hs | 18 +- compiler/vectorise/Vectorise/Env.hs | 88 +-- compiler/vectorise/Vectorise/Exp.hs | 1048 +++++++++++++------------ compiler/vectorise/Vectorise/Monad.hs | 26 +- compiler/vectorise/Vectorise/Monad/Global.hs | 60 +- compiler/vectorise/Vectorise/Monad/InstEnv.hs | 13 +- compiler/vectorise/Vectorise/Monad/Local.hs | 22 +- compiler/vectorise/Vectorise/Type/Classify.hs | 42 +- compiler/vectorise/Vectorise/Type/Env.hs | 211 ++--- compiler/vectorise/Vectorise/Type/Type.hs | 13 +- compiler/vectorise/Vectorise/Utils.hs | 27 +- 12 files changed, 992 insertions(+), 972 deletions(-) (limited to 'compiler/vectorise') diff --git a/compiler/vectorise/Vectorise.hs b/compiler/vectorise/Vectorise.hs index 8b7e817826..e6c4b1e0cf 100644 --- a/compiler/vectorise/Vectorise.hs +++ b/compiler/vectorise/Vectorise.hs @@ -13,26 +13,22 @@ import Vectorise.Type.Type import Vectorise.Convert import Vectorise.Utils.Hoisting import Vectorise.Exp -import Vectorise.Vect import Vectorise.Env import Vectorise.Monad import HscTypes hiding ( MonadThings(..) ) import CoreUnfold ( mkInlineUnfolding ) -import CoreFVs import PprCore import CoreSyn import CoreMonad ( CoreM, getHscEnv ) import Type import Id import DynFlags -import BasicTypes ( isStrongLoopBreaker ) import Outputable import Util ( zipLazy ) import MonadUtils import Control.Monad -import Data.Maybe -- |Vectorise a single module. 
@@ -69,7 +65,7 @@ vectModule guts@(ModGuts { mg_tcs = tycons = do { dumpOptVt Opt_D_dump_vt_trace "Before vectorisation" $ pprCoreBindings binds - -- Pick out all 'VECTORISE type' and 'VECTORISE class' pragmas + -- Pick out all 'VECTORISE [SCALAR] type' and 'VECTORISE class' pragmas ; let ty_vect_decls = [vd | vd@(VectType _ _ _) <- vect_decls] cls_vect_decls = [vd | vd@(VectClass _) <- vect_decls] @@ -87,8 +83,7 @@ vectModule guts@(ModGuts { mg_tcs = tycons -- Vectorise all the top level bindings and VECTORISE declarations on imported identifiers -- NB: Need to vectorise the imported bindings first (local bindings may depend on them). - ; let impBinds = [imp_id | Vect imp_id _ <- vect_decls, isGlobalId imp_id] ++ - [imp_id | VectInst imp_id <- vect_decls, isGlobalId imp_id] + ; let impBinds = [(imp_id, expr) | Vect imp_id expr <- vect_decls, isGlobalId imp_id] ; binds_imp <- mapM vectImpBind impBinds ; binds_top <- mapM vectTopBind binds @@ -101,7 +96,8 @@ vectModule guts@(ModGuts { mg_tcs = tycons } } --- Try to vectorise a top-level binding. If it doesn't vectorise then return it unharmed. +-- Try to vectorise a top-level binding. If it doesn't vectorise, or if it is entirely scalar, then +-- omit vectorisation of that binding. -- -- For example, for the binding -- @@ -125,129 +121,173 @@ vectModule guts@(ModGuts { mg_tcs = tycons -- lfoo = ... -- @ -- --- @vfoo@ is the "vectorised", or scalar, version that does the same as the original --- function foo, but takes an explicit environment. +-- @vfoo@ is the "vectorised", or scalar, version that does the same as the original function foo, +-- but takes an explicit environment. -- -- @lfoo@ is the "lifted" version that works on arrays. -- --- @v_foo@ combines both of these into a `Closure` that also contains the --- environment. +-- @v_foo@ combines both of these into a `Closure` that also contains the environment. 
-- --- The original binding @foo@ is rewritten to call the vectorised version --- present in the closure. +-- The original binding @foo@ is rewritten to call the vectorised version present in the closure. -- -- Vectorisation may be surpressed by annotating a binding with a 'NOVECTORISE' pragma. If this -- pragma is used in a group of mutually recursive bindings, either all or no binding must have --- the pragma. If only some bindings are annotated, a fatal error is being raised. +-- the pragma. If only some bindings are annotated, a fatal error is being raised. (In the case of +-- scalar bindings, we only omit vectorisation if all bindings in a group are scalar.) +-- -- FIXME: Once we support partial vectorisation, we may be able to vectorise parts of a group, or -- we may emit a warning and refrain from vectorising the entire group. -- vectTopBind :: CoreBind -> VM CoreBind vectTopBind b@(NonRec var expr) - = unlessNoVectDecl $ - do { -- Vectorise the right-hand side, create an appropriate top-level binding and add it - -- to the vectorisation map. - ; (inline, isScalar, expr') <- vectTopRhs [] var expr - ; var' <- vectTopBinder var inline expr' - ; when isScalar $ - addGlobalScalarVar var - - -- We replace the original top-level binding by a value projected from the vectorised - -- closure and add any newly created hoisted top-level bindings. - ; cexpr <- tryConvert var var' expr - ; hs <- takeHoisted - ; return . 
Rec $ (var, cexpr) : (var', expr') : hs - } - `orElseErrV` - do { emitVt " Could NOT vectorise top-level binding" $ ppr var - ; return b + = do + { traceVt "= Vectorise non-recursive top-level variable" (ppr var) + + ; (hasNoVect, vectDecl) <- lookupVectDecl var + ; if hasNoVect + then do + { -- 'NOVECTORISE' pragma => leave this binding as it is + ; traceVt "NOVECTORISE" $ ppr var + ; return b + } + else do + { vectRhs <- case vectDecl of + Just (_, expr') -> + -- 'VECTORISE' pragma => just use the provided vectorised rhs + do + { traceVt "VECTORISE" $ ppr var + ; return $ Just (False, inlineMe, expr') + } + Nothing -> + -- no pragma => standard vectorisation of rhs + do + { traceVt "[Vanilla]" $ ppr var <+> char '=' <+> ppr expr + ; vectTopExpr var expr + } + ; hs <- takeHoisted -- make sure we clean those out (even if we skip) + ; case vectRhs of + { Nothing -> + -- scalar binding => leave this binding as it is + do + { traceVt "scalar binding [skip]" $ ppr var + ; return b + } + ; Just (parBind, inline, expr') -> do + { + -- vanilla case => create an appropriate top-level binding & add it to the vectorisation map + ; when parBind $ + addGlobalParallelVar var + ; var' <- vectTopBinder var inline expr' + + -- We replace the original top-level binding by a value projected from the vectorised + -- closure and add any newly created hoisted top-level bindings. + ; cexpr <- tryConvert var var' expr + ; return . 
Rec $ (var, cexpr) : (var', expr') : hs + } } } } + `orElseErrV` + do + { emitVt " Could NOT vectorise top-level binding" $ ppr var + ; return b + } +vectTopBind b@(Rec binds) + = do + { traceVt "= Vectorise recursive top-level variables" $ ppr vars + + ; vectDecls <- mapM lookupVectDecl vars + ; let hasNoVects = map fst vectDecls + ; if and hasNoVects + then do + { -- 'NOVECTORISE' pragmas => leave this entire binding group as it is + ; traceVt "NOVECTORISE" $ ppr vars + ; return b + } + else do + { if or hasNoVects + then do + { -- Inconsistent 'NOVECTORISE' pragmas => bail out + ; dflags <- getDynFlags + ; cantVectorise dflags noVectoriseErr (ppr b) } - where - unlessNoVectDecl vectorise - = do { hasNoVectDecl <- noVectDecl var - ; when hasNoVectDecl $ - traceVt "NOVECTORISE" $ ppr var - ; if hasNoVectDecl then return b else vectorise - } -vectTopBind b@(Rec bs) - = unlessSomeNoVectDecl $ - do { (vars', _, exprs', hs) <- fixV $ - \ ~(_, inlines, rhss, _) -> - do { -- Vectorise the right-hand sides, create an appropriate top-level bindings - -- and add them to the vectorisation map. - ; vars' <- sequence [vectTopBinder var inline rhs - | (var, ~(inline, rhs)) <- zipLazy vars (zip inlines rhss)] - ; (inlines, areScalars, exprs') <- mapAndUnzip3M (uncurry $ vectTopRhs vars) bs - ; hs <- takeHoisted - ; if and areScalars - then -- (1) Entire recursive group is scalar - -- => add all variables to the global set of scalars - do { mapM_ addGlobalScalarVar vars - ; return (vars', inlines, exprs', hs) - } - else -- (2) At least one binding is not scalar - -- => vectorise again with empty set of local scalars - do { (inlines, _, exprs') <- mapAndUnzip3M (uncurry $ vectTopRhs []) bs - ; hs <- takeHoisted - ; return (vars', inlines, exprs', hs) - } - } - - -- Replace the original top-level bindings by a values projected from the vectorised - -- closures and add any newly created hoisted top-level bindings to the group. 
- ; cexprs <- sequence $ zipWith3 tryConvert vars vars' exprs - ; return . Rec $ zip vars cexprs ++ zip vars' exprs' ++ hs - } - `orElseErrV` - return b - where - (vars, exprs) = unzip bs + else do + { -- For all bindings *with* a pragma, just use the pragma-supplied vectorised expression + ; newBindsWPragma <- concat <$> + sequence [ vectTopBindAndConvert bind inlineMe expr' + | (bind, (_, Just (_, expr'))) <- zip binds vectDecls] + + -- Standard vectorisation of all rhses that are *without* a pragma. + -- NB: The reason for 'fixV' is rather subtle: 'vectTopBindAndConvert' adds entries for + -- the bound variables in the recursive group to the vectorisation map, which in turn + -- are needed by 'vectPolyExprs' (unless it returns 'Nothing'). + ; let bindsWOPragma = [bind | (bind, (_, Nothing)) <- zip binds vectDecls] + ; (newBinds, _) <- fixV $ + \ ~(_, exprs') -> + do + { -- Create appropriate top-level bindings, enter them into the vectorisation map, and + -- vectorise the right-hand sides + ; newBindsWOPragma <- concat <$> + sequence [vectTopBindAndConvert bind inline expr + | (bind, ~(inline, expr)) <- zipLazy bindsWOPragma exprs'] + -- irrefutable pattern and 'zipLazy' to tie the knot; + -- hence, can't use 'zipWithM' + ; vectRhses <- vectTopExprs bindsWOPragma + ; hs <- takeHoisted -- make sure we clean those out (even if we skip) - unlessSomeNoVectDecl vectorise - = do { hasNoVectDecls <- mapM noVectDecl vars - ; when (and hasNoVectDecls) $ - traceVt "NOVECTORISE" $ ppr vars - ; if and hasNoVectDecls - then return b -- all bindings have 'NOVECTORISE' - else if or hasNoVectDecls - then do dflags <- getDynFlags - cantVectorise dflags noVectoriseErr (ppr b) -- some (but not all) have 'NOVECTORISE' - else vectorise -- no binding has a 'NOVECTORISE' decl - } + ; case vectRhses of + Nothing -> + -- scalar bindings => skip all bindings except those with pragmas and retract the + -- entries into the vectorisation map for the scalar bindings + do + { traceVt "scalar 
bindings [skip]" $ ppr vars + ; mapM_ (undefGlobalVar . fst) bindsWOPragma + ; return (bindsWOPragma ++ newBindsWPragma, exprs') + } + Just (parBind, exprs') -> + -- vanilla case => record parallel variables and return the final bindings + do + { when parBind $ + mapM_ addGlobalParallelVar vars + ; return (newBindsWOPragma ++ newBindsWPragma ++ hs, exprs') + } + } + ; return $ Rec newBinds + } } } + `orElseErrV` + do + { emitVt " Could NOT vectorise top-level bindings" $ ppr vars + ; return b + } + where + vars = map fst binds noVectoriseErr = "NOVECTORISE must be used on all or no bindings of a recursive group" + + -- Replace the original top-level bindings by a values projected from the vectorised + -- closures and add any newly created hoisted top-level bindings to the group. + vectTopBindAndConvert (var, expr) inline expr' + = do + { var' <- vectTopBinder var inline expr' + ; cexpr <- tryConvert var var' expr + ; return [(var, cexpr), (var', expr')] + } --- Add a vectorised binding to an imported top-level variable that has a VECTORISE [SCALAR] pragma +-- Add a vectorised binding to an imported top-level variable that has a VECTORISE pragma -- in this module. -- --- RESTIRCTION: Currently, we cannot use the pragma vor mutually recursive definitions. +-- RESTIRCTION: Currently, we cannot use the pragma for mutually recursive definitions. -- -vectImpBind :: Id -> VM CoreBind -vectImpBind var - = do { -- Vectorise the right-hand side, create an appropriate top-level binding and add it - -- to the vectorisation map. For the non-lifted version, we refer to the original - -- definition — i.e., 'Var var'. - -- NB: To support recursive definitions, we tie a lazy knot. 
- ; (var', _, expr') <- fixV $ - \ ~(_, inline, rhs) -> - do { var' <- vectTopBinder var inline rhs - ; (inline, isScalar, expr') <- vectTopRhs [] var (Var var) - - ; when isScalar $ - addGlobalScalarVar var - ; return (var', inline, expr') - } +vectImpBind :: (Id, CoreExpr) -> VM CoreBind +vectImpBind (var, expr) + = do + { traceVt "= Add vectorised binding to imported variable" (ppr var) - -- We add any newly created hoisted top-level bindings. - ; hs <- takeHoisted - ; return . Rec $ (var', expr') : hs - } - --- | Make the vectorised version of this top level binder, and add the mapping --- between it and the original to the state. For some binder @foo@ the vectorised --- version is @$v_foo@ + ; var' <- vectTopBinder var inlineMe expr + ; return $ NonRec var' expr + } + +-- |Make the vectorised version of this top level binder, and add the mapping between it and the +-- original to the state. For some binder @foo@ the vectorised version is @$v_foo@ -- --- NOTE: 'vectTopBinder' *MUST* be lazy in inline and expr because of how it is --- used inside of 'fixV' in 'vectTopBind'. +-- NOTE: 'vectTopBinder' *MUST* be lazy in inline and expr because of how it is used inside of +-- 'fixV' in 'vectTopBind'. -- vectTopBinder :: Var -- ^ Name of the binding. -> Inline -- ^ Whether it should be inlined, used to annotate it. @@ -257,20 +297,20 @@ vectTopBinder var inline expr = do { -- Vectorise the type attached to the var. 
; vty <- vectType (idType var) - -- If there is a vectorisation declartion for this binding, make sure that its type - -- matches - ; vectDecl <- lookupVectDecl var + -- If there is a vectorisation declartion for this binding, make sure its type matches + ; (_, vectDecl) <- lookupVectDecl var ; case vectDecl of Nothing -> return () Just (vdty, _) | eqType vty vdty -> return () | otherwise -> - do dflags <- getDynFlags - cantVectorise dflags ("Type mismatch in vectorisation pragma for " ++ showPpr dflags var) $ - (text "Expected type" <+> ppr vty) - $$ - (text "Inferred type" <+> ppr vdty) - + do + { dflags <- getDynFlags + ; cantVectorise dflags ("Type mismatch in vectorisation pragma for " ++ showPpr dflags var) $ + (text "Expected type" <+> ppr vty) + $$ + (text "Inferred type" <+> ppr vdty) + } -- Make the vectorised version of binding's name, and set the unfolding used for inlining ; var' <- liftM (`setIdUnfoldingLazily` unfolding) $ mkVectId var vty @@ -297,113 +337,17 @@ vectTopBinder var inline expr `setInlinePragma` dfunInlinePragma -} --- | Vectorise the RHS of a top-level binding, in an empty local environment. +-- |Project out the vectorised version of a binding from some closure, or return the original body +-- if that doesn't work. 
-- --- We need to distinguish four cases: --- --- (1) We have a (non-scalar) vectorisation declaration for the variable (which explicitly provides --- vectorised code implemented by the user) --- => no automatic vectorisation & instead use the user-supplied code --- --- (2) We have a scalar vectorisation declaration for a variable that is no dfun --- => generate vectorised code that uses a scalar 'map'/'zipWith' to lift the computation --- --- (3) We have a scalar vectorisation declaration for a variable that *is* a dfun --- => generate vectorised code according to the the "Note [Scalar dfuns]" below --- --- (4) There is no vectorisation declaration for the variable --- => perform automatic vectorisation of the RHS (the definition may or may not be a dfun; --- vectorisation proceeds differently depending on which it is) --- --- Note [Scalar dfuns] --- ~~~~~~~~~~~~~~~~~~~ --- --- Here is the translation scheme for scalar dfuns — assume the instance declaration: --- --- instance Num Int where --- (+) = primAdd --- {-# VECTORISE SCALAR instance Num Int #-} --- --- It desugars to --- --- $dNumInt :: Num Int --- $dNumInt = D:Num primAdd --- --- We vectorise it to --- --- $v$dNumInt :: V:Num Int --- $v$dNumInt = D:V:Num (closure2 ((+) $dNumInt) (scalar_zipWith ((+) $dNumInt)))) --- --- while adding the following entry to the vectorisation map: '$dNumInt' --> '$v$dNumInt'. --- --- See "Note [Vectorising classes]" in 'Vectorise.Type.Env' for the definition of 'V:Num'. --- --- NB: The outlined vectorisation scheme does not require the right-hand side of the original dfun. --- In fact, we definitely want to refer to the dfn variable instead of the right-hand side to --- ensure that the dictionary selection rules fire. --- -vectTopRhs :: [Var] -- ^ Names of all functions in the rec block - -> Var -- ^ Name of the binding. - -> CoreExpr -- ^ Body of the binding. 
- -> VM ( Inline -- (1) inline specification for the binding - , Bool -- (2) whether the right-hand side is a scalar computation - , CoreExpr) -- (3) the vectorised right-hand side -vectTopRhs recFs var expr - = closedV - $ do { globalScalar <- isGlobalScalarVar var - ; vectDecl <- lookupVectDecl var - ; dflags <- getDynFlags - ; let isDFun = isDFunId var - - ; traceVt ("vectTopRhs of " ++ showPpr dflags var ++ info globalScalar isDFun vectDecl ++ ":") $ - ppr expr - - ; rhs globalScalar isDFun vectDecl - } - where - rhs _globalScalar _isDFun (Just (_, expr')) -- Case (1) - = return (inlineMe, False, expr') - rhs True False Nothing -- Case (2) - = do { expr' <- vectScalarFun expr - ; return (inlineMe, True, vectorised expr') - } - rhs True True Nothing -- Case (3) - = do { expr' <- vectScalarDFun var - ; return (DontInline, True, expr') - } - rhs False False Nothing -- Case (4) — not a dfun - = do { let exprFvs = freeVars expr - ; (inline, isScalar, vexpr) - <- inBind var $ - vectPolyExpr (isStrongLoopBreaker $ idOccInfo var) recFs exprFvs Nothing - ; return (inline, isScalar, vectorised vexpr) - } - rhs False True Nothing -- Case (4) — is a dfun - = do { expr' <- vectDictExpr expr - ; return (DontInline, True, expr') - } - - info True False _ = " [VECTORISE SCALAR]" - info True True _ = " [VECTORISE SCALAR instance]" - info False _ vectDecl | isJust vectDecl = " [VECTORISE]" - | otherwise = " (no pragma)" - --- |Project out the vectorised version of a binding from some closure, --- or return the original body if that doesn't work or the binding is scalar. --- -tryConvert :: Var -- ^ Name of the original binding (eg @foo@) - -> Var -- ^ Name of vectorised version of binding (eg @$vfoo@) - -> CoreExpr -- ^ The original body of the binding. +tryConvert :: Var -- ^Name of the original binding (eg @foo@) + -> Var -- ^Name of vectorised version of binding (eg @$vfoo@) + -> CoreExpr -- ^The original body of the binding. 
-> VM CoreExpr tryConvert var vect_var rhs - = do { globalScalar <- isGlobalScalarVar var - ; if globalScalar - then - return rhs - else - fromVect (idType var) (Var vect_var) - `orElseErrV` - do { emitVt " Could NOT call vectorised from original version" $ ppr var - ; return rhs - } - } + = fromVect (idType var) (Var vect_var) + `orElseErrV` + do + { emitVt " Could NOT call vectorised from original version" $ ppr var + ; return rhs + } diff --git a/compiler/vectorise/Vectorise/Convert.hs b/compiler/vectorise/Vectorise/Convert.hs index 048362d59c..f21f5cac86 100644 --- a/compiler/vectorise/Vectorise/Convert.hs +++ b/compiler/vectorise/Vectorise/Convert.hs @@ -84,16 +84,16 @@ identityConv (AppTy {}) = noV $ text "identityConv: type appl. changes under identityConv (FunTy {}) = noV $ text "identityConv: function type changes under vectorisation" identityConv (ForAllTy {}) = noV $ text "identityConv: quantified type changes under vectorisation" --- |Check that this type constructor is neutral under type vectorisation — i.e., it is not altered --- by vectorisation as they contain no parallel arrays. +-- |Check that this type constructor is not changed by vectorisation — i.e., it does not embed any +-- parallel arrays. 
-- identityConvTyCon :: TyCon -> VM () identityConvTyCon tc - | isBoxedTupleTyCon tc = return () - | isUnLiftedTyCon tc = return () - | otherwise - = do tc' <- maybeV notVectErr (lookupTyCon tc) - if tc == tc' then return () else noV idErr + = do + { tc' <- lookupTyCon tc + ; case tc' of + Nothing -> return () + Just _ -> noV idErr + } where - notVectErr = text "identityConvTyCon: no vectorised version for type constructor" <+> ppr tc - idErr = text "identityConvTyCon: type constructor contains parallel arrays" <+> ppr tc + idErr = text "identityConvTyCon: type constructor contains parallel arrays" <+> ppr tc diff --git a/compiler/vectorise/Vectorise/Env.hs b/compiler/vectorise/Vectorise/Env.hs index d58ec8f800..345b4ba1c3 100644 --- a/compiler/vectorise/Vectorise/Env.hs +++ b/compiler/vectorise/Vectorise/Env.hs @@ -31,7 +31,7 @@ import Name import NameEnv import FastString import TysPrim -import TysWiredIn +--import TysWiredIn import Data.Maybe @@ -60,7 +60,8 @@ data LocalEnv -- ^Mapping from tyvars to their PA dictionaries. , local_bind_name :: FastString - -- ^Local binding name. + -- ^Local binding name. This is only used to generate better names for hoisted + -- expressions. } -- |Create an empty local environment. @@ -84,35 +85,34 @@ data GlobalEnv -- ^Mapping from global variables to their vectorised versions — aka the /vectorisation -- map/. - , global_vect_decls :: VarEnv (Type, CoreExpr) - -- ^Mapping from global variables that have a vectorisation declaration to the right-hand - -- side of that declaration and its type. This mapping only applies to non-scalar - -- vectorisation declarations. All variables with a scalar vectorisation declaration are - -- mentioned in 'global_scalars_vars'. - - , global_scalar_vars :: VarSet - -- ^Purely scalar variables. Code which mentions only these variables doesn't have to be - -- lifted. 
This includes variables from the current module that have a scalar - -- vectorisation declaration and those that the vectoriser determines to be scalar. - - , global_scalar_tycons :: NameSet - -- ^Type constructors whose values can only contain scalar data. This includes type - -- constructors that appear in a 'VECTORISE SCALAR type' pragma or 'VECTORISE type' pragma - -- *without* a right-hand side in the current or an imported module as well as type - -- constructors that are automatically identified as scalar by the vectoriser (in - -- 'Vectorise.Type.Env'). Scalar code may only operate on such data. + , global_parallel_vars :: VarSet + -- ^The domain of 'global_vars'. -- - -- NB: Not all type constructors in that set are members of the 'Scalar' type class - -- (which can be trivially marshalled across scalar code boundaries). - - , global_novect_vars :: VarSet - -- ^Variables that are not vectorised. (They may be referenced in the right-hand sides - -- of vectorisation declarations, though.) + -- This information is not redundant as it is impossible to extract the domain from a + -- 'VarEnv' (which is keyed on uniques alone). Moreover, we have mapped variables that + -- do not involve parallelism — e.g., the workers of vectorised, but scalar data types. + -- In addition, workers of parallel data types that we could not vectorise also need to + -- be tracked. + + , global_vect_decls :: VarEnv (Maybe (Type, CoreExpr)) + -- ^Mapping from global variables that have a vectorisation declaration to the right-hand + -- side of that declaration and its type and mapping variables that have NOVECTORISE + -- declarations to 'Nothing'. , global_tycons :: NameEnv TyCon - -- ^Mapping from TyCons to their vectorised versions. - -- TyCons which do not have to be vectorised are mapped to themselves. + -- ^Mapping from TyCons to their vectorised versions. The vectorised version will be + -- identical to the original version if it is not changed by vectorisation. 
In any case, + -- if a tycon appears in the domain of this mapping, it was successfully vectorised. + , global_parallel_tycons :: NameSet + -- ^Type constructors whose definition directly or indirectly includes a parallel type, + -- such as '[::]'. + -- + -- NB: This information is not redundant as some types have got a mapping in + -- 'global_tycons' (to a type other than themselves) and are still not parallel. An + -- example is '(->)'. Moreover, some types have *not* got a mapping in 'global_tycons' + -- (because they couldn't be vectorised), but still contain parallel types. + , global_datacons :: NameEnv DataCon -- ^Mapping from DataCons to their vectorised versions. @@ -129,7 +129,7 @@ data GlobalEnv -- ^External package inst-env & home-package inst-env for family instances. , global_bindings :: [(Var, CoreExpr)] - -- ^Hoisted bindings. + -- ^Hoisted bindings — temporary storage for toplevel bindings during code gen. } -- |Create an initial global environment. @@ -143,9 +143,8 @@ initGlobalEnv info vectDecls instEnvs famInstEnvs = GlobalEnv { global_vars = mapVarEnv snd $ vectInfoVar info , global_vect_decls = mkVarEnv vects - , global_scalar_vars = vectInfoScalarVars info `extendVarSetList` scalar_vars - , global_scalar_tycons = vectInfoScalarTyCons info `addListToNameSet` scalar_tycons - , global_novect_vars = mkVarSet novects + , global_parallel_vars = vectInfoParallelVars info + , global_parallel_tycons = vectInfoParallelTyCons info , global_tycons = mapNameEnv snd $ vectInfoTyCon info , global_datacons = mapNameEnv snd $ vectInfoDataCon info , global_pa_funs = emptyNameEnv @@ -155,23 +154,12 @@ initGlobalEnv info vectDecls instEnvs famInstEnvs , global_bindings = [] } where - vects = [(var, (ty, exp)) | Vect var (Just exp@(Var rhs_var)) <- vectDecls - , let ty = varType rhs_var] + vects = [(var, Just (ty, exp)) | Vect var exp@(Var rhs_var) <- vectDecls + , let ty = varType rhs_var] ++ -- FIXME: we currently only allow RHSes consisting of a -- single 
variable to be able to obtain the type without -- inference — see also 'TcBinds.tcVect' - scalar_vars = [var | Vect var Nothing <- vectDecls] ++ - [var | VectInst var <- vectDecls] ++ - [dataConWrapId doubleDataCon, dataConWrapId floatDataCon, dataConWrapId intDataCon] -- TODO: fix this hack - novects = [var | NoVect var <- vectDecls] - scalar_tycons = [tyConName tycon | VectType True tycon Nothing <- vectDecls] ++ - [tyConName tycon | VectType _ tycon (Just tycon') <- vectDecls - , tycon == tycon'] ++ - map tyConName [doublePrimTyCon, intPrimTyCon, floatPrimTyCon] -- TODO: fix this hack - -- - for 'VectType True tycon Nothing', we checked that the type does not - -- contain arrays (or type variables that could be instatiated to arrays) - -- - for 'VectType _ tycon (Just tycon')', where the two tycons are the same, - -- we also know that there can be no embedded arrays + [(var, Nothing) | NoVect var <- vectDecls] -- Operators on Global Environments ------------------------------------------- @@ -210,11 +198,11 @@ setPRFunsEnv ps genv = genv { global_pr_funs = mkNameEnv ps } modVectInfo :: GlobalEnv -> [Id] -> [TyCon] -> [CoreVect]-> VectInfo -> VectInfo modVectInfo env mg_ids mg_tyCons vectDecls info = info - { vectInfoVar = mk_env ids (global_vars env) - , vectInfoTyCon = mk_env tyCons (global_tycons env) - , vectInfoDataCon = mk_env dataCons (global_datacons env) - , vectInfoScalarVars = global_scalar_vars env `minusVarSet` vectInfoScalarVars info - , vectInfoScalarTyCons = global_scalar_tycons env `minusNameSet` vectInfoScalarTyCons info + { vectInfoVar = mk_env ids (global_vars env) + , vectInfoTyCon = mk_env tyCons (global_tycons env) + , vectInfoDataCon = mk_env dataCons (global_datacons env) + , vectInfoParallelVars = global_parallel_vars env `minusVarSet` vectInfoParallelVars info + , vectInfoParallelTyCons = global_parallel_tycons env `minusNameSet` vectInfoParallelTyCons info } where vectIds = [id | Vect id _ <- vectDecls] ++ diff --git 
a/compiler/vectorise/Vectorise/Exp.hs b/compiler/vectorise/Vectorise/Exp.hs index 8c5ef0045d..88f123210b 100644 --- a/compiler/vectorise/Vectorise/Exp.hs +++ b/compiler/vectorise/Vectorise/Exp.hs @@ -3,10 +3,9 @@ -- |Vectorisation of expressions. module Vectorise.Exp - ( -- * Vectorise polymorphic expressions with special cases for right-hand sides of particular - -- variable bindings - vectPolyExpr - , vectDictExpr + ( -- * Vectorise right-hand sides of toplevel bindings + vectTopExpr + , vectTopExprs , vectScalarFun , vectScalarDFun ) @@ -32,393 +31,404 @@ import DataCon import TyCon import TcType import Type -import PrelNames +import TypeRep import Var import VarEnv import VarSet +import NameSet import Id import BasicTypes( isStrongLoopBreaker ) import Literal -import TysWiredIn import TysPrim import Outputable import FastString +import DynFlags +import Util +import MonadUtils + import Control.Monad -import Control.Applicative import Data.Maybe import Data.List -import TcRnMonad (doptM) -import DynFlags -import Util -- Main entry point to vectorise expressions ----------------------------------- --- |Vectorise a polymorphic expression. +-- |Vectorise a polymorphic expression that forms a *non-recursive* binding. +-- +-- Return 'Nothing' if the expression is scalar; otherwise, the first component of the result +-- (which is of type 'Bool') indicates whether the expression is parallel (i.e., whether it is +-- tagged as 'VIParr'). -- --- If not yet available, precompute vectorisation avoidance information before vectorising. If --- the vectorisation avoidance optimisation is enabled, also use the vectorisation avoidance --- information to encapsulated subexpression that do not need to be vectorised. +-- We have got the non-recursive case as a special case as it doesn't require to compute +-- vectorisation information twice. 
-- -vectPolyExpr :: Bool -> [Var] -> CoreExprWithFVs -> Maybe VITree - -> VM (Inline, Bool, VExpr) - -- precompute vectorisation avoidance information (and possibly encapsulated subexpressions) -vectPolyExpr loop_breaker recFns expr Nothing +vectTopExpr :: Var -> CoreExpr -> VM (Maybe (Bool, Inline, CoreExpr)) +vectTopExpr var expr = do - { vectAvoidance <- liftDs $ doptM Opt_AvoidVect - ; vi <- vectAvoidInfo expr - ; (expr', vi') <- - if vectAvoidance - then do - { (expr', vi') <- encapsulateScalars vi expr - ; traceVt "vectPolyExpr encapsulated:" (ppr $ deAnnotate expr') - ; return (expr', vi') - } - else return (expr, vi) - ; vectPolyExpr loop_breaker recFns expr' (Just vi') + { exprVI <- encapsulateScalars <=< vectAvoidInfo emptyVarSet . freeVars $ expr + ; if isVIEncaps exprVI + then + return Nothing + else do + { vExpr <- closedV $ + inBind var $ + vectAnnPolyExpr False exprVI + ; inline <- computeInline exprVI + ; return $ Just (isVIParr exprVI, inline, vectorised vExpr) + } } - -- traverse through ticks -vectPolyExpr loop_breaker recFns (_, AnnTick tickish expr) (Just (VITNode _ [vit])) - = do - { (inline, isScalarFn, expr') <- vectPolyExpr loop_breaker recFns expr (Just vit) - ; return (inline, isScalarFn, vTick tickish expr') - } +-- Compute the inlining hint for the right-hand side of a top-level binding. 
+-- +computeInline :: CoreExprWithVectInfo -> VM Inline +computeInline ((_, VIDict), _) = return $ DontInline +computeInline (_, AnnTick _ expr) = computeInline expr +computeInline expr@(_, AnnLam _ _) = Inline <$> polyArity tvs + where + (tvs, _) = collectAnnTypeBinders expr +computeInline _expr = return $ DontInline - -- collect and vectorise type abstractions; then, descent into the body -vectPolyExpr loop_breaker recFns expr (Just vit) - = do - { let (tvs, mono) = collectAnnTypeBinders expr - vit' = stripLevels (length tvs) vit - ; arity <- polyArity tvs - ; polyAbstract tvs $ \args -> - do - { (inline, isScalarFn, mono') <- vectFnExpr False loop_breaker recFns mono vit' - ; return (addInlineArity inline arity, isScalarFn, mapVect (mkLams $ tvs ++ args) mono') - } +-- |Vectorise a recursive group of top-level polymorphic expressions. +-- +-- Return 'Nothing' if the expression group is scalar; otherwise, the first component of the result +-- (which is of type 'Bool') indicates whether the expressions are parallel (i.e., whether they are +-- tagged as 'VIParr'). +-- +vectTopExprs :: [(Var, CoreExpr)] -> VM (Maybe (Bool, [(Inline, CoreExpr)])) +vectTopExprs binds + = do + { exprVIs <- mapM (vectAvoidAndEncapsulate emptyVarSet) exprs + ; if all isVIEncaps exprVIs + then + return Nothing + else do + { (areVIParr, vExprs) <- unzip <$> mapM encapsulateAndVect binds + ; return $ Just (or areVIParr, vExprs) + } } where - stripLevels 0 vit = vit - stripLevels n (VITNode _ [vit]) = stripLevels (n - 1) vit - stripLevels _ vit = pprPanic "vectPolyExpr: stripLevels:" (text (show vit)) + (vars, exprs) = unzip binds + + vectAvoidAndEncapsulate pvs = encapsulateScalars <=< vectAvoidInfo pvs . 
freeVars + + encapsulateAndVect (var, expr) + = do + { exprVI <- vectAvoidAndEncapsulate (mkVarSet vars) expr + ; vExpr <- closedV $ + inBind var $ + vectAnnPolyExpr (isStrongLoopBreaker $ idOccInfo var) exprVI + ; inline <- computeInline exprVI + ; return (isVIParr exprVI, (inline, vectorised vExpr)) + } + +-- |Vectorise a polymorphic expression annotated with vectorisation information. +-- +-- The special case of dictionary functions is currently handled separately. (Would be neater to +-- integrate them, though!) +-- +vectAnnPolyExpr :: Bool -> CoreExprWithVectInfo -> VM VExpr +vectAnnPolyExpr loop_breaker (_, AnnTick tickish expr) + -- traverse through ticks + = vTick tickish <$> vectAnnPolyExpr loop_breaker expr +vectAnnPolyExpr loop_breaker expr + | isVIDict expr + -- special case the right-hand side of dictionary functions + = (, undefined) <$> vectDictExpr (deAnnotate expr) + | otherwise + -- collect and vectorise type abstractions; then, descent into the body + = polyAbstract tvs $ \args -> + mapVect (mkLams $ tvs ++ args) <$> vectFnExpr False loop_breaker mono + where + (tvs, mono) = collectAnnTypeBinders expr -- Encapsulate every purely sequential subexpression of a (potentially) parallel expression into a --- into a lambda abstraction over all its free variables followed by the corresponding application --- to those variables. We can, then, avoid the vectorisation of the ensapsulated subexpressions. +-- lambda abstraction over all its free variables followed by the corresponding application to those +-- variables. We can, then, avoid the vectorisation of the ensapsulated subexpressions. -- -- Preconditions: -- -- * All free variables and the result type must be /simple/ types. --- * The expression is sufficientlt complex (top warrant special treatment). For now, that is +-- * The expression is sufficiently complex (to warrant special treatment). For now, that is -- every expression that is not constant and contains at least one operation. 
-- -encapsulateScalars :: VITree -> CoreExprWithFVs -> VM (CoreExprWithFVs, VITree) -encapsulateScalars vit ce@(_, AnnType _ty) - = return (ce, vit) - -encapsulateScalars vit ce@(_, AnnVar _v) - = return (ce, vit) - -encapsulateScalars vit ce@(_, AnnLit _) - = return (ce, vit) - -encapsulateScalars (VITNode vi [vit]) (fvs, AnnTick tck expr) - = do { (extExpr, vit') <- encapsulateScalars vit expr - ; return ((fvs, AnnTick tck extExpr), VITNode vi [vit']) - } - -encapsulateScalars _ (_fvs, AnnTick _tck _expr) - = panic "encapsulateScalar AnnTick doesn't match up" - -encapsulateScalars (VITNode vi [vit]) ce@(fvs, AnnLam bndr expr) - = do { varsS <- varsSimple fvs - ; case (vi, varsS) of - (VISimple, True) -> do { let (e', vit') = liftSimple vit ce - ; return (e', vit') - } - _ -> do { (extExpr, vit') <- encapsulateScalars vit expr - ; return ((fvs, AnnLam bndr extExpr), VITNode vi [vit']) - } - } - -encapsulateScalars _ (_fvs, AnnLam _bndr _expr) - = panic "encapsulateScalars AnnLam doesn't match up" - -encapsulateScalars vt@(VITNode vi [vit1, vit2]) ce@(fvs, AnnApp ce1 ce2) - = do { varsS <- varsSimple fvs - ; case (vi, varsS) of - (VISimple, True) -> do { let (e', vt') = liftSimple vt ce - -- ; checkTreeAnnM vt' e' - -- ; traceVt "Passed checkTree test!!" 
(ppr $ deAnnotate e') - ; return (e', vt') - } - _ -> do { (etaCe1, vit1') <- encapsulateScalars vit1 ce1 - ; (etaCe2, vit2') <- encapsulateScalars vit2 ce2 - ; return ((fvs, AnnApp etaCe1 etaCe2), VITNode vi [vit1', vit2']) - } - } - -encapsulateScalars _ (_fvs, AnnApp _ce1 _ce2) - = panic "encapsulateScalars AnnApp doesn't match up" - -encapsulateScalars vt@(VITNode vi (scrutVit : altVits)) ce@(fvs, AnnCase scrut bndr ty alts) - = do { varsS <- varsSimple fvs - ; case (vi, varsS) of - (VISimple, True) -> return $ liftSimple vt ce - _ -> do { (extScrut, scrutVit') <- encapsulateScalars scrutVit scrut - ; extAltsVits <- zipWithM expAlt altVits alts - ; let (extAlts, altVits') = unzip extAltsVits - ; return ((fvs, AnnCase extScrut bndr ty extAlts), VITNode vi (scrutVit': altVits')) - } - } +encapsulateScalars :: CoreExprWithVectInfo -> VM CoreExprWithVectInfo +encapsulateScalars ce@(_, AnnType _ty) + = return ce +encapsulateScalars ce@((_, VISimple), AnnVar v) + | isFunTy . varType $ v -- NB: diverts from the paper: encapsulate scalar function types + = liftSimpleAndCase ce +encapsulateScalars ce@(_, AnnVar _v) + = return ce +encapsulateScalars ce@(_, AnnLit _) + = return ce +encapsulateScalars ((fvs, vi), AnnTick tck expr) + = do + { encExpr <- encapsulateScalars expr + ; return ((fvs, vi), AnnTick tck encExpr) + } +encapsulateScalars ce@((fvs, vi), AnnLam bndr expr) + = do + { varsS <- allScalarVarTypeSet fvs + ; case (vi, varsS) of + (VISimple, True) -> liftSimpleAndCase ce + _ -> do + { encExpr <- encapsulateScalars expr + ; return ((fvs, vi), AnnLam bndr encExpr) + } + } +encapsulateScalars ce@((fvs, vi), AnnApp ce1 ce2) + = do + { varsS <- allScalarVarTypeSet fvs + ; case (vi, varsS) of + (VISimple, True) -> liftSimpleAndCase ce + _ -> do + { encCe1 <- encapsulateScalars ce1 + ; encCe2 <- encapsulateScalars ce2 + ; return ((fvs, vi), AnnApp encCe1 encCe2) + } + } +encapsulateScalars ce@((fvs, vi), AnnCase scrut bndr ty alts) + = do + { varsS <- 
allScalarVarTypeSet fvs + ; case (vi, varsS) of + (VISimple, True) -> liftSimpleAndCase ce + _ -> do + { encScrut <- encapsulateScalars scrut + ; encAlts <- mapM encAlt alts + ; return ((fvs, vi), AnnCase encScrut bndr ty encAlts) + } + } where - expAlt vt (con, bndrs, expr) - = do { (extExpr, vt') <- encapsulateScalars vt expr - ; return ((con, bndrs, extExpr), vt') - } - -encapsulateScalars _ (_fvs, AnnCase _scrut _bndr _ty _alts) - = panic "encapsulateScalars AnnCase doesn't match up" - -encapsulateScalars vt@(VITNode vi [vt1, vt2]) ce@(fvs, AnnLet (AnnNonRec bndr expr1) expr2) - = do { varsS <- varsSimple fvs - ; case (vi, varsS) of - (VISimple, True) -> return $ liftSimple vt ce - _ -> do { (extExpr1, vt1') <- encapsulateScalars vt1 expr1 - ; (extExpr2, vt2') <- encapsulateScalars vt2 expr2 - ; return ((fvs, AnnLet (AnnNonRec bndr extExpr1) extExpr2), VITNode vi [vt1', vt2']) - } - } - -encapsulateScalars _ (_fvs, AnnLet (AnnNonRec _bndr _expr1) _expr2) - = panic "encapsulateScalars AnnLet nonrec doesn't match up" - -encapsulateScalars vt@(VITNode vi (vtB : vtBnds)) ce@(fvs, AnnLet (AnnRec bndngs) expr) - = do { varsS <- varsSimple fvs - ; case (vi, varsS) of - (VISimple, True) -> return $ liftSimple vt ce - _ -> do { extBndsVts <- zipWithM expBndg vtBnds bndngs - ; let (extBnds, vtBnds') = unzip extBndsVts - ; (extExpr, vtB') <- encapsulateScalars vtB expr - ; let vt' = VITNode vi (vtB':vtBnds') - ; return ((fvs, AnnLet (AnnRec extBnds) extExpr), vt') - } - } - where - expBndg vit (bndr, expr) - = do { (extExpr, vit') <- encapsulateScalars vit expr - ; return ((bndr, extExpr), vit') - } - -encapsulateScalars _ (_fvs, AnnLet (AnnRec _) _expr2) - = panic "encapsulateScalars AnnLet rec doesn't match up" - -encapsulateScalars (VITNode vi [vit]) (fvs, AnnCast expr coercion) - = do { (extExpr, vit') <- encapsulateScalars vit expr - ; return ((fvs, AnnCast extExpr coercion), VITNode vi [vit']) - } - -encapsulateScalars _ (_fvs, AnnCast _expr _coercion) - = panic 
"encapsulateScalars AnnCast rec doesn't match up" - -encapsulateScalars _ _ - = panic "encapsulateScalars case not handled" + encAlt (con, bndrs, expr) = (con, bndrs,) <$> encapsulateScalars expr +encapsulateScalars ce@((fvs, vi), AnnLet (AnnNonRec bndr expr1) expr2) + = do + { varsS <- allScalarVarTypeSet fvs + ; case (vi, varsS) of + (VISimple, True) -> liftSimpleAndCase ce + _ -> do + { encExpr1 <- encapsulateScalars expr1 + ; encExpr2 <- encapsulateScalars expr2 + ; return ((fvs, vi), AnnLet (AnnNonRec bndr encExpr1) encExpr2) + } + } +encapsulateScalars ce@((fvs, vi), AnnLet (AnnRec binds) expr) + = do + { varsS <- allScalarVarTypeSet fvs + ; case (vi, varsS) of + (VISimple, True) -> liftSimpleAndCase ce + _ -> do + { encBinds <- mapM encBind binds + ; encExpr <- encapsulateScalars expr + ; return ((fvs, vi), AnnLet (AnnRec encBinds) encExpr) + } + } + where + encBind (bndr, expr) = (bndr,) <$> encapsulateScalars expr +encapsulateScalars ((fvs, vi), AnnCast expr coercion) + = do + { encExpr <- encapsulateScalars expr + ; return ((fvs, vi), AnnCast encExpr coercion) + } +encapsulateScalars _ + = panic "Vectorise.Exp.encapsulateScalars: unknown constructor" --- Lambda-lift the given expression and apply it to the abstracted free variables. +-- Lambda-lift the given simple expression and apply it to the abstracted free variables. -- --- If the expression is a case expression scrutinising anything but a primitive type, then lift +-- If the expression is a case expression scrutinising anything, but a scalar type, then lift -- each alternative individually. 
-- -liftSimple :: VITree -> CoreExprWithFVs -> (CoreExprWithFVs, VITree) -liftSimple (VITNode vi (scrutVit : altVits)) (fvs, AnnCase expr bndr t alts) - | Just (c,_) <- splitTyConApp_maybe (exprType $ deAnnotate $ expr), - (not $ elem c [boolTyCon, intTyCon, doubleTyCon, floatTyCon]) -- FIXME: shouldn't be hardcoded - = ((fvs, AnnCase expr bndr t alts'), VITNode vi (scrutVit : altVits')) - where - (alts', altVits') = unzip $ map (\(ac,bndrs, (alt, avi)) -> ((ac,bndrs,alt), avi)) $ - zipWith (\(ac, bndrs, aex) -> \altVi -> (ac, bndrs, liftSimple altVi aex)) alts altVits - -liftSimple viTree ae@(fvs, _annEx) - = (mkAnnApps (mkAnnLams ae vars) vars, viTree') +liftSimpleAndCase :: CoreExprWithVectInfo -> VM CoreExprWithVectInfo +liftSimpleAndCase aexpr@((fvs, _vi), AnnCase expr bndr t alts) + = do + { vi <- vectAvoidInfoTypeOf expr + ; if (vi == VISimple) + then + return $ liftSimple aexpr -- if the scrutinee is scalar, we need no special treatment + else do + { alts' <- mapM (\(ac, bndrs, aexpr) -> (ac, bndrs,) <$> liftSimpleAndCase aexpr) alts + ; return ((fvs, vi), AnnCase expr bndr t alts') + } + } +liftSimpleAndCase aexpr = return $ liftSimple aexpr + +liftSimple :: CoreExprWithVectInfo -> CoreExprWithVectInfo +liftSimple ((fvs, vi), expr) + = ASSERT(vi == VISimple) + mkAnnApps (mkAnnLams vars fvs expr) vars where - mkViTreeLams (VITNode _ vits) [] = VITNode VIEncaps vits - mkViTreeLams vi (_:vs) = VITNode VIEncaps [mkViTreeLams vi vs] + vars = varSetElems fvs - mkViTreeApps vi [] = vi - mkViTreeApps vi (_:vs) = VITNode VISimple [mkViTreeApps vi vs, VITNode VISimple []] - - vars = varSetElems fvs - viTree' = mkViTreeApps (mkViTreeLams viTree vars) vars - - mkAnnLam :: bndr -> AnnExpr bndr VarSet -> AnnExpr' bndr VarSet - mkAnnLam bndr ce = AnnLam bndr ce - - mkAnnLams:: CoreExprWithFVs -> [Var] -> CoreExprWithFVs - mkAnnLams (fv, aex') [] = (fv, aex') -- fv should be empty. check! 
- mkAnnLams (fv, aex') (v:vs) = mkAnnLams (delVarSet fv v, (mkAnnLam v ((delVarSet fv v), aex'))) vs - - mkAnnApp :: (AnnExpr bndr VarSet) -> Var -> (AnnExpr' bndr VarSet) - mkAnnApp aex v = AnnApp aex (unitVarSet v, (AnnVar v)) + mkAnnLams :: [Var] -> VarSet -> AnnExpr' Var (VarSet, VectAvoidInfo) -> CoreExprWithVectInfo + mkAnnLams [] fvs expr = ASSERT(isEmptyVarSet fvs) + ((emptyVarSet, VIEncaps), expr) + mkAnnLams (v:vs) fvs expr = mkAnnLams vs (fvs `delVarSet` v) (AnnLam v ((fvs, VIEncaps), expr)) - mkAnnApps:: CoreExprWithFVs -> [Var] -> CoreExprWithFVs - mkAnnApps (fv, aex') [] = (fv, aex') - mkAnnApps ae (v:vs) = - let - (fv, aex') = mkAnnApps ae vs - in (extendVarSet fv v, mkAnnApp (fv, aex') v) + mkAnnApps :: CoreExprWithVectInfo -> [Var] -> CoreExprWithVectInfo + mkAnnApps aexpr [] = aexpr + mkAnnApps aexpr (v:vs) = mkAnnApps (mkAnnApp aexpr v) vs + + mkAnnApp :: CoreExprWithVectInfo -> Var -> CoreExprWithVectInfo + mkAnnApp aexpr@((fvs, _vi), _expr) v + = ((fvs `extendVarSet` v, VISimple), AnnApp aexpr ((unitVarSet v, VISimple), AnnVar v)) -- |Vectorise an expression. 
-- -vectExpr :: CoreExprWithFVs -> VITree -> VM VExpr --- vectExpr e vi | not (checkTree vi (deAnnotate e)) --- = pprPanic "vectExpr" (ppr $ deAnnotate e) - -vectExpr (_, AnnVar v) _ +vectExpr :: CoreExprWithVectInfo -> VM VExpr + +vectExpr (_, AnnVar v) = vectVar v -vectExpr (_, AnnLit lit) _ +vectExpr (_, AnnLit lit) = vectConst $ Lit lit -vectExpr e@(_, AnnLam bndr _) vt - | isId bndr = (\(_, _, ve) -> ve) <$> vectFnExpr True False [] e vt - | otherwise = do dflags <- getDynFlags - cantVectorise dflags "Unexpected type lambda (vectExpr)" (ppr (deAnnotate e)) +vectExpr e@(_, AnnLam bndr _) + | isId bndr = vectFnExpr True False e + | otherwise + = do + { dflags <- getDynFlags + ; cantVectorise dflags "Unexpected type lambda (vectExpr)" $ ppr (deAnnotate e) + } -- SPECIAL CASE: Vectorise/lift 'patError @ ty err' by only vectorising/lifting the type 'ty'; -- its only purpose is to abort the program, but we need to adjust the type to keep CoreLint -- happy. -- FIXME: can't be do this with a VECTORISE pragma on 'pAT_ERROR_ID' now? 
-vectExpr (_, AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType ty)) err) _ +vectExpr (_, AnnApp (_, AnnApp (_, AnnVar v) (_, AnnType ty)) err) | v == pAT_ERROR_ID - = do { (vty, lty) <- vectAndLiftType ty - ; return (mkCoreApps (Var v) [Type vty, err'], mkCoreApps (Var v) [Type lty, err']) - } + = do + { (vty, lty) <- vectAndLiftType ty + ; return (mkCoreApps (Var v) [Type vty, err'], mkCoreApps (Var v) [Type lty, err']) + } where err' = deAnnotate err -- type application (handle multiple consecutive type applications simultaneously to ensure the -- PA dictionaries are put at the right places) -vectExpr e@(_, AnnApp _ arg) (VITNode _ [_, _]) +vectExpr e@(_, AnnApp _ arg) | isAnnTypeArg arg = vectPolyApp e - - -- 'Int', 'Float', or 'Double' literal - -- FIXME: this needs to be generalised -vectExpr (_, AnnApp (_, AnnVar v) (_, AnnLit lit)) _ - | Just con <- isDataConId_maybe v - , is_special_con con + + -- Lifted literal +vectExpr (_, AnnApp (_, AnnVar v) (_, AnnLit lit)) + | Just _con <- isDataConId_maybe v = do - let vexpr = App (Var v) (Lit lit) - lexpr <- liftPD vexpr - return (vexpr, lexpr) - where - is_special_con con = con `elem` [intDataCon, floatDataCon, doubleDataCon] + { let vexpr = App (Var v) (Lit lit) + ; lexpr <- liftPD vexpr + ; return (vexpr, lexpr) + } -- value application (dictionary or user value) -vectExpr e@(_, AnnApp fn arg) (VITNode _ [vit1, vit2]) +vectExpr e@(_, AnnApp fn arg) | isPredTy arg_ty -- dictionary application (whose result is not a dictionary) = vectPolyApp e | otherwise -- user value - = do { -- vectorise the types - ; varg_ty <- vectType arg_ty - ; vres_ty <- vectType res_ty + = do + { -- vectorise the types + ; varg_ty <- vectType arg_ty + ; vres_ty <- vectType res_ty - -- vectorise the function and argument expression - ; vfn <- vectExpr fn vit1 - ; varg <- vectExpr arg vit2 + -- vectorise the function and argument expression + ; vfn <- vectExpr fn + ; varg <- vectExpr arg - -- the vectorised function is a closure; apply it to the 
vectorised argument - ; mkClosureApp varg_ty vres_ty vfn varg - } + -- the vectorised function is a closure; apply it to the vectorised argument + ; mkClosureApp varg_ty vres_ty vfn varg + } where (arg_ty, res_ty) = splitFunTy . exprType $ deAnnotate fn -vectExpr (_, AnnCase scrut bndr ty alts) vt +vectExpr (_, AnnCase scrut bndr ty alts) | Just (tycon, ty_args) <- splitTyConApp_maybe scrut_ty , isAlgTyCon tycon - = vectAlgCase tycon ty_args scrut bndr ty alts vt - | otherwise = do dflags <- getDynFlags - cantVectorise dflags "Can't vectorise expression" (ppr scrut_ty) + = vectAlgCase tycon ty_args scrut bndr ty alts + | otherwise + = do + { dflags <- getDynFlags + ; cantVectorise dflags "Can't vectorise expression (no algebraic type constructor)" $ + ppr scrut_ty + } where scrut_ty = exprType (deAnnotate scrut) -vectExpr (_, AnnLet (AnnNonRec bndr rhs) body) (VITNode _ [vt1, vt2]) +vectExpr (_, AnnLet (AnnNonRec bndr rhs) body) = do - vrhs <- localV . inBind bndr . liftM (\(_,_,z)->z) $ vectPolyExpr False [] rhs (Just vt1) - (vbndr, vbody) <- vectBndrIn bndr (vectExpr body vt2) - return $ vLet (vNonRec vbndr vrhs) vbody + { vrhs <- localV $ + inBind bndr $ + vectAnnPolyExpr False rhs + ; (vbndr, vbody) <- vectBndrIn bndr (vectExpr body) + ; return $ vLet (vNonRec vbndr vrhs) vbody + } -vectExpr (_, AnnLet (AnnRec bs) body) (VITNode _ (vtB : vtBnds)) +vectExpr (_, AnnLet (AnnRec bs) body) = do - (vbndrs, (vrhss, vbody)) <- vectBndrsIn bndrs + { (vbndrs, (vrhss, vbody)) <- vectBndrsIn bndrs $ liftM2 (,) - (zipWith3M vect_rhs bndrs rhss vtBnds) - (vectExpr body vtB) - return $ vLet (vRec vbndrs vrhss) vbody + (zipWithM vect_rhs bndrs rhss) + (vectExpr body) + ; return $ vLet (vRec vbndrs vrhss) vbody + } where (bndrs, rhss) = unzip bs - vect_rhs bndr rhs vt = localV - . inBind bndr - . 
liftM (\(_,_,z)->z) - $ vectPolyExpr (isStrongLoopBreaker $ idOccInfo bndr) [] rhs (Just vt) - zipWith3M f xs ys zs = zipWithM (\x -> \(y,z) -> (f x y z)) xs (zip ys zs) + vect_rhs bndr rhs = localV $ + inBind bndr $ + vectAnnPolyExpr (isStrongLoopBreaker $ idOccInfo bndr) rhs -vectExpr (_, AnnTick tickish expr) (VITNode _ [vit]) - = liftM (vTick tickish) (vectExpr expr vit) +vectExpr (_, AnnTick tickish expr) + = vTick tickish <$> vectExpr expr -vectExpr (_, AnnType ty) _ - = liftM vType (vectType ty) +vectExpr (_, AnnType ty) + = vType <$> vectType ty -vectExpr e vit = do dflags <- getDynFlags - cantVectorise dflags "Can't vectorise expression (vectExpr)" (ppr (deAnnotate e) $$ text (" " ++ show vit)) +vectExpr e + = do + { dflags <- getDynFlags + ; cantVectorise dflags "Can't vectorise expression (vectExpr)" $ ppr (deAnnotate e) + } --- |Vectorise an expression that *may* have an outer lambda abstraction. +-- |Vectorise an expression that *may* have an outer lambda abstraction. If the expression is marked +-- as encapsulated ('VIEncaps'), vectorise it as a scalar computation (using a generalised scalar +-- zip). -- -- We do not handle type variables at this point, as they will already have been stripped off by --- 'vectPolyExpr'. We also only have to worry about one set of dictionary arguments as we (1) only +-- 'vectPolyExpr'. We also only have to worry about one set of dictionary arguments as we (1) only -- deal with Haskell 2011 and (2) class selectors are vectorised elsewhere. 
-- -vectFnExpr :: Bool -- ^ If we process the RHS of a binding, whether that binding should - -- be inlined - -> Bool -- ^ Whether the binding is a loop breaker - -> [Var] -- ^ Names of function in same recursive binding group - -> CoreExprWithFVs -- ^ Expression to vectorise; must have an outer `AnnLam` - -> VITree - -> VM (Inline, Bool, VExpr) --- vectFnExpr _ _ _ e vi | not (checkTree vi (deAnnotate e)) --- = pprPanic "vectFnExpr" (ppr $ deAnnotate e) -vectFnExpr inline loop_breaker recFns expr@(_fvs, AnnLam bndr body) vt@(VITNode _ [vt']) - -- predicate abstraction: leave as a normal abstraction, but vectorise the predicate type +vectFnExpr :: Bool -- ^If we process the RHS of a binding, whether that binding + -- should be inlined + -> Bool -- ^Whether the binding is a loop breaker + -> CoreExprWithVectInfo -- ^Expression to vectorise; must have an outer `AnnLam` + -> VM VExpr +vectFnExpr inline loop_breaker expr@(_ann, AnnLam bndr body) + -- predicate abstraction: leave as a normal abstraction, but vectorise the predicate type | isId bndr && isPredTy (idType bndr) - = do { vBndr <- vectBndr bndr - ; (inline, isScalarFn, vbody) <- vectFnExpr inline loop_breaker recFns body vt' - ; return (inline, isScalarFn, mapVect (mkLams [vectorised vBndr]) vbody) - } - -- non-predicate abstraction: vectorise (try to vectorise as a scalar computation) + = do + { vBndr <- vectBndr bndr + ; vbody <- vectFnExpr inline loop_breaker body + ; return $ mapVect (mkLams [vectorised vBndr]) vbody + } + -- non-predicate abstraction: vectorise as a scalar computation + | isId bndr && isVIEncaps expr + = vectScalarFun . 
deAnnotate $ expr + -- non-predicate abstraction: vectorise as a non-scalar computation | isId bndr - = mark DontInline True (vectScalarFunMaybe (deAnnotate expr) vt) - `orElseV` - mark inlineMe False (vectLam inline loop_breaker expr vt) -vectFnExpr _ _ _ e vt - -- not an abstraction: vectorise as a vanilla expression - = mark DontInline False $ vectExpr e vt - -mark :: Inline -> Bool -> VM a -> VM (Inline, Bool, a) -mark b isScalarFn p = do { x <- p; return (b, isScalarFn, x) } + = vectLam inline loop_breaker expr +vectFnExpr _ _ expr + -- not an abstraction: vectorise as a vanilla expression + = vectExpr expr -- |Vectorise type and dictionary applications. -- -- These are always headed by a variable (as we don't support higher-rank polymorphism), but may --- involve two sets of type variables and dictionaries. Consider, +-- involve two sets of type variables and dictionaries. Consider, -- -- > class C a where -- > m :: D b => b -> a -- -- The type of 'm' is 'm :: forall a. C a => forall b. D b => b -> a'. -- -vectPolyApp :: CoreExprWithFVs -> VM VExpr +vectPolyApp :: CoreExprWithVectInfo -> VM VExpr vectPolyApp e0 = case e4 of (_, AnnVar var) @@ -530,21 +540,6 @@ vectDictExpr (Coercion coe) -- instead they become dictionaries of vectorised methods). We treat them differently, though see -- "Note [Scalar dfuns]" in 'Vectorise'. -- -vectScalarFunMaybe :: CoreExpr -- ^ Expression to be vectorised - -> VITree -- ^ Vectorisation information - -> VM VExpr -vectScalarFunMaybe expr (VITNode VIEncaps _) = vectScalarFun expr -vectScalarFunMaybe _expr _ = noV $ ptext (sLit "not a scalar function") - --- |Vectorise an expression of functional type by lifting it by an application of a member of the --- zipWith family (i.e., 'map', 'zipWith', zipWith3', etc.) This is only a valid strategy if the --- function does not contain parallel subcomputations and has only 'Scalar' types in its result and --- arguments — this is a predcondition for calling this function. 
--- --- Dictionary functions are also scalar functions (as dictionaries themselves are not vectorised, --- instead they become dictionaries of vectorised methods). We treat them differently, though see --- "Note [Scalar dfuns]" in 'Vectorise'. --- vectScalarFun :: CoreExpr -> VM VExpr vectScalarFun expr = do @@ -673,12 +668,11 @@ unVectDict ty e -- variables are passed explicit (as conventional arguments) into the body during closure -- construction. -- -vectLam :: Bool -- ^ When the RHS of a binding, whether that binding should be inlined. - -> Bool -- ^ Whether the binding is a loop breaker. - -> CoreExprWithFVs -- ^ Body of abstraction. - -> VITree +vectLam :: Bool -- ^ Should the RHS of a binding be inlined? + -> Bool -- ^ Whether the binding is a loop breaker. + -> CoreExprWithVectInfo -- ^ Body of abstraction. -> VM VExpr -vectLam inline loop_breaker expr@(fvs, AnnLam _ _) vi +vectLam inline loop_breaker expr@((fvs, _vi), AnnLam _ _) = do { let (bndrs, body) = collectAnnValBinders expr -- grab the in-scope type variables @@ -706,18 +700,13 @@ vectLam inline loop_breaker expr@(fvs, AnnLam _ _) vi . hoistPolyVExpr tyvars vfvs_dict' (maybe_inline arity) $ do { -- generate the vectorised body of the lambda abstraction ; lc <- builtin liftingContext - ; let viBody = stripLams expr vi - -- ; checkTreeAnnM vi expr - ; (vbndrs, vbody) <- vectBndrsIn (fvs_nondict ++ bndrs) (vectExpr body viBody) + ; (vbndrs, vbody) <- vectBndrsIn (fvs_nondict ++ bndrs) $ vectExpr body ; vbody' <- break_loop lc res_ty vbody ; return $ vLams lc vbndrs vbody' } } where - stripLams (_, AnnLam _ e) (VITNode _ [vt]) = stripLams e vt - stripLams _ vi = vi - maybe_inline n | inline = Inline n | otherwise = DontInline @@ -735,7 +724,7 @@ vectLam inline loop_breaker expr@(fvs, AnnLam _ _) vi (LitAlt (mkMachInt 0), [], empty)]) } | otherwise = return (ve, le) -vectLam _ _ _ _ = panic "vectLam" +vectLam _ _ _ = panic "Vectorise.Exp.vectLam: not a lambda" -- Vectorise an algebraic case expression. 
-- @@ -754,31 +743,31 @@ vectLam _ _ _ _ = panic "vectLam" -- -- FIXME: this is too lazy -vectAlgCase :: TyCon -> [Type] -> CoreExprWithFVs-> Var -> Type - -> [(AltCon, [Var], CoreExprWithFVs)] -> VITree +vectAlgCase :: TyCon -> [Type] -> CoreExprWithVectInfo -> Var -> Type + -> [(AltCon, [Var], CoreExprWithVectInfo)] -> VM VExpr -vectAlgCase _tycon _ty_args scrut bndr ty [(DEFAULT, [], body)] (VITNode _ (scrutVit : [altVit])) +vectAlgCase _tycon _ty_args scrut bndr ty [(DEFAULT, [], body)] = do - vscrut <- vectExpr scrut scrutVit + vscrut <- vectExpr scrut (vty, lty) <- vectAndLiftType ty - (vbndr, vbody) <- vectBndrIn bndr (vectExpr body altVit) + (vbndr, vbody) <- vectBndrIn bndr (vectExpr body) return $ vCaseDEFAULT vscrut vbndr vty lty vbody -vectAlgCase _tycon _ty_args scrut bndr ty [(DataAlt _, [], body)] (VITNode _ (scrutVit : [altVit])) +vectAlgCase _tycon _ty_args scrut bndr ty [(DataAlt _, [], body)] = do - vscrut <- vectExpr scrut scrutVit + vscrut <- vectExpr scrut (vty, lty) <- vectAndLiftType ty - (vbndr, vbody) <- vectBndrIn bndr (vectExpr body altVit) + (vbndr, vbody) <- vectBndrIn bndr (vectExpr body) return $ vCaseDEFAULT vscrut vbndr vty lty vbody -vectAlgCase _tycon _ty_args scrut bndr ty [(DataAlt dc, bndrs, body)] (VITNode _ (scrutVit : [altVit])) +vectAlgCase _tycon _ty_args scrut bndr ty [(DataAlt dc, bndrs, body)] = do (vty, lty) <- vectAndLiftType ty - vexpr <- vectExpr scrut scrutVit + vexpr <- vectExpr scrut (vbndr, (vbndrs, (vect_body, lift_body))) <- vect_scrut_bndr . 
vectBndrsIn bndrs - $ vectExpr body altVit + $ vectExpr body let (vect_bndrs, lift_bndrs) = unzip vbndrs (vscrut, lscrut, pdata_dc) <- pdataUnwrapScrut (vVar vbndr) vect_dc <- maybeV dataConErr (lookupDataCon dc) @@ -796,9 +785,9 @@ vectAlgCase _tycon _ty_args scrut bndr ty [(DataAlt dc, bndrs, body)] (VITNode _ dataConErr = (text "vectAlgCase: data constructor not vectorised" <+> ppr dc) -vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) +vectAlgCase tycon _ty_args scrut bndr ty alts = do - vect_tc <- maybeV tyConErr (lookupTyCon tycon) + vect_tc <- vectTyCon tycon (vty, lty) <- vectAndLiftType ty let arity = length (tyConDataCons vect_tc) @@ -807,10 +796,10 @@ vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) let sel = Var sel_bndr (vbndr, valts) <- vect_scrut_bndr - $ mapM (proc_alt arity sel vty lty) (zip alts' altVits) + $ mapM (proc_alt arity sel vty lty) alts' let (vect_dcs, vect_bndrss, lift_bndrss, vbodies) = unzip4 valts - vexpr <- vectExpr scrut scrutVit + vexpr <- vectExpr scrut (vect_scrut, lift_scrut, pdata_dc) <- pdataUnwrapScrut (vVar vbndr) let (vect_bodies, lift_bodies) = unzip vbodies @@ -829,8 +818,6 @@ vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) return . 
vLet (vNonRec vbndr vexpr) $ (vect_case, lift_case) where - tyConErr = (text "vectAlgCase: type constructor not vectorised" <+> ppr tycon) - vect_scrut_bndr | isDeadBinder bndr = vectBndrNewIn bndr (fsLit "scrut") | otherwise = vectBndrIn bndr @@ -842,12 +829,12 @@ vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) cmp _ DEFAULT = GT cmp _ _ = panic "vectAlgCase/cmp" - proc_alt arity sel _ lty ((DataAlt dc, bndrs, body), vi) + proc_alt arity sel _ lty (DataAlt dc, bndrs, body@((fvs_body, _), _)) = do vect_dc <- maybeV dataConErr (lookupDataCon dc) let ntag = dataConTagZ vect_dc tag = mkDataConTag vect_dc - fvs = freeVarsOf body `delVarSetList` bndrs + fvs = fvs_body `delVarSetList` bndrs sel_tags <- liftM (`App` sel) (builtin (selTags arity)) lc <- builtin liftingContext @@ -860,7 +847,7 @@ vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) binds <- mapM (pack_var (Var lc) sel_tags tag) . filter isLocalId $ varSetElems fvs - (ve, le) <- vectExpr body vi + (ve, le) <- vectExpr body return (ve, Case (elems `App` sel) lc lty [(DEFAULT, [], (mkLets (concat binds) le))]) -- empty <- emptyPD vty @@ -892,9 +879,6 @@ vectAlgCase tycon _ty_args scrut bndr ty alts (VITNode _ (scrutVit : altVits)) _ -> return [] -vectAlgCase tycon _ty_args _scrut _bndr _ty _alts (VITNode _ _) - = pprPanic "vectAlgCase (mismatched node information)" (ppr tycon) - -- Support to compute information for vectorisation avoidance ------------------ @@ -905,202 +889,248 @@ data VectAvoidInfo = VIParr -- tree contains parallel computations | VISimple -- result type is scalar & no parallel subcomputation | VIComplex -- any result type, no parallel subcomputation | VIEncaps -- tree encapsulated by 'liftSimple' + | VIDict -- dictionary computation (never parallel) deriving (Eq, Show) --- Instead of integrating the vectorisation avoidance information into Core expression, we keep --- them in a separate tree (that structurally mirrors the Core 
expression that it annotates). +-- Core expression annotated with free variables and vectorisation-specific information. -- -data VITree = VITNode VectAvoidInfo [VITree] - deriving (Show) +type CoreExprWithVectInfo = AnnExpr Id (VarSet, VectAvoidInfo) --- Is any of the tree nodes a 'VIPArr' node? +-- Yield the type of an annotated core expression. -- -anyVIPArr :: [VITree] -> Bool -anyVIPArr = or . (map (\(VITNode vi _) -> vi == VIParr)) +annExprType :: AnnExpr Var ann -> Type +annExprType = exprType . deAnnotate --- Compute Core annotations to determine for which subexpressions we can avoid vectorisation +-- Project the vectorisation information from an annotated Core expression. -- --- FIXME: free scalar vars don't actually need to be passed through, since encapsulations makes sure, --- that there are no free variables in encapsulated lambda expressions -vectAvoidInfo :: CoreExprWithFVs -> VM VITree -vectAvoidInfo ce@(_, AnnVar v) - = do { vi <- vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce vi [] - ; traceVt "vectAvoidInfo AnnVar" ((ppr v) <+> (ppr $ exprType $ deAnnotate ce)) - ; return $ VITNode vi [] - } +vectAvoidInfoOf :: CoreExprWithVectInfo -> VectAvoidInfo +vectAvoidInfoOf ((_, vi), _) = vi -vectAvoidInfo ce@(_, AnnLit _) - = do { vi <- vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce vi [] - ; traceVt "vectAvoidInfo AnnLit" (ppr $ exprType $ deAnnotate ce) - ; return $ VITNode vi [] - } +-- Is this a 'VIParr' node? +-- +isVIParr :: CoreExprWithVectInfo -> Bool +isVIParr = (== VIParr) . vectAvoidInfoOf -vectAvoidInfo ce@(_, AnnApp e1 e2) - = do { vt1 <- vectAvoidInfo e1 - ; vt2 <- vectAvoidInfo e2 - ; vi <- if anyVIPArr [vt1, vt2] - then return VIParr - else vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce vi [vt1, vt2] - ; return $ VITNode vi [vt1, vt2] - } +-- Is this a 'VIEncaps' node? +-- +isVIEncaps :: CoreExprWithVectInfo -> Bool +isVIEncaps = (== VIEncaps) . 
vectAvoidInfoOf -vectAvoidInfo ce@(_, AnnLam _var body) - = do { vt@(VITNode vi _) <- vectAvoidInfo body - ; viTrace ce vi [vt] - ; let resultVI | vi == VIParr = VIParr - | otherwise = VIComplex - ; return $ VITNode resultVI [vt] - } +-- Is this a 'VIDict' node? +-- +isVIDict :: CoreExprWithVectInfo -> Bool +isVIDict = (== VIDict) . vectAvoidInfoOf -vectAvoidInfo ce@(_, AnnLet (AnnNonRec _var expr) body) - = do { vtE <- vectAvoidInfo expr - ; vtB <- vectAvoidInfo body - ; vi <- if anyVIPArr [vtE, vtB] - then return VIParr - else vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce vi [vtE, vtB] - ; return $ VITNode vi [vtE, vtB] - } +-- 'VIParr' if either argument is 'VIParr'; otherwise, the first argument. +-- +unlessVIParr :: VectAvoidInfo -> VectAvoidInfo -> VectAvoidInfo +unlessVIParr _ VIParr = VIParr +unlessVIParr vi _ = vi -vectAvoidInfo ce@(_, AnnLet (AnnRec bnds) body) - = do { let (_, exprs) = unzip bnds - ; vtBnds <- mapM (\e -> vectAvoidInfo e) exprs - ; if (anyVIPArr vtBnds) - then do { vtBnds' <- mapM (\e -> vectAvoidInfo e) exprs - ; vtB <- vectAvoidInfo body - ; return (VITNode VIParr (vtB: vtBnds')) - } - else do { vtB@(VITNode vib _) <- vectAvoidInfo body - ; ni <- if (vib == VIParr) - then return VIParr - else vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce ni (vtB : vtBnds) - ; return $ VITNode ni (vtB : vtBnds) - } - } +-- 'VIParr' if either arguments vectorisation information is 'VIParr'; otherwise, the vectorisation +-- information of the first argument is produced. 
+-- +unlessVIParrExpr :: VectAvoidInfo -> CoreExprWithVectInfo -> VectAvoidInfo +infixl `unlessVIParrExpr` +unlessVIParrExpr e1 e2 = e1 `unlessVIParr` vectAvoidInfoOf e2 -vectAvoidInfo ce@(_, AnnCase expr _var _ty alts) - = do { vtExpr <- vectAvoidInfo expr - ; vtAlts <- mapM (\(_, _, e) -> vectAvoidInfo e) alts - ; ni <- if anyVIPArr (vtExpr : vtAlts) - then return VIParr - else vectAvoidInfoType $ exprType $ deAnnotate ce - ; viTrace ce ni (vtExpr : vtAlts) - ; return $ VITNode ni (vtExpr: vtAlts) - } +-- Compute Core annotations to determine for which subexpressions we can avoid vectorisation. +-- +-- * The first argument is the set of free, local variables whose evaluation may entail parallelism. +-- +vectAvoidInfo :: VarSet -> CoreExprWithFVs -> VM CoreExprWithVectInfo +vectAvoidInfo pvs ce@(fvs, AnnVar v) + = do + { gpvs <- globalParallelVars + ; vi <- if v `elemVarSet` pvs || v `elemVarSet` gpvs + then return VIParr + else vectAvoidInfoTypeOf ce + ; viTrace ce vi [] -vectAvoidInfo (_, AnnCast expr _) - = do { vt@(VITNode vi _) <- vectAvoidInfo expr - ; return $ VITNode vi [vt] - } + ; vit <- vectAvoidInfoTypeOf ce -- TEMPORARY + ; traceVt (" AnnVar: vectAvoidInfoTypeOf: " ++ show vit) empty -vectAvoidInfo (_, AnnTick _ expr) - = do { vt@(VITNode vi _) <- vectAvoidInfo expr - ; return $ VITNode vi [vt] - } + ; return ((fvs, vi), AnnVar v) + } -vectAvoidInfo (_, AnnType {}) - = return $ VITNode VISimple [] +vectAvoidInfo _pvs ce@(fvs, AnnLit lit) + = do + { vi <- vectAvoidInfoTypeOf ce + ; viTrace ce vi [] + ; return ((fvs, vi), AnnLit lit) + } -vectAvoidInfo (_, AnnCoercion {}) - = return $ VITNode VISimple [] +vectAvoidInfo pvs ce@(fvs, AnnApp e1 e2) + = do + { ceVI <- vectAvoidInfoTypeOf ce + ; eVI1 <- vectAvoidInfo pvs e1 + ; eVI2 <- vectAvoidInfo pvs e2 + ; let vi = ceVI `unlessVIParrExpr` eVI1 `unlessVIParrExpr` eVI2 + ; viTrace ce vi [eVI1, eVI2] + ; return ((fvs, vi), AnnApp eVI1 eVI2) + } + +vectAvoidInfo pvs ce@(fvs, AnnLam var body) + = do + { 
bodyVI <- vectAvoidInfo pvs body + ; varVI <- vectAvoidInfoType $ varType var + ; let vi = vectAvoidInfoOf bodyVI `unlessVIParr` varVI + ; viTrace ce vi [bodyVI] + ; return ((fvs, vi), AnnLam var bodyVI) + } + +vectAvoidInfo pvs ce@(fvs, AnnLet (AnnNonRec var e) body) + = do + { ceVI <- vectAvoidInfoTypeOf ce + ; eVI <- vectAvoidInfo pvs e + ; isScalarTy <- isScalar $ varType var + ; (bodyVI, vi) <- if isVIParr eVI && not isScalarTy + then do -- binding is parallel + { bodyVI <- vectAvoidInfo (fvs `extendVarSet` var) body + ; return (bodyVI, VIParr) + } + else do -- binding doesn't affect parallelism + { bodyVI <- vectAvoidInfo fvs body + ; return (bodyVI, ceVI `unlessVIParrExpr` bodyVI) + } + ; viTrace ce vi [eVI, bodyVI] + ; return ((fvs, vi), AnnLet (AnnNonRec var eVI) bodyVI) + } + +vectAvoidInfo pvs ce@(fvs, AnnLet (AnnRec bnds) body) + = do + { ceVI <- vectAvoidInfoTypeOf ce + ; bndsVI <- mapM (vectAvoidInfoBnd pvs) bnds + ; parrBndrs <- map fst <$> filterM isVIParrBnd bndsVI + ; if not . null $ parrBndrs + then do -- body may trigger parallelism via at least one binding + { new_pvs <- filterM ((not <$>) . isScalar . 
varType) parrBndrs + ; let extendedPvs = pvs `extendVarSetList` new_pvs + ; bndsVI <- mapM (vectAvoidInfoBnd extendedPvs) bnds + ; bodyVI <- vectAvoidInfo extendedPvs body + ; viTrace ce VIParr (map snd bndsVI ++ [bodyVI]) + ; return ((fvs, VIParr), AnnLet (AnnRec bndsVI) bodyVI) + } + else do -- demanded bindings cannot trigger parallelism + { bodyVI <- vectAvoidInfo pvs body + ; let vi = ceVI `unlessVIParrExpr` bodyVI + ; viTrace ce vi (map snd bndsVI ++ [bodyVI]) + ; return ((fvs, vi), AnnLet (AnnRec bndsVI) bodyVI) + } + } + where + vectAvoidInfoBnd pvs (var, e) = (var,) <$> vectAvoidInfo pvs e + + isVIParrBnd (var, eVI) + = do + { isScalarTy <- isScalar (varType var) + ; return $ isVIParr eVI && not isScalarTy + } + +vectAvoidInfo pvs ce@(fvs, AnnCase e var ty alts) + = do + { ceVI <- vectAvoidInfoTypeOf ce + ; eVI <- vectAvoidInfo pvs e + ; isScalarTy <- isScalar . annExprType $ e + ; altsVI <- mapM (vectAvoidInfoAlt (isVIParr eVI && not isScalarTy)) alts + ; allScalarBndrs <- anyM allScalarAltBndrs altsVI + ; let alteVIs = [eVI | (_, _, eVI) <- altsVI] + vi | isVIParr eVI && not allScalarBndrs = VIParr + | otherwise + = foldl unlessVIParrExpr ceVI alteVIs + ; viTrace ce vi (eVI : alteVIs) + ; return ((fvs, vi), AnnCase eVI var ty altsVI) + } + where + vectAvoidInfoAlt isScalarScrut (con, bndrs, e) = (con, bndrs,) <$> vectAvoidInfo altPvs e + where + altPvs | isScalarScrut = pvs + | otherwise = pvs `extendVarSetList` bndrs + + allScalarAltBndrs (_, bndrs, _) = allScalarVarType bndrs + +vectAvoidInfo pvs (fvs, AnnCast e (fvs_ann, ann)) + = do + { eVI <- vectAvoidInfo pvs e + ; return ((fvs, vectAvoidInfoOf eVI), AnnCast eVI ((fvs_ann, VISimple), ann)) + } + +vectAvoidInfo pvs (fvs, AnnTick tick e) + = do + { eVI <- vectAvoidInfo pvs e + ; return ((fvs, vectAvoidInfoOf eVI), AnnTick tick eVI) + } + +vectAvoidInfo _pvs (fvs, AnnType ty) + = return ((fvs, VISimple), AnnType ty) + +vectAvoidInfo _pvs (fvs, AnnCoercion coe) + = return ((fvs, VISimple), AnnCoercion 
coe) -- Compute vectorisation avoidance information for a type. -- vectAvoidInfoType :: Type -> VM VectAvoidInfo -vectAvoidInfoType ty - | maybeParrTy ty = return VIParr - | otherwise - = do { sType <- isSimpleType ty - ; if sType - then return VISimple - else return VIComplex - } +vectAvoidInfoType ty + | isPredTy ty + = return VIDict + | Just (arg, res) <- splitFunTy_maybe ty + = do + { argVI <- vectAvoidInfoType arg + ; resVI <- vectAvoidInfoType res + ; case (argVI, resVI) of + (VISimple, VISimple) -> return VISimple -- NB: diverts from the paper: scalar functions + (_ , VIDict) -> return VIDict + _ -> return $ VIComplex `unlessVIParr` argVI `unlessVIParr` resVI + } + | otherwise + = do + { parr <- maybeParrTy ty + ; if parr + then return VIParr + else do + { scalar <- isScalar ty + ; if scalar + then return VISimple + else return VIComplex + } } + +-- Compute vectorisation avoidance information for the type of a Core expression (with FVs). +-- +vectAvoidInfoTypeOf :: AnnExpr Var ann -> VM VectAvoidInfo +vectAvoidInfoTypeOf = vectAvoidInfoType . annExprType --- Checks whether the type might be a parallel array type. In particular, if the outermost --- constructor is a type family, we conservatively assume that it may be a parallel array type. +-- Checks whether the type might be a parallel array type. -- -maybeParrTy :: Type -> Bool +maybeParrTy :: Type -> VM Bool maybeParrTy ty - | Just ty' <- coreView ty = maybeParrTy ty' - | Just (tyCon, ts) <- splitTyConApp_maybe ty = isPArrTyCon tyCon || isSynFamilyTyCon tyCon - || or (map maybeParrTy ts) -maybeParrTy _ = False - --- FIXME: This should not be hardcoded. 
-isSimpleType :: Type -> VM Bool -isSimpleType ty - | Just (c, _cs) <- splitTyConApp_maybe ty - = return $ (tyConName c) `elem` [boolTyConName, intTyConName, word8TyConName, doubleTyConName, floatTyConName] -{- - = do { globals <- globalScalarTyCons - ; traceVt ("isSimpleType " ++ (show (elemNameSet (tyConName c) globals ))) (ppr c) - ; return (elemNameSet (tyConName c) globals ) - } - -} - | Nothing <- splitTyConApp_maybe ty - = return False -isSimpleType ty - = pprPanic "Vectorise.Exp.isSimpleType not handled" (ppr ty) - -varsSimple :: VarSet -> VM Bool -varsSimple vs - = do { varTypes <- mapM isSimpleType $ map varType $ varSetElems vs - ; return $ and varTypes - } - -viTrace :: CoreExprWithFVs -> VectAvoidInfo -> [VITree] -> VM () -viTrace ce vi vTs - = traceVt ("vitrace " ++ (show vi) ++ "[" ++ (concat $ map (\(VITNode vi _) -> show vi ++ " ") vTs) ++"]") - (ppr $ deAnnotate ce) - + -- looking through newtypes + | Just ty' <- coreView ty + = (== VIParr) <$> vectAvoidInfoType ty' + -- decompose constructor applications + | Just (tc, ts) <- splitTyConApp_maybe ty + = do + { isParallel <- (tyConName tc `elemNameSet`) <$> globalParallelTyCons + ; if isParallel + then return True + else or <$> mapM maybeParrTy ts + } +maybeParrTy (ForAllTy _ ty) = maybeParrTy ty +maybeParrTy _ = return False -{- ----- Sanity check of the tree, for debugging only -checkTree :: VITree -> CoreExpr -> Bool -checkTree (VITNode _ []) (Type _ty) - = True - -checkTree (VITNode _ []) (Var _v) - = True - -checkTree (VITNode _ []) (Lit _) - = True - -checkTree (VITNode _ [vit]) (Tick _ expr) - = checkTree vit expr - -checkTree (VITNode _ [vit]) (Lam _ expr) - = checkTree vit expr - -checkTree (VITNode _ [vit1, vit2]) (App ce1 ce2) - = (checkTree vit1 ce1) && (checkTree vit2 ce2) - -checkTree (VITNode _ (scrutVit : altVits)) (Case scrut _ _ alts) - = (checkTree scrutVit scrut) && (and $ zipWith checkAlt altVits alts) - where - checkAlt vt (_, _, expr) = checkTree vt expr - -checkTree (VITNode 
_ [vt1, vt2]) (Let (NonRec _ expr1) expr2) - = (checkTree vt1 expr1) && (checkTree vt2 expr2) - -checkTree (VITNode _ (vtB : vtBnds)) (Let (Rec bndngs) expr) - = (and $ zipWith checkBndr vtBnds bndngs) && - (checkTree vtB expr) - where - checkBndr vt (_, e) = checkTree vt e - -checkTree (VITNode _ [vit]) (Cast expr _) - = checkTree vit expr +-- Are the types of all variables in the 'Scalar' class? +-- +allScalarVarType :: [Var] -> VM Bool +allScalarVarType vs = and <$> mapM (isScalar . varType) vs -checkTree _ _ = False +-- Are the types of all variables in the set in the 'Scalar' class? +-- +allScalarVarTypeSet :: VarSet -> VM Bool +allScalarVarTypeSet = allScalarVarType . varSetElems -checkTreeAnnM:: VITree -> CoreExprWithFVs -> VM () -checkTreeAnnM vi e = - if not (checkTree vi $ deAnnotate e) - then error ("checkTreeAnnM : \n " ++ show vi) - else return () --} +-- Debugging support +-- +viTrace :: CoreExprWithFVs -> VectAvoidInfo -> [CoreExprWithVectInfo] -> VM () +viTrace ce vi vTs + = traceVt ("vect info: " ++ show vi ++ "[" ++ + (concat $ map ((++ " ") . show . 
vectAvoidInfoOf) vTs) ++ "]") + (ppr $ deAnnotate ce) diff --git a/compiler/vectorise/Vectorise/Monad.hs b/compiler/vectorise/Vectorise/Monad.hs index 375b0af85e..6b5e9cc354 100644 --- a/compiler/vectorise/Vectorise/Monad.hs +++ b/compiler/vectorise/Vectorise/Monad.hs @@ -14,8 +14,8 @@ module Vectorise.Monad ( -- * Variables lookupVar, lookupVar_maybe, - addGlobalScalarVar, - addGlobalScalarTyCon, + addGlobalParallelVar, + addGlobalParallelTyCon, ) where import Vectorise.Monad.Base @@ -172,22 +172,22 @@ dumpVar dflags var = cantVectorise dflags "Variable not vectorised:" (ppr var) --- Global scalars -------------------------------------------------------------- +-- Global parallel entities ---------------------------------------------------- --- |Mark the given variable as scalar — i.e., executing the associated code does not involve any +-- |Mark the given variable as parallel — i.e., executing the associated code might involve -- parallel array computations. -- -addGlobalScalarVar :: Var -> VM () -addGlobalScalarVar var - = do { traceVt "addGlobalScalarVar" (ppr var) - ; updGEnv $ \env -> env{global_scalar_vars = extendVarSet (global_scalar_vars env) var} +addGlobalParallelVar :: Var -> VM () +addGlobalParallelVar var + = do { traceVt "addGlobalParallelVar" (ppr var) + ; updGEnv $ \env -> env{global_parallel_vars = extendVarSet (global_parallel_vars env) var} } --- |Mark the given type constructor as scalar — i.e., its values cannot embed parallel arrays. +-- |Mark the given type constructor as parallel — i.e., its values might embed parallel arrays. 
-- -addGlobalScalarTyCon :: TyCon -> VM () -addGlobalScalarTyCon tycon - = do { traceVt "addGlobalScalarTyCon" (ppr tycon) +addGlobalParallelTyCon :: TyCon -> VM () +addGlobalParallelTyCon tycon + = do { traceVt "addGlobalParallelTyCon" (ppr tycon) ; updGEnv $ \env -> - env{global_scalar_tycons = addOneToNameSet (global_scalar_tycons env) (tyConName tycon)} + env{global_parallel_tycons = addOneToNameSet (global_parallel_tycons env) (tyConName tycon)} } diff --git a/compiler/vectorise/Vectorise/Monad/Global.hs b/compiler/vectorise/Vectorise/Monad/Global.hs index a5c8449fc2..0fe460ad73 100644 --- a/compiler/vectorise/Vectorise/Monad/Global.hs +++ b/compiler/vectorise/Vectorise/Monad/Global.hs @@ -6,13 +6,13 @@ module Vectorise.Monad.Global ( updGEnv, -- * Vars - defGlobalVar, + defGlobalVar, undefGlobalVar, -- * Vectorisation declarations - lookupVectDecl, noVectDecl, + lookupVectDecl, -- * Scalars - globalScalarVars, isGlobalScalarVar, globalScalarTyCons, + globalParallelVars, globalParallelTyCons, -- * TyCons lookupTyCon, @@ -93,48 +93,54 @@ defGlobalVar v v' | otherwise = ptext (sLit "in the current module") +-- |Remove the mapping of a variable in the vectorisation map. +-- +undefGlobalVar :: Var -> VM () +undefGlobalVar v + = do + { traceVt "REMOVING global var mapping:" (ppr v) + ; updGEnv $ \env -> env { global_vars = delVarEnv (global_vars env) v } + } + -- Vectorisation declarations ------------------------------------------------- --- |Check whether a variable has a (non-scalar) vectorisation declaration. +-- |Check whether a variable has a vectorisation declaration. -- -lookupVectDecl :: Var -> VM (Maybe (Type, CoreExpr)) -lookupVectDecl var = readGEnv $ \env -> lookupVarEnv (global_vect_decls env) var - --- |Check whether a variable has a 'NOVECTORISE' declaration. +-- The first component of the result indicates whether the variable has a 'NOVECTORISE' declaration. 
+-- The second component contains the given type and expression in case of a 'VECTORISE' declaration. -- -noVectDecl :: Var -> VM Bool -noVectDecl var = readGEnv $ \env -> elemVarSet var (global_novect_vars env) +lookupVectDecl :: Var -> VM (Bool, Maybe (Type, CoreExpr)) +lookupVectDecl var + = readGEnv $ \env -> + case lookupVarEnv (global_vect_decls env) var of + Nothing -> (False, Nothing) + Just Nothing -> (True, Nothing) + Just vectDecl -> (False, vectDecl) --- Scalars -------------------------------------------------------------------- +-- Parallel entities ----------------------------------------------------------- --- |Get the set of global scalar variables. +-- |Get the set of global parallel variables. -- -globalScalarVars :: VM VarSet -globalScalarVars = readGEnv global_scalar_vars +globalParallelVars :: VM VarSet +globalParallelVars = readGEnv global_parallel_vars --- |Check whether a given variable is in the set of global scalar variables. +-- |Get the set of all parallel type constructors (those that may embed parallelism) including +-- both those parallel type constructors declared in an imported module and those declared in the +-- current module. -- -isGlobalScalarVar :: Var -> VM Bool -isGlobalScalarVar var = readGEnv $ \env -> var `elemVarSet` global_scalar_vars env - --- |Get the set of global scalar type constructors including both those scalar type constructors --- declared in an imported module and those declared in the current module. --- -globalScalarTyCons :: VM NameSet -globalScalarTyCons = readGEnv global_scalar_tycons +globalParallelTyCons :: VM NameSet +globalParallelTyCons = readGEnv global_parallel_tycons -- TyCons --------------------------------------------------------------------- --- |Lookup the vectorised version of a `TyCon` from the global environment. +-- |Determine the vectorised version of a `TyCon`.
The vectorisation map in the global environment +-- contains a vectorised version if the original `TyCon` embeds any parallel arrays. -- lookupTyCon :: TyCon -> VM (Maybe TyCon) lookupTyCon tc - | isUnLiftedTyCon tc || isTupleTyCon tc - = return (Just tc) - | otherwise = readGEnv $ \env -> lookupNameEnv (global_tycons env) (tyConName tc) -- |Add a mapping between plain and vectorised `TyCon`s to the global environment. diff --git a/compiler/vectorise/Vectorise/Monad/InstEnv.hs b/compiler/vectorise/Vectorise/Monad/InstEnv.hs index fc12ee567c..95546bf503 100644 --- a/compiler/vectorise/Vectorise/Monad/InstEnv.hs +++ b/compiler/vectorise/Vectorise/Monad/InstEnv.hs @@ -1,5 +1,6 @@ module Vectorise.Monad.InstEnv - ( lookupInst + ( existsInst + , lookupInst , lookupFamInst ) where @@ -21,6 +22,14 @@ import Util #include "HsVersions.h" +-- Check whether a unique class instance for a given class and type arguments exists. +-- +existsInst :: Class -> [Type] -> VM Bool +existsInst cls tys + = do { instEnv <- readGEnv global_inst_env + ; return $ either (const False) (const True) (lookupUniqueInstEnv instEnv cls tys) + } + -- Look up the dfun of a class instance. 
-- -- The match must be unique —i.e., match exactly one instance— but the @@ -64,6 +73,6 @@ lookupFamInst tycon tys [(fam_inst, rep_tys)] -> return ( fam_inst, rep_tys) _other -> do dflags <- getDynFlags - cantVectorise dflags "VectMonad.lookupFamInst: not found: " + cantVectorise dflags "Vectorise.Monad.InstEnv.lookupFamInst: not found: " (ppr $ mkTyConApp tycon tys) } diff --git a/compiler/vectorise/Vectorise/Monad/Local.hs b/compiler/vectorise/Vectorise/Monad/Local.hs index 8b3c1dcf19..5415c5691d 100644 --- a/compiler/vectorise/Vectorise/Monad/Local.hs +++ b/compiler/vectorise/Vectorise/Monad/Local.hs @@ -44,20 +44,24 @@ updLEnv f = VM $ \_ genv lenv -> return (Yes genv (f lenv) ()) -- localV :: VM a -> VM a localV p - = do env <- readLEnv id - x <- p - setLEnv env - return x + = do + { env <- readLEnv id + ; x <- p + ; setLEnv env + ; return x + } -- |Perform a computation in an empty local environment. -- closedV :: VM a -> VM a closedV p - = do env <- readLEnv id - setLEnv (emptyLocalEnv { local_bind_name = local_bind_name env }) - x <- p - setLEnv env - return x + = do + { env <- readLEnv id + ; setLEnv (emptyLocalEnv { local_bind_name = local_bind_name env }) + ; x <- p + ; setLEnv env + ; return x + } -- |Get the name of the local binding currently being vectorised. -- diff --git a/compiler/vectorise/Vectorise/Type/Classify.hs b/compiler/vectorise/Vectorise/Type/Classify.hs index 0cab706cf4..e1cd43ac3c 100644 --- a/compiler/vectorise/Vectorise/Type/Classify.hs +++ b/compiler/vectorise/Vectorise/Type/Classify.hs @@ -13,10 +13,12 @@ -- types. As '([::])' is being vectorised, any type constructor whose definition involves -- '([::])', either directly or indirectly, will be vectorised. 
-module Vectorise.Type.Classify ( - classifyTyCons -) where +module Vectorise.Type.Classify + ( classifyTyCons + ) +where +import NameSet import UniqSet import UniqFM import DataCon @@ -29,7 +31,7 @@ import Digraph -- |From a list of type constructors, extract those that can be vectorised, returning them in two -- sets, where the first result list /must be/ vectorised and the second result list /need not be/ --- vectorised. The third result list are those type constructors that we cannot convert (either +-- vectorised. The third result list are those type constructors that we cannot convert (either -- because they use language extensions or because they dependent on type constructors for which -- no vectorised version is available). @@ -37,28 +39,40 @@ import Digraph -- -- * tycons which have converted versions are mapped to 'True' -- * tycons which are not changed by vectorisation are mapped to 'False' --- * tycons which can't be converted are not elements of the map +-- * tycons which haven't been converted (because they can't or weren't vectorised) are not +-- elements of the map -- -classifyTyCons :: UniqFM Bool -- ^type constructor conversion status - -> [TyCon] -- ^type constructors that need to be classified - -> ([TyCon], [TyCon], [TyCon]) -- ^tycons to be converted & not to be converted -classifyTyCons convStatus tcs = classify [] [] [] convStatus (tyConGroups tcs) +classifyTyCons :: UniqFM Bool -- ^type constructor vectorisation status + -> NameSet -- ^tycons involving parallel arrays + -> [TyCon] -- ^type constructors that need to be classified + -> ( [TyCon] -- to be converted + , [TyCon] -- need not be converted (but could be) + , [TyCon] -- can't be converted, but involve parallel arrays + , [TyCon] -- can't be converted and have no parallel arrays + ) +classifyTyCons convStatus parTyCons tcs = classify [] [] [] [] convStatus parTyCons (tyConGroups tcs) where - classify conv keep ignored _ [] = (conv, keep, ignored) - classify conv keep ignored cs 
((tcs, ds) : rs) + classify conv keep par novect _ _ [] = (conv, keep, par, novect) + classify conv keep par novect cs pts ((tcs, ds) : rs) | can_convert && must_convert - = classify (tcs ++ conv) keep ignored (cs `addListToUFM` [(tc, True) | tc <- tcs]) rs + = classify (tcs ++ conv) keep par novect (cs `addListToUFM` [(tc, True) | tc <- tcs]) pts' rs | can_convert - = classify conv (tcs ++ keep) ignored (cs `addListToUFM` [(tc, False) | tc <- tcs]) rs + = classify conv (tcs ++ keep) par novect (cs `addListToUFM` [(tc, False) | tc <- tcs]) pts' rs + | has_parr + = classify conv keep (tcs ++ par) novect cs pts' rs | otherwise - = classify conv keep (tcs ++ ignored) cs rs + = classify conv keep par (tcs ++ novect) cs pts' rs where refs = ds `delListFromUniqSet` tcs + + pts' | has_parr = pts `addListToNameSet` map tyConName tcs + | otherwise = pts can_convert = (isNullUFM (refs `minusUFM` cs) && all convertable tcs) || isShowClass tcs must_convert = foldUFM (||) False (intersectUFM_C const cs refs) && (not . isShowClass $ tcs) + has_parr = any ((`elemNameSet` parTyCons) . tyConName) . eltsUFM $ refs -- We currently admit Haskell 2011-style data and newtype declarations as well as type -- constructors representing classes. 
diff --git a/compiler/vectorise/Vectorise/Type/Env.hs b/compiler/vectorise/Vectorise/Type/Env.hs index 0051d072a4..faa80a8629 100644 --- a/compiler/vectorise/Vectorise/Type/Env.hs +++ b/compiler/vectorise/Vectorise/Type/Env.hs @@ -32,7 +32,9 @@ import Id import MkId import NameEnv import NameSet +import UniqFM import OccName +import Unique import Util import Outputable @@ -47,69 +49,85 @@ import Data.List -- Note [Pragmas to vectorise tycons] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- --- VECTORISE pragmas for type constructors cover three different flavours of vectorising data type +-- All imported type constructors that are not mapped to a vectorised type in the vectorisation map +-- (possibly because the defining module was not compiled with vectorisation) may be used in scalar +-- code encapsulated in vectorised code. If a such a type constructor 'T' is a member of the +-- 'Scalar' class (and hence also of 'PData' and 'PRepr'), it may also be used in vectorised code, +-- where 'T' represents itself, but the representation of 'T' still remains opaque in vectorised +-- code (i.e., it can only be used in scalar code). +-- +-- An example is the treatment of 'Int'. 'Int's can be used in vectorised code and remain unchanged +-- by vectorisation. However, the representation of 'Int' by the 'I#' data constructor wrapping an +-- 'Int#' is not exposed in vectorised code. Instead, computations involving the representation need +-- to be confined to scalar code. +-- +-- VECTORISE pragmas for type constructors cover four different flavours of vectorising data type -- constructors: -- --- (1) Data type constructor 'T' that may be used in vectorised code, where 'T' represents itself, --- but the representation of 'T' is opaque in vectorised code. +-- (1) Data type constructor 'T' that together with its constructors 'Cn' may be used in vectorised +-- code, where 'T' and the 'Cn' are automatically vectorised in the same manner as data types +-- declared in a vectorised module. 
This includes the case where the vectoriser determines that +-- the original representation of 'T' may be used in vectorised code (as it does not embed any +-- parallel arrays.) This case is for type constructors that are *imported* from a non- +-- vectorised module, but that we want to use with full vectorisation support. -- --- An example is the treatment of 'Int'. 'Int's can be used in vectorised code and remain --- unchanged by vectorisation. However, the representation of 'Int' by the 'I#' data --- constructor wrapping an 'Int#' is not exposed in vectorised code. Instead, computations --- involving the representation need to be confined to scalar code. +-- An example is the treatment of 'Ordering' and '[]'. The former remains unchanged by +-- vectorisation, whereas the latter is fully vectorised. -- --- 'PData' and 'PRepr' instances need to be explicitly supplied for 'T' (they are not generated --- by the vectoriser). +-- 'PData' and 'PRepr' instances are automatically generated by the vectoriser. -- --- Type constructors declared with {-# VECTORISE SCALAR type T #-} are treated in this manner. --- (The vectoriser never treats a type constructor automatically in this manner.) +-- Type constructors declared with {-# VECTORISE type T #-} are treated in this manner. -- -- (2) Data type constructor 'T' that may be used in vectorised code, where 'T' is represented by an --- explicitly given 'Tv', but the representation of 'T' is opaque in vectorised code. +-- explicitly given 'Tv', but the representation of 'T' is opaque in vectorised code (i.e., the +-- constructors of 'T' may not occur in vectorised code). -- --- An example is the treatment of '[::]'. '[::]'s can be used in vectorised code and is --- vectorised to 'PArray'. However, the representation of '[::]' is not exposed in vectorised --- code. Instead, computations involving the representation need to be confined to scalar code. +-- An example is the treatment of '[::]'. 
The type '[::]' can be used in vectorised code and is +-- vectorised to 'PArray'. However, the representation of '[::]' is not exposed in vectorised +-- code. Instead, computations involving the representation need to be confined to scalar code. -- -- 'PData' and 'PRepr' instances need to be explicitly supplied for 'T' (they are not generated -- by the vectoriser). -- --- Type constructors declared with {-# VECTORISE SCALAR type T = T' #-} are treated in this +-- Type constructors declared with {-# VECTORISE type T = Tv #-} are treated in this -- manner. (The vectoriser never treats a type constructor automatically in this manner.) -- --- (3) Data type constructor 'T' that together with its constructors 'Cn' may be used in vectorised --- code, where 'T' and the 'Cn' are automatically vectorised in the same manner as data types --- declared in a vectorised module. This includes the case where the vectoriser determines that --- the original representation of 'T' may be used in vectorised code (as it does not embed any --- parallel arrays.) This case is for type constructors that are *imported* from a non- --- vectorised module, but that we want to use with full vectorisation support. +-- (3) Data type constructor 'T' that does not contain any parallel arrays and has explicitly +-- provided 'PData' and 'PRepr' instances (and maybe also a 'Scalar' instance), which together +-- with the type's constructors 'Cn' may be used in vectorised code. The type 'T' and its +-- constructors 'Cn' are represented by themselves in vectorised code. -- --- An example is the treatment of 'Ordering' and '[]'. The former remains unchanged by --- vectorisation, whereas the latter is fully vectorised. - --- 'PData' and 'PRepr' instances are automatically generated by the vectoriser. +-- An example is 'Bool', which is represented by itself in vectorised code (as it cannot embed +-- any parallel arrays).
However, we do not want any automatic generation of class and family +-- instances, which is why Case (1) does not apply. -- --- Type constructors declared with {-# VECTORISE type T #-} are treated in this manner. +-- 'PData' and 'PRepr' instances need to be explicitly supplied for 'T' (they are not generated +-- by the vectoriser). +-- +-- Type constructors declared with {-# VECTORISE SCALAR type T #-} are treated in this manner. -- --- (4) Data type constructor 'T' that together with its constructors 'Cn' may be used in vectorised --- code, where 'T' is represented by an explicitly given 'Tv' whose constructors 'Cvn' represent --- the original constructors in vectorised code. As a special case, we can have 'Tv = T' +-- (4) Data type constructor 'T' that does not contain any parallel arrays and that, in vectorised +-- code, is represented by an explicitly given 'Tv', but the representation of 'T' is opaque in +-- vectorised code and 'T' is regarded to be scalar — i.e., it may be used in encapsulated +-- scalar subcomputations. -- --- An example is the treatment of 'Bool', which is represented by itself in vectorised code --- (as it cannot embed any parallel arrays). However, we do not want any automatic generation --- of class and family instances, which is why Case (3) does not apply. +-- An example is the treatment of '(->)'. Types '(->)' can be used in vectorised code and are +-- vectorised to '(:->)'. However, the representation of '(->)' is not exposed in vectorised +-- code. Instead, computations involving the representation need to be confined to scalar code +-- and may be part of encapsulated scalar computations. -- -- 'PData' and 'PRepr' instances need to be explicitly supplied for 'T' (they are not generated -- by the vectoriser). -- --- Type constructors declared with {-# VECTORISE type T = T' #-} are treated in this manner. +-- Type constructors declared with {-# VECTORISE SCALAR type T = Tv #-} are treated in this +-- manner. 
(The vectoriser never treats a type constructor automatically in this manner.) -- -- In addition, we have also got a single pragma form for type classes: {-# VECTORISE class C #-}. -- It implies that the class type constructor may be used in vectorised code together with its data -- constructor. We generally produce a vectorised version of the data type and data constructor. -- We do not generate 'PData' and 'PRepr' instances for class type constructors. This pragma is the --- default for all type classes declared in this module, but the pragma can also be used explitly on --- imported classes. +-- default for all type classes declared in a vectorised module, but the pragma can also be used +-- explicitly on imported classes. -- Note [Vectorising classes] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -147,38 +165,36 @@ vectTypeEnv :: [TyCon] -- Type constructors defined in this mod vectTypeEnv tycons vectTypeDecls vectClassDecls = do { traceVt "** vectTypeEnv" $ ppr tycons - ; let -- {-# VECTORISE SCALAR type T -#} (imported and local tycons) - localAbstractTyCons = [tycon | VectType True tycon Nothing <- vectTypeDecls] - - -- {-# VECTORISE type T -#} (ONLY the imported tycons) + ; let -- {-# VECTORISE type T -#} (ONLY the imported tycons) impVectTyCons = ( [tycon | VectType False tycon Nothing <- vectTypeDecls] ++ [tycon | VectClass tycon <- vectClassDecls]) \\ tycons + + -- {-# VECTORISE [SCALAR] type T = Tv -#} (imported & local tycons with an /RHS/) + vectTyConsWithRHS = [ (tycon, rhs, isScalar) + | VectType isScalar tycon (Just rhs) <- vectTypeDecls] - -- {-# VECTORISE [SCALAR] type T = T' -#} (imported and local tycons) - vectTyConsWithRHS = [ (tycon, rhs, isAbstract) - | VectType isAbstract tycon (Just rhs) <- vectTypeDecls] + -- {-# VECTORISE SCALAR type T -#} (imported & local /scalar/ tycons without an RHS) + scalarTyConsNoRHS = [tycon | VectType True tycon Nothing <- vectTypeDecls] - -- filter VECTORISE SCALAR tycons and VECTORISE tycons with explicit rhses -
vectSpecialTyConNames = mkNameSet . map tyConName $ - localAbstractTyCons ++ map fst3 vectTyConsWithRHS + -- Check that is not a VECTORISE SCALAR tycon nor VECTORISE tycons with explicit rhs? + vectSpecialTyConNames = mkNameSet . map tyConName $ + scalarTyConsNoRHS ++ map fst3 vectTyConsWithRHS notVectSpecialTyCon tc = not $ (tyConName tc) `elemNameSet` vectSpecialTyConNames - -- Build a map containing all vectorised type constructor. If they are scalar, they are - -- mapped to 'False' (vectorised type constructor == original type constructor). - ; allScalarTyConNames <- globalScalarTyCons -- covers both current and imported modules + -- Build a map containing all vectorised type constructor. If the vectorised type + -- constructor differs from the original one, then it is mapped to 'True'; if they are + -- both the same, then it maps to 'False'. ; vectTyCons <- globalVectTyCons - ; let vectTyConBase = mapNameEnv (const True) vectTyCons -- by default fully vectorised + ; let vectTyConBase = mapUFM_Directly isDistinct vectTyCons -- 'True' iff tc /= V[[tc]] + isDistinct u tc = u /= getUnique tc vectTyConFlavour = vectTyConBase `plusNameEnv` mkNameEnv [ (tyConName tycon, True) | (tycon, _, _) <- vectTyConsWithRHS] `plusNameEnv` - mkNameEnv [ (tcName, False) -- original representation - | tcName <- nameSetToList allScalarTyConNames] - `plusNameEnv` mkNameEnv [ (tyConName tycon, False) -- original representation - | tycon <- localAbstractTyCons] + | tycon <- scalarTyConsNoRHS] -- Split the list of 'TyCons' into the ones (1) that we must vectorise and those (2) @@ -189,11 +205,15 @@ vectTypeEnv tycons vectTypeDecls vectClassDecls -- these are being handled separately. NB: Some type constructors may be marked SCALAR -- /and/ have an explicit right-hand side.) -- - -- Furthermore, 'drop_tcs' are those type constructors that we cannot vectorise. 
- ; let maybeVectoriseTyCons = filter notVectSpecialTyCon tycons ++ impVectTyCons - (conv_tcs, keep_tcs, drop_tcs) = classifyTyCons vectTyConFlavour maybeVectoriseTyCons + -- Furthermore, 'par_tcs' and 'drop_tcs' are those type constructors that we cannot + -- vectorise, and of those, only the 'par_tcs' involve parallel arrays. + ; parallelTyCons <- globalParallelTyCons + ; let maybeVectoriseTyCons = filter notVectSpecialTyCon tycons ++ impVectTyCons + (conv_tcs, keep_tcs, par_tcs, drop_tcs) + = classifyTyCons vectTyConFlavour parallelTyCons maybeVectoriseTyCons - ; traceVt " VECT SCALAR : " $ ppr localAbstractTyCons + ; traceVt " VECT SCALAR : " $ ppr (scalarTyConsNoRHS ++ + [tycon | (tycon, _, True) <- vectTyConsWithRHS]) ; traceVt " VECT [class] : " $ ppr impVectTyCons ; traceVt " VECT with rhs : " $ ppr (map fst3 vectTyConsWithRHS) ; traceVt " -- after classification (local and VECT [class] tycons) --" empty @@ -203,26 +223,22 @@ vectTypeEnv tycons vectTypeDecls vectClassDecls -- warn the user about unvectorised type constructors ; let explanation = ptext (sLit "(They use unsupported language extensions") $$ ptext (sLit "or depend on type constructors that are not vectorised)") - drop_tcs_nosyn = filter (not . isSynTyCon) drop_tcs + drop_tcs_nosyn = filter (not . isSynTyCon) (par_tcs ++ drop_tcs) ; unless (null drop_tcs_nosyn) $ emitVt "Warning: cannot vectorise these type constructors:" $ pprQuotedList drop_tcs_nosyn $$ explanation - ; mapM_ addGlobalScalarTyCon keep_tcs + ; mapM_ addParallelTyConAndCons $ conv_tcs ++ par_tcs ; let mapping = - -- Type constructors that we don't need to vectorise, use the same + -- Type constructors that we found we don't need to vectorise and those + -- declared VECTORISE SCALAR /without/ an explicit right-hand side, use the same -- representation in both unvectorised and vectorised code; they are not -- abstract. 
- [(tycon, tycon, False) | tycon <- keep_tcs] + [(tycon, tycon, False) | tycon <- keep_tcs ++ scalarTyConsNoRHS] -- We do the same for type constructors declared VECTORISE SCALAR /without/ - -- an explicit right-hand side, but ignore their representation (data - -- constructors) as they are abstract. - ++ [(tycon, tycon, True) | tycon <- localAbstractTyCons] - -- Type constructors declared VECTORISE /with/ an explicit vectorised type, - -- we map from the original to the given type; whether they are abstract depends - -- on whether the vectorisation declaration was SCALAR. - ++ vectTyConsWithRHS + -- an explicit right-hand side + ++ [(tycon, vTycon, True) | (tycon, vTycon, _) <- vectTyConsWithRHS] ; syn_tcs <- catMaybes <$> mapM defTyConDataCons mapping -- Vectorise all the data type declarations that we can and must vectorise (enter the @@ -263,17 +279,15 @@ vectTypeEnv tycons vectTypeDecls vectClassDecls do { defTyConPAs (zipLazy vect_tcs dfuns) -- Query the 'PData' instance type constructors for type constructors that have a - -- VECTORISE pragma with an explicit right-hand side (this is Item (4) of - -- "Note [Pragmas to vectorise tycons]" above). - ; let (withRHS_non_abstract, vwithRHS_non_abstract) - = unzip [(tycon, vtycon) | (tycon, vtycon, False) <- vectTyConsWithRHS] - ; pdata_withRHS_tcs <- mapM pdataReprTyConExact withRHS_non_abstract + -- VECTORISE SCALAR type pragma without an explicit right-hand side (this is Item + -- (3) of "Note [Pragmas to vectorise tycons]" above). 
+ ; pdata_scalar_tcs <- mapM pdataReprTyConExact scalarTyConsNoRHS -- Build workers for all vectorised data constructors (except abstract ones) ; sequence_ $ - zipWith3 vectDataConWorkers (orig_tcs ++ withRHS_non_abstract) - (vect_tcs ++ vwithRHS_non_abstract) - (pdata_tcs ++ pdata_withRHS_tcs) + zipWith3 vectDataConWorkers (orig_tcs ++ scalarTyConsNoRHS) + (vect_tcs ++ scalarTyConsNoRHS) + (pdata_tcs ++ pdata_scalar_tcs) -- Build a 'PA' dictionary for all type constructors (except abstract ones & those -- defined with an explicit right-hand side where the dictionary is user-supplied) @@ -295,6 +309,12 @@ vectTypeEnv tycons vectTypeDecls vectClassDecls } where fst3 (a, _, _) = a + + addParallelTyConAndCons tycon + = do + { addGlobalParallelTyCon tycon + ; mapM_ addGlobalParallelVar . concatMap dataConImplicitIds . tyConDataCons $ tycon + } -- Add a mapping from the original to vectorised type constructor to the vectorisation map. -- Unless the type constructor is abstract, also mappings from the orignal's data constructors @@ -307,21 +327,22 @@ vectTypeEnv tycons vectTypeDecls vectClassDecls -- right type constructor when reading vectorisation information from interface files). 
-- defTyConDataCons (origTyCon, vectTyCon, isAbstract) - = do { canonName <- mkLocalisedName mkVectTyConOcc origName - ; if origName == vectName -- Case (1) - || vectName == canonName -- Case (2) - then do - { defTyCon origTyCon vectTyCon -- T --> vT - ; defDataCons -- Ci --> vCi - ; return Nothing - } - else do -- Case (3) - { let synTyCon = mkSyn canonName (mkTyConTy vectTyCon) -- type S = vT - ; defTyCon origTyCon synTyCon -- T --> S - ; defDataCons -- Ci --> vCi - ; return $ Just synTyCon - } - } + = do + { canonName <- mkLocalisedName mkVectTyConOcc origName + ; if origName == vectName -- Case (1) + || vectName == canonName -- Case (2) + then do + { defTyCon origTyCon vectTyCon -- T --> vT + ; defDataCons -- Ci --> vCi + ; return Nothing + } + else do -- Case (3) + { let synTyCon = mkSyn canonName (mkTyConTy vectTyCon) -- type S = vT + ; defTyCon origTyCon synTyCon -- T --> S + ; defDataCons -- Ci --> vCi + ; return $ Just synTyCon + } + } where origName = tyConName origTyCon vectName = tyConName vectTyCon @@ -343,9 +364,9 @@ buildTyConPADict vect_tc prepr_ax pdata_tc pdatas_tc = tyConRepr vect_tc >>= buildPADict vect_tc prepr_ax pdata_tc pdatas_tc -- Produce a custom-made worker for the data constructors of a vectorised data type. This includes --- all data constructors that may be used in vetcorised code — i.e., all data constructors of data --- types other than scalar ones. Also adds a mapping from the original to vectorised worker into --- the vectorisation map. +-- all data constructors that may be used in vectorised code — i.e., all data constructors of data +-- types with 'VECTORISE [SCALAR] type' pragmas with an explicit right-hand side. Also adds a mapping +-- from the original to vectorised worker into the vectorisation map. -- -- FIXME: It's not nice that we need create a special worker after the data constructors has -- already been constructed. 
Also, I don't think the worker is properly added to the data diff --git a/compiler/vectorise/Vectorise/Type/Type.hs b/compiler/vectorise/Vectorise/Type/Type.hs index a7ec86a296..ebb09e663c 100644 --- a/compiler/vectorise/Vectorise/Type/Type.hs +++ b/compiler/vectorise/Vectorise/Type/Type.hs @@ -14,21 +14,16 @@ import TcType import Type import TypeRep import TyCon -import Outputable import Control.Monad import Control.Applicative import Data.Maybe --- | Vectorise a type constructor. + +-- |Vectorise a type constructor. Unless there is a vectorised version (stripped of embedded +-- parallel arrays), the vectorised version is the same as the original. -- vectTyCon :: TyCon -> VM TyCon -vectTyCon tc - | isFunTyCon tc = builtin closureTyCon - | isBoxedTupleTyCon tc = return tc - | isUnLiftedTyCon tc = return tc - | otherwise - = maybeCantVectoriseM "Tycon not vectorised: " (ppr tc) - $ lookupTyCon tc +vectTyCon tc = maybe tc id <$> lookupTyCon tc -- |Produce the vectorised and lifted versions of a type. 
-- diff --git a/compiler/vectorise/Vectorise/Utils.hs b/compiler/vectorise/Vectorise/Utils.hs index c5f1cb7cb1..fafce7a67d 100644 --- a/compiler/vectorise/Vectorise/Utils.hs +++ b/compiler/vectorise/Vectorise/Utils.hs @@ -17,7 +17,7 @@ module Vectorise.Utils ( combinePD, liftPD, -- * Scalars - zipScalars, scalarClosure, + isScalar, zipScalars, scalarClosure, -- * Naming newLocalVar @@ -137,20 +137,29 @@ liftPD x -- Scalars -------------------------------------------------------------------- +isScalar :: Type -> VM Bool +isScalar ty + = do + { scalar <- builtin scalarClass + ; existsInst scalar [ty] + } + zipScalars :: [Type] -> Type -> VM CoreExpr zipScalars arg_tys res_ty - = do - scalar <- builtin scalarClass - (dfuns, _) <- mapAndUnzipM (\ty -> lookupInst scalar [ty]) ty_args - zipf <- builtin (scalarZip $ length arg_tys) - return $ Var zipf `mkTyApps` ty_args `mkApps` map Var dfuns + = do + { scalar <- builtin scalarClass + ; (dfuns, _) <- mapAndUnzipM (\ty -> lookupInst scalar [ty]) ty_args + ; zipf <- builtin (scalarZip $ length arg_tys) + ; return $ Var zipf `mkTyApps` ty_args `mkApps` map Var dfuns + } where ty_args = arg_tys ++ [res_ty] scalarClosure :: [Type] -> Type -> CoreExpr -> CoreExpr -> VM CoreExpr scalarClosure arg_tys res_ty scalar_fun array_fun = do - ctr <- builtin (closureCtrFun $ length arg_tys) - pas <- mapM paDictOfType (init arg_tys) - return $ Var ctr `mkTyApps` (arg_tys ++ [res_ty]) + { ctr <- builtin (closureCtrFun $ length arg_tys) + ; pas <- mapM paDictOfType (init arg_tys) + ; return $ Var ctr `mkTyApps` (arg_tys ++ [res_ty]) `mkApps` (pas ++ [scalar_fun, array_fun]) + } -- cgit v1.2.1