author     Sylvain Henry <sylvain@haskus.fr>            2020-02-24 20:59:43 +0100
committer  Marge Bot <ben+marge-bot@smart-cactus.org>   2020-02-26 15:10:58 -0500
commit     817f93eac4d13f680e8e3e7a25eb403b1864f82e (patch)
tree       f7014721e49627f15d76f44a5bf663043e35fafc /compiler/GHC/Core.hs
parent     b2b49a0aad353201678970c76d8305a5dcb1bfab (diff)
download   haskell-817f93eac4d13f680e8e3e7a25eb403b1864f82e.tar.gz
Modules: Core (#13009)
Update haddock submodule
Diffstat (limited to 'compiler/GHC/Core.hs')
-rw-r--r--  compiler/GHC/Core.hs  2346
1 file changed, 2346 insertions, 0 deletions
diff --git a/compiler/GHC/Core.hs b/compiler/GHC/Core.hs new file mode 100644 index 0000000000..59556fccc2 --- /dev/null +++ b/compiler/GHC/Core.hs @@ -0,0 +1,2346 @@ +{- +(c) The University of Glasgow 2006 +(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-} + +{-# LANGUAGE CPP, DeriveDataTypeable, FlexibleContexts #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} + +-- | GHC.Core holds all the main data types for use by for the Glasgow Haskell Compiler midsection +module GHC.Core ( + -- * Main data types + Expr(..), Alt, Bind(..), AltCon(..), Arg, + Tickish(..), TickishScoping(..), TickishPlacement(..), + CoreProgram, CoreExpr, CoreAlt, CoreBind, CoreArg, CoreBndr, + TaggedExpr, TaggedAlt, TaggedBind, TaggedArg, TaggedBndr(..), deTagExpr, + + -- * In/Out type synonyms + InId, InBind, InExpr, InAlt, InArg, InType, InKind, + InBndr, InVar, InCoercion, InTyVar, InCoVar, + OutId, OutBind, OutExpr, OutAlt, OutArg, OutType, OutKind, + OutBndr, OutVar, OutCoercion, OutTyVar, OutCoVar, MOutCoercion, + + -- ** 'Expr' construction + mkLet, mkLets, mkLetNonRec, mkLetRec, mkLams, + mkApps, mkTyApps, mkCoApps, mkVarApps, mkTyArg, + + mkIntLit, mkIntLitInt, + mkWordLit, mkWordLitWord, + mkWord64LitWord64, mkInt64LitInt64, + mkCharLit, mkStringLit, + mkFloatLit, mkFloatLitFloat, + mkDoubleLit, mkDoubleLitDouble, + + mkConApp, mkConApp2, mkTyBind, mkCoBind, + varToCoreExpr, varsToCoreExprs, + + isId, cmpAltCon, cmpAlt, ltAlt, + + -- ** Simple 'Expr' access functions and predicates + bindersOf, bindersOfBinds, rhssOfBind, rhssOfAlts, + collectBinders, collectTyBinders, collectTyAndValBinders, + collectNBinders, + collectArgs, stripNArgs, collectArgsTicks, flattenBinds, + + exprToType, exprToCoercion_maybe, + applyTypeToArg, + + isValArg, isTypeArg, isCoArg, isTyCoArg, valArgCount, valBndrCount, + isRuntimeArg, isRuntimeVar, + + -- * Tick-related functions + tickishCounts, tickishScoped, tickishScopesLike, tickishFloatable, + tickishCanSplit, mkNoCount, mkNoScope, + tickishIsCode, tickishPlace, + tickishContains, + + -- * Unfolding data types + Unfolding(..), UnfoldingGuidance(..), UnfoldingSource(..), + + -- ** Constructing 'Unfolding's + noUnfolding, bootUnfolding, evaldUnfolding, mkOtherCon, + unSaturatedOk, needSaturated, boringCxtOk, boringCxtNotOk, + + -- ** Predicates and deconstruction on 'Unfolding' + unfoldingTemplate, expandUnfolding_maybe, + maybeUnfoldingTemplate, otherCons, + isValueUnfolding, isEvaldUnfolding, isCheapUnfolding, + isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding, + isStableUnfolding, isFragileUnfolding, hasSomeUnfolding, + isBootUnfolding, + canUnfold, neverUnfoldGuidance, isStableSource, + + -- * Annotated expression data types + AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt, + + -- ** Operations on annotated expressions + collectAnnArgs, collectAnnArgsTicks, + + -- ** Operations on annotations + deAnnotate, deAnnotate', deAnnAlt, deAnnBind, + collectAnnBndrs, collectNAnnBndrs, + + -- * Orphanhood + IsOrphan(..), isOrphan, notOrphan, chooseOrphanAnchor, + + -- * Core rule data types + CoreRule(..), RuleBase, + RuleName, RuleFun, IdUnfoldingFun, InScopeEnv, + RuleEnv(..), mkRuleEnv, emptyRuleEnv, + + -- ** Operations on 'CoreRule's + ruleArity, ruleName, ruleIdName, ruleActivation, + setRuleIdName, ruleModule, + isBuiltinRule, isLocalRule, isAutoRule, + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import CostCentre 
+import VarEnv( InScopeSet ) +import Var +import Type +import Coercion +import Name +import NameSet +import NameEnv( NameEnv, emptyNameEnv ) +import Literal +import DataCon +import Module +import BasicTypes +import GHC.Driver.Session +import Outputable +import Util +import UniqSet +import SrcLoc ( RealSrcSpan, containsSpan ) +import Binary + +import Data.Data hiding (TyCon) +import Data.Int +import Data.Word + +infixl 4 `mkApps`, `mkTyApps`, `mkVarApps`, `App`, `mkCoApps` +-- Left associative, so that we can say (f `mkTyApps` xs `mkVarApps` ys) + +{- +************************************************************************ +* * +\subsection{The main data types} +* * +************************************************************************ + +These data types are the heart of the compiler +-} + +-- | This is the data type that represents GHCs core intermediate language. Currently +-- GHC uses System FC <https://www.microsoft.com/en-us/research/publication/system-f-with-type-equality-coercions/> for this purpose, +-- which is closely related to the simpler and better known System F <http://en.wikipedia.org/wiki/System_F>. +-- +-- We get from Haskell source to this Core language in a number of stages: +-- +-- 1. The source code is parsed into an abstract syntax tree, which is represented +-- by the data type 'GHC.Hs.Expr.HsExpr' with the names being 'RdrName.RdrNames' +-- +-- 2. This syntax tree is /renamed/, which attaches a 'Unique.Unique' to every 'RdrName.RdrName' +-- (yielding a 'Name.Name') to disambiguate identifiers which are lexically identical. +-- For example, this program: +-- +-- @ +-- f x = let f x = x + 1 +-- in f (x - 2) +-- @ +-- +-- Would be renamed by having 'Unique's attached so it looked something like this: +-- +-- @ +-- f_1 x_2 = let f_3 x_4 = x_4 + 1 +-- in f_3 (x_2 - 2) +-- @ +-- But see Note [Shadowing] below. +-- +-- 3. The resulting syntax tree undergoes type checking (which also deals with instantiating +-- type class arguments) to yield a 'GHC.Hs.Expr.HsExpr' type that has 'Id.Id' as it's names. +-- +-- 4. Finally the syntax tree is /desugared/ from the expressive 'GHC.Hs.Expr.HsExpr' type into +-- this 'Expr' type, which has far fewer constructors and hence is easier to perform +-- optimization, analysis and code generation on. +-- +-- The type parameter @b@ is for the type of binders in the expression tree. +-- +-- The language consists of the following elements: +-- +-- * Variables +-- See Note [Variable occurrences in Core] +-- +-- * Primitive literals +-- +-- * Applications: note that the argument may be a 'Type'. +-- See Note [Core let/app invariant] +-- See Note [Levity polymorphism invariants] +-- +-- * Lambda abstraction +-- See Note [Levity polymorphism invariants] +-- +-- * Recursive and non recursive @let@s. Operationally +-- this corresponds to allocating a thunk for the things +-- bound and then executing the sub-expression. +-- +-- See Note [Core letrec invariant] +-- See Note [Core let/app invariant] +-- See Note [Levity polymorphism invariants] +-- See Note [Core type and coercion invariant] +-- +-- * Case expression. Operationally this corresponds to evaluating +-- the scrutinee (expression examined) to weak head normal form +-- and then examining at most one level of resulting constructor (i.e. you +-- cannot do nested pattern matching directly with this). 
+-- +-- The binder gets bound to the value of the scrutinee, +-- and the 'Type' must be that of all the case alternatives +-- +-- IMPORTANT: see Note [Case expression invariants] +-- +-- * Cast an expression to a particular type. +-- This is used to implement @newtype@s (a @newtype@ constructor or +-- destructor just becomes a 'Cast' in Core) and GADTs. +-- +-- * Notes. These allow general information to be added to expressions +-- in the syntax tree +-- +-- * A type: this should only show up at the top level of an Arg +-- +-- * A coercion + +{- Note [Why does Case have a 'Type' field?] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The obvious alternative is + exprType (Case scrut bndr alts) + | (_,_,rhs1):_ <- alts + = exprType rhs1 + +But caching the type in the Case constructor + exprType (Case scrut bndr ty alts) = ty +is better for at least three reasons: + +* It works when there are no alternatives (see case invariant 1 above) + +* It might be faster in deeply-nested situations. + +* It might not be quite the same as (exprType rhs) for one + of the RHSs in alts. Consider a phantom type synonym + type S a = Int + and we want to form the case expression + case x of { K (a::*) -> (e :: S a) } + Then exprType of the RHS is (S a), but we cannot make that be + the 'ty' in the Case constructor because 'a' is simply not in + scope there. Instead we must expand the synonym to Int before + putting it in the Case constructor. See GHC.Core.Utils.mkSingleAltCase. + + So we'd have to do synonym expansion in exprType which would + be inefficient. + +* The type stored in the case is checked with lintInTy. This checks + (among other things) that it does not mention any variables that are + not in scope. If we did not have the type there, it would be a bit + harder for Core Lint to reject case blah of Ex x -> x where + data Ex = forall a. Ex a. +-} + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +data Expr b + = Var Id + | Lit Literal + | App (Expr b) (Arg b) + | Lam b (Expr b) + | Let (Bind b) (Expr b) + | Case (Expr b) b Type [Alt b] -- See Note [Case expression invariants] + -- and Note [Why does Case have a 'Type' field?] + | Cast (Expr b) Coercion + | Tick (Tickish Id) (Expr b) + | Type Type + | Coercion Coercion + deriving Data + +-- | Type synonym for expressions that occur in function argument positions. +-- Only 'Arg' should contain a 'Type' at top level, general 'Expr' should not +type Arg b = Expr b + +-- | A case split alternative. Consists of the constructor leading to the alternative, +-- the variables bound from the constructor, and the expression to be executed given that binding. +-- The default alternative is @(DEFAULT, [], rhs)@ + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +type Alt b = (AltCon, [b], Expr b) + +-- | A case alternative constructor (i.e. pattern match) + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +data AltCon + = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@. + -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@ + + | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@ + -- Invariant: always an *unlifted* literal + -- See Note [Literal alternatives] + + | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@ + deriving (Eq, Data) + +-- This instance is a bit shady. 
It can only be used to compare AltCons for +-- a single type constructor. Fortunately, it seems quite unlikely that we'll +-- ever need to compare AltCons for different type constructors. +-- The instance adheres to the order described in [Core case invariants] +instance Ord AltCon where + compare (DataAlt con1) (DataAlt con2) = + ASSERT( dataConTyCon con1 == dataConTyCon con2 ) + compare (dataConTag con1) (dataConTag con2) + compare (DataAlt _) _ = GT + compare _ (DataAlt _) = LT + compare (LitAlt l1) (LitAlt l2) = compare l1 l2 + compare (LitAlt _) DEFAULT = GT + compare DEFAULT DEFAULT = EQ + compare DEFAULT _ = LT + +-- | Binding, used for top level bindings in a module and local bindings in a @let@. + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +data Bind b = NonRec b (Expr b) + | Rec [(b, (Expr b))] + deriving Data + +{- +Note [Shadowing] +~~~~~~~~~~~~~~~~ +While various passes attempt to rename on-the-fly in a manner that +avoids "shadowing" (thereby simplifying downstream optimizations), +neither the simplifier nor any other pass GUARANTEES that shadowing is +avoided. Thus, all passes SHOULD work fine even in the presence of +arbitrary shadowing in their inputs. + +In particular, scrutinee variables `x` in expressions of the form +`Case e x t` are often renamed to variables with a prefix +"wild_". These "wild" variables may appear in the body of the +case-expression, and further, may be shadowed within the body. + +So the Unique in a Var is not really unique at all. Still, it's very +useful to give a constant-time equality/ordering for Vars, and to give +a key that can be used to make sets of Vars (VarSet), or mappings from +Vars to other things (VarEnv). Moreover, if you do want to eliminate +shadowing, you can give a new Unique to an Id without changing its +printable name, which makes debugging easier. + +Note [Literal alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Literal alternatives (LitAlt lit) are always for *un-lifted* literals. +We have one literal, a literal Integer, that is lifted, and we don't +allow in a LitAlt, because LitAlt cases don't do any evaluation. Also +(see #5603) if you say + case 3 of + S# x -> ... + J# _ _ -> ... +(where S#, J# are the constructors for Integer) we don't want the +simplifier calling findAlt with argument (LitAlt 3). No no. Integer +literals are an opaque encoding of an algebraic data type, not of +an unlifted literal, like all the others. + +Also, we do not permit case analysis with literal patterns on floating-point +types. See #9238 and Note [Rules for floating-point comparisons] in +PrelRules for the rationale for this restriction. + +-------------------------- GHC.Core INVARIANTS --------------------------- + +Note [Variable occurrences in Core] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Variable /occurrences/ are never CoVars, though /bindings/ can be. +All CoVars appear in Coercions. + +For example + \(c :: Age~#Int) (d::Int). d |> (sym c) +Here 'c' is a CoVar, which is lambda-bound, but it /occurs/ in +a Coercion, (sym c). + +Note [Core letrec invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The right hand sides of all top-level and recursive @let@s +/must/ be of lifted type (see "Type#type_classification" for +the meaning of /lifted/ vs. /unlifted/). + +There is one exception to this rule, top-level @let@s are +allowed to bind primitive string literals: see +Note [Core top-level string literals]. 
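For example (a sketch, in the informal Core notation used elsewhere in these
Notes): the invariant permits a recursive binding of lifted type, such as

    letrec ones :: [Int]
           ones = 1 : ones        -- fine: [Int] is lifted, so the RHS can be a thunk
    in ...

but it rules out a recursive binding of unlifted type, such as

    letrec x :: Int#
           x = x +# 1#            -- not allowed: Int# is unlifted
    in ...

An unlifted binding must be evaluated eagerly and cannot be a thunk, so a
recursive definition of it has no sensible evaluation order.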
+ +Note [Core top-level string literals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As an exception to the usual rule that top-level binders must be lifted, +we allow binding primitive string literals (of type Addr#) of type Addr# at the +top level. This allows us to share string literals earlier in the pipeline and +crucially allows other optimizations in the Core2Core pipeline to fire. +Consider, + + f n = let a::Addr# = "foo"# + in \x -> blah + +In order to be able to inline `f`, we would like to float `a` to the top. +Another option would be to inline `a`, but that would lead to duplicating string +literals, which we want to avoid. See #8472. + +The solution is simply to allow top-level unlifted binders. We can't allow +arbitrary unlifted expression at the top-level though, unlifted binders cannot +be thunks, so we just allow string literals. + +We allow the top-level primitive string literals to be wrapped in Ticks +in the same way they can be wrapped when nested in an expression. +CoreToSTG currently discards Ticks around top-level primitive string literals. +See #14779. + +Also see Note [Compilation plan for top-level string literals]. + +Note [Compilation plan for top-level string literals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here is a summary on how top-level string literals are handled by various +parts of the compilation pipeline. + +* In the source language, there is no way to bind a primitive string literal + at the top level. + +* In Core, we have a special rule that permits top-level Addr# bindings. See + Note [Core top-level string literals]. Core-to-core passes may introduce + new top-level string literals. + +* In STG, top-level string literals are explicitly represented in the syntax + tree. + +* A top-level string literal may end up exported from a module. In this case, + in the object file, the content of the exported literal is given a label with + the _bytes suffix. + +Note [Core let/app invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The let/app invariant + the right hand side of a non-recursive 'Let', and + the argument of an 'App', + /may/ be of unlifted type, but only if + the expression is ok-for-speculation + or the 'Let' is for a join point. + +This means that the let can be floated around +without difficulty. For example, this is OK: + + y::Int# = x +# 1# + +But this is not, as it may affect termination if the +expression is floated out: + + y::Int# = fac 4# + +In this situation you should use @case@ rather than a @let@. The function +'GHC.Core.Utils.needsCaseBinding' can help you determine which to generate, or +alternatively use 'GHC.Core.Make.mkCoreLet' rather than this constructor directly, +which will generate a @case@ if necessary + +The let/app invariant is initially enforced by mkCoreLet and mkCoreApp in +GHC.Core.Make. + +For discussion of some implications of the let/app invariant primops see +Note [Checking versus non-checking primops] in PrimOp. + +Note [Case expression invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Case expressions are one of the more complicated elements of the Core +language, and come with a number of invariants. All of them should be +checked by Core Lint. + +1. The list of alternatives may be empty; + See Note [Empty case alternatives] + +2. The 'DEFAULT' case alternative must be first in the list, + if it occurs at all. Checked in GHC.Core.Lint.checkCaseAlts. + +3. The remaining cases are in order of (strictly) increasing + tag (for 'DataAlts') or + lit (for 'LitAlts'). 
+ This makes finding the relevant constructor easy, and makes + comparison easier too. Checked in GHC.Core.Lint.checkCaseAlts. + +4. The list of alternatives must be exhaustive. An /exhaustive/ case + does not necessarily mention all constructors: + + @ + data Foo = Red | Green | Blue + ... case x of + Red -> True + other -> f (case x of + Green -> ... + Blue -> ... ) ... + @ + + The inner case does not need a @Red@ alternative, because @x@ + can't be @Red@ at that program point. + + This is not checked by Core Lint -- it's very hard to do so. + E.g. suppose that inner case was floated out, thus: + let a = case x of + Green -> ... + Blue -> ... ) + case x of + Red -> True + other -> f a + Now it's really hard to see that the Green/Blue case is + exhaustive. But it is. + + If you have a case-expression that really /isn't/ exhaustive, + we may generate seg-faults. Consider the Green/Blue case + above. Since there are only two branches we may generate + code that tests for Green, and if not Green simply /assumes/ + Blue (since, if the case is exhaustive, that's all that + remains). Of course, if it's not Blue and we start fetching + fields that should be in a Blue constructor, we may die + horribly. See also Note [Core Lint guarantee] in GHC.Core.Lint. + +5. Floating-point values must not be scrutinised against literals. + See #9238 and Note [Rules for floating-point comparisons] + in PrelRules for rationale. Checked in lintCaseExpr; + see the call to isFloatingTy. + +6. The 'ty' field of (Case scrut bndr ty alts) is the type of the + /entire/ case expression. Checked in lintAltExpr. + See also Note [Why does Case have a 'Type' field?]. + +7. The type of the scrutinee must be the same as the type + of the case binder, obviously. Checked in lintCaseExpr. + +Note [Core type and coercion invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We allow a /non-recursive/, /non-top-level/ let to bind type and +coercion variables. These can be very convenient for postponing type +substitutions until the next run of the simplifier. + +* A type variable binding must have a RHS of (Type ty) + +* A coercion variable binding must have a RHS of (Coercion co) + + It is possible to have terms that return a coercion, but we use + case-binding for those; e.g. + case (eq_sel d) of (co :: a ~# b) -> blah + where eq_sel :: (a~b) -> (a~#b) + + Or even even + case (df @Int) of (co :: a ~# b) -> blah + Which is very exotic, and I think never encountered; but see + Note [Equality superclasses in quantified constraints] + in TcCanonical + +Note [Core case invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +See Note [Case expression invariants] + +Note [Levity polymorphism invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The levity-polymorphism invariants are these (as per "Levity Polymorphism", +PLDI '17): + +* The type of a term-binder must not be levity-polymorphic, + unless it is a let(rec)-bound join point + (see Note [Invariants on join points]) + +* The type of the argument of an App must not be levity-polymorphic. + +A type (t::TYPE r) is "levity polymorphic" if 'r' has any free variables. + +For example + \(r::RuntimeRep). \(a::TYPE r). \(x::a). e +is illegal because x's type has kind (TYPE r), which has 'r' free. + +See Note [Levity polymorphism checking] in GHC.HsToCore.Monad to see where these +invariants are established for user-written code. 
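Here is a user-level sketch of the term-binder restriction (illustrative only,
not drawn from the compiler itself). GHC rejects the following, because the
binder 'x' would have the levity-polymorphic type (a :: TYPE r), and hence an
unknown runtime representation:

    {-# LANGUAGE PolyKinds, RankNTypes, KindSignatures #-}
    import GHC.Exts (TYPE, RuntimeRep)

    levId :: forall (r :: RuntimeRep) (a :: TYPE r). a -> a
    levId x = x     -- rejected: x's type is levity-polymorphic

By contrast, a function such as 'error' may /return/ a value of
levity-polymorphic type, since only binders and arguments are restricted.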
+ +Note [Core let goal] +~~~~~~~~~~~~~~~~~~~~ +* The simplifier tries to ensure that if the RHS of a let is a constructor + application, its arguments are trivial, so that the constructor can be + inlined vigorously. + +Note [Type let] +~~~~~~~~~~~~~~~ +See #type_let# + +Note [Empty case alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The alternatives of a case expression should be exhaustive. But +this exhaustive list can be empty! + +* A case expression can have empty alternatives if (and only if) the + scrutinee is bound to raise an exception or diverge. When do we know + this? See Note [Bottoming expressions] in GHC.Core.Utils. + +* The possibility of empty alternatives is one reason we need a type on + the case expression: if the alternatives are empty we can't get the + type from the alternatives! + +* In the case of empty types (see Note [Bottoming expressions]), say + data T + we do NOT want to replace + case (x::T) of Bool {} --> error Bool "Inaccessible case" + because x might raise an exception, and *that*'s what we want to see! + (#6067 is an example.) To preserve semantics we'd have to say + x `seq` error Bool "Inaccessible case" + but the 'seq' is just such a case, so we are back to square 1. + +* We can use the empty-alternative construct to coerce error values from + one type to another. For example + + f :: Int -> Int + f n = error "urk" + + g :: Int -> (# Char, Bool #) + g x = case f x of { 0 -> ..., n -> ... } + + Then if we inline f in g's RHS we get + case (error Int "urk") of (# Char, Bool #) { ... } + and we can discard the alternatives since the scrutinee is bottom to give + case (error Int "urk") of (# Char, Bool #) {} + + This is nicer than using an unsafe coerce between Int ~ (# Char,Bool #), + if for no other reason that we don't need to instantiate the (~) at an + unboxed type. + +* We treat a case expression with empty alternatives as trivial iff + its scrutinee is (see GHC.Core.Utils.exprIsTrivial). This is actually + important; see Note [Empty case is trivial] in GHC.Core.Utils + +* An empty case is replaced by its scrutinee during the CoreToStg + conversion; remember STG is un-typed, so there is no need for + the empty case to do the type conversion. + +Note [Join points] +~~~~~~~~~~~~~~~~~~ +In Core, a *join point* is a specially tagged function whose only occurrences +are saturated tail calls. A tail call can appear in these places: + + 1. In the branches (not the scrutinee) of a case + 2. Underneath a let (value or join point) + 3. Inside another join point + +We write a join-point declaration as + join j @a @b x y = e1 in e2, +like a let binding but with "join" instead (or "join rec" for "let rec"). Note +that we put the parameters before the = rather than using lambdas; this is +because it's relevant how many parameters the join point takes *as a join +point.* This number is called the *join arity,* distinct from arity because it +counts types as well as values. Note that a join point may return a lambda! So + join j x = x + 1 +is different from + join j = \x -> x + 1 +The former has join arity 1, while the latter has join arity 0. + +The identifier for a join point is called a join id or a *label.* An invocation +is called a *jump.* We write a jump using the jump keyword: + + jump j 3 + +The words *label* and *jump* are evocative of assembly code (or Cmm) for a +reason: join points are indeed compiled as labeled blocks, and jumps become +actual jumps (plus argument passing and stack adjustment). 
There is no closure +allocated and only a fraction of the function-call overhead. Hence we would +like as many functions as possible to become join points (see OccurAnal) and +the type rules for join points ensure we preserve the properties that make them +efficient. + +In the actual AST, a join point is indicated by the IdDetails of the binder: a +local value binding gets 'VanillaId' but a join point gets a 'JoinId' with its +join arity. + +For more details, see the paper: + + Luke Maurer, Paul Downen, Zena Ariola, and Simon Peyton Jones. "Compiling + without continuations." Submitted to PLDI'17. + + https://www.microsoft.com/en-us/research/publication/compiling-without-continuations/ + +Note [Invariants on join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Join points must follow these invariants: + + 1. All occurrences must be tail calls. Each of these tail calls must pass the + same number of arguments, counting both types and values; we call this the + "join arity" (to distinguish from regular arity, which only counts values). + + See Note [Join points are less general than the paper] + + 2. For join arity n, the right-hand side must begin with at least n lambdas. + No ticks, no casts, just lambdas! C.f. GHC.Core.Utils.joinRhsArity. + + 2a. Moreover, this same constraint applies to any unfolding of + the binder. Reason: if we want to push a continuation into + the RHS we must push it into the unfolding as well. + + 2b. The Arity (in the IdInfo) of a join point is the number of value + binders in the top n lambdas, where n is the join arity. + + So arity <= join arity; the former counts only value binders + while the latter counts all binders. + e.g. Suppose $j has join arity 1 + let j = \x y. e in case x of { A -> j 1; B -> j 2 } + Then its ordinary arity is also 1, not 2. + + The arity of a join point isn't very important; but short of setting + it to zero, it is helpful to have an invariant. E.g. #17294. + + 3. If the binding is recursive, then all other bindings in the recursive group + must also be join points. + + 4. The binding's type must not be polymorphic in its return type (as defined + in Note [The polymorphism rule of join points]). + +However, join points have simpler invariants in other ways + + 5. A join point can have an unboxed type without the RHS being + ok-for-speculation (i.e. drop the let/app invariant) + e.g. let j :: Int# = factorial x in ... + + 6. A join point can have a levity-polymorphic RHS + e.g. let j :: r :: TYPE l = fail void# in ... + This happened in an intermediate program #13394 + +Examples: + + join j1 x = 1 + x in jump j (jump j x) -- Fails 1: non-tail call + join j1' x = 1 + x in if even a + then jump j1 a + else jump j1 a b -- Fails 1: inconsistent calls + join j2 x = flip (+) x in j2 1 2 -- Fails 2: not enough lambdas + join j2' x = \y -> x + y in j3 1 -- Passes: extra lams ok + join j @a (x :: a) = x -- Fails 4: polymorphic in ret type + +Invariant 1 applies to left-hand sides of rewrite rules, so a rule for a join +point must have an exact call as its LHS. + +Strictly speaking, invariant 3 is redundant, since a call from inside a lazy +binding isn't a tail call. Since a let-bound value can't invoke a free join +point, then, they can't be mutually recursive. (A Core binding group *can* +include spurious extra bindings if the occurrence analyser hasn't run, so +invariant 3 does still need to be checked.) For the rigorous definition of +"tail call", see Section 3 of the paper (Note [Join points]). 
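To see how these invariants are typically satisfied in practice, here is a
small source-level example (a sketch; whether 'go' actually becomes a join
point depends on the optimiser):

    sumTo :: Int -> Int
    sumTo n = go 0 1
      where
        go :: Int -> Int -> Int
        go acc i
          | i > n     = acc
          | otherwise = go (acc + i) (i + 1)

Every call to 'go' -- the initial one and the recursive one -- is a saturated
tail call with two arguments (invariant 1), the desugared RHS begins with two
value lambdas (invariant 2), and 'go' is the only binding in its recursive
group (invariant 3), so the occurrence analyser can mark it AlwaysTailCalled
and the simplifier can turn it into a join point.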
+ +Invariant 4 is subtle; see Note [The polymorphism rule of join points]. + +Invariant 6 is to enable code like this: + + f = \(r :: RuntimeRep) (a :: TYPE r) (x :: T). + join j :: a + j = error @r @a "bloop" + in case x of + A -> j + B -> j + C -> error @r @a "blurp" + +Core Lint will check these invariants, anticipating that any binder whose +OccInfo is marked AlwaysTailCalled will become a join point as soon as the +simplifier (or simpleOptPgm) runs. + +Note [Join points are less general than the paper] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the paper "Compiling without continuations", this expression is +perfectly valid: + + join { j = \_ -> e } + in (case blah of ) + ( True -> j void# ) arg + ( False -> blah ) + +assuming 'j' has arity 1. Here the call to 'j' does not look like a +tail call, but actually everything is fine. See Section 3, "Managing \Delta" +in the paper. + +In GHC, however, we adopt a slightly more restrictive subset, in which +join point calls must be tail calls. I think we /could/ loosen it up, but +in fact the simplifier ensures that we always get tail calls, and it makes +the back end a bit easier I think. Generally, just less to think about; +nothing deeper than that. + +Note [The type of a join point] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A join point has the same type it would have as a function. That is, if it takes +an Int and a Bool and its body produces a String, its type is `Int -> Bool -> +String`. Natural as this may seem, it can be awkward. A join point shouldn't be +thought to "return" in the same sense a function does---a jump is one-way. This +is crucial for understanding how case-of-case interacts with join points: + + case (join + j :: Int -> Bool -> String + j x y = ... + in + jump j z w) of + "" -> True + _ -> False + +The simplifier will pull the case into the join point (see Note [Case-of-case +and join points] in Simplify): + + join + j :: Int -> Bool -> Bool -- changed! + j x y = case ... of "" -> True + _ -> False + in + jump j z w + +The body of the join point now returns a Bool, so the label `j` has to have its +type updated accordingly. Inconvenient though this may be, it has the advantage +that 'GHC.Core.Utils.exprType' can still return a type for any expression, including +a jump. + +This differs from the paper (see Note [Invariants on join points]). In the +paper, we instead give j the type `Int -> Bool -> forall a. a`. Then each jump +carries the "return type" as a parameter, exactly the way other non-returning +functions like `error` work: + + case (join + j :: Int -> Bool -> forall a. a + j x y = ... + in + jump j z w @String) of + "" -> True + _ -> False + +Now we can move the case inward and we only have to change the jump: + + join + j :: Int -> Bool -> forall a. a + j x y = case ... of "" -> True + _ -> False + in + jump j z w @Bool + +(Core Lint would still check that the body of the join point has the right type; +that type would simply not be reflected in the join id.) + +Note [The polymorphism rule of join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Invariant 4 of Note [Invariants on join points] forbids a join point to be +polymorphic in its return type. That is, if its type is + + forall a1 ... ak. t1 -> ... -> tn -> r + +where its join arity is k+n, none of the type parameters ai may occur free in r. + +In some way, this falls out of the fact that given + + join + j @a1 ... @ak x1 ... 
xn = e1 + in e2 + +then all calls to `j` are in tail-call positions of `e`, and expressions in +tail-call positions in `e` have the same type as `e`. +Therefore the type of `e1` -- the return type of the join point -- must be the +same as the type of e2. +Since the type variables aren't bound in `e2`, its type can't include them, and +thus neither can the type of `e1`. + +This unfortunately prevents the `go` in the following code from being a +join-point: + + iter :: forall a. Int -> (a -> a) -> a -> a + iter @a n f x = go @a n f x + where + go :: forall a. Int -> (a -> a) -> a -> a + go @a 0 _ x = x + go @a n f x = go @a (n-1) f (f x) + +In this case, a static argument transformation would fix that (see +ticket #14620): + + iter :: forall a. Int -> (a -> a) -> a -> a + iter @a n f x = go' @a n f x + where + go' :: Int -> (a -> a) -> a -> a + go' 0 _ x = x + go' n f x = go' (n-1) f (f x) + +In general, loopification could be employed to do that (see #14068.) + +Can we simply drop the requirement, and allow `go` to be a join-point? We +could, and it would work. But we could not longer apply the case-of-join-point +transformation universally. This transformation would do: + + case (join go @a n f x = case n of 0 -> x + n -> go @a (n-1) f (f x) + in go @Bool n neg True) of + True -> e1; False -> e2 + + ===> + + join go @a n f x = case n of 0 -> case x of True -> e1; False -> e2 + n -> go @a (n-1) f (f x) + in go @Bool n neg True + +but that is ill-typed, as `x` is type `a`, not `Bool`. + + +This also justifies why we do not consider the `e` in `e |> co` to be in +tail position: A cast changes the type, but the type must be the same. But +operationally, casts are vacuous, so this is a bit unfortunate! See #14610 for +ideas how to fix this. + +************************************************************************ +* * + In/Out type synonyms +* * +********************************************************************* -} + +{- Many passes apply a substitution, and it's very handy to have type + synonyms to remind us whether or not the substitution has been applied -} + +-- Pre-cloning or substitution +type InBndr = CoreBndr +type InType = Type +type InKind = Kind +type InBind = CoreBind +type InExpr = CoreExpr +type InAlt = CoreAlt +type InArg = CoreArg +type InCoercion = Coercion + +-- Post-cloning or substitution +type OutBndr = CoreBndr +type OutType = Type +type OutKind = Kind +type OutCoercion = Coercion +type OutBind = CoreBind +type OutExpr = CoreExpr +type OutAlt = CoreAlt +type OutArg = CoreArg +type MOutCoercion = MCoercion + + +{- ********************************************************************* +* * + Ticks +* * +************************************************************************ +-} + +-- | Allows attaching extra information to points in expressions + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +data Tickish id = + -- | An @{-# SCC #-}@ profiling annotation, either automatically + -- added by the desugarer as a result of -auto-all, or added by + -- the user. + ProfNote { + profNoteCC :: CostCentre, -- ^ the cost centre + profNoteCount :: !Bool, -- ^ bump the entry count? + profNoteScope :: !Bool -- ^ scopes over the enclosed expression + -- (i.e. not just a tick) + } + + -- | A "tick" used by HPC to track the execution of each + -- subexpression in the original source code. + | HpcTick { + tickModule :: Module, + tickId :: !Int + } + + -- | A breakpoint for the GHCi debugger. 
This behaves like an HPC + -- tick, but has a list of free variables which will be available + -- for inspection in GHCi when the program stops at the breakpoint. + -- + -- NB. we must take account of these Ids when (a) counting free variables, + -- and (b) substituting (don't substitute for them) + | Breakpoint + { breakpointId :: !Int + , breakpointFVs :: [id] -- ^ the order of this list is important: + -- it matches the order of the lists in the + -- appropriate entry in GHC.Driver.Types.ModBreaks. + -- + -- Careful about substitution! See + -- Note [substTickish] in GHC.Core.Subst. + } + + -- | A source note. + -- + -- Source notes are pure annotations: Their presence should neither + -- influence compilation nor execution. The semantics are given by + -- causality: The presence of a source note means that a local + -- change in the referenced source code span will possibly provoke + -- the generated code to change. On the flip-side, the functionality + -- of annotated code *must* be invariant against changes to all + -- source code *except* the spans referenced in the source notes + -- (see "Causality of optimized Haskell" paper for details). + -- + -- Therefore extending the scope of any given source note is always + -- valid. Note that it is still undesirable though, as this reduces + -- their usefulness for debugging and profiling. Therefore we will + -- generally try only to make use of this property where it is + -- necessary to enable optimizations. + | SourceNote + { sourceSpan :: RealSrcSpan -- ^ Source covered + , sourceName :: String -- ^ Name for source location + -- (uses same names as CCs) + } + + deriving (Eq, Ord, Data) + +-- | A "counting tick" (where tickishCounts is True) is one that +-- counts evaluations in some way. We cannot discard a counting tick, +-- and the compiler should preserve the number of counting ticks as +-- far as possible. +-- +-- However, we still allow the simplifier to increase or decrease +-- sharing, so in practice the actual number of ticks may vary, except +-- that we never change the value from zero to non-zero or vice versa. +tickishCounts :: Tickish id -> Bool +tickishCounts n@ProfNote{} = profNoteCount n +tickishCounts HpcTick{} = True +tickishCounts Breakpoint{} = True +tickishCounts _ = False + + +-- | Specifies the scoping behaviour of ticks. This governs the +-- behaviour of ticks that care about the covered code and the cost +-- associated with it. Important for ticks relating to profiling. +data TickishScoping = + -- | No scoping: The tick does not care about what code it + -- covers. Transformations can freely move code inside as well as + -- outside without any additional annotation obligations + NoScope + + -- | Soft scoping: We want all code that is covered to stay + -- covered. Note that this scope type does not forbid + -- transformations from happening, as long as all results of + -- the transformations are still covered by this tick or a copy of + -- it. For example + -- + -- let x = tick<...> (let y = foo in bar) in baz + -- ===> + -- let x = tick<...> bar; y = tick<...> foo in baz + -- + -- Is a valid transformation as far as "bar" and "foo" is + -- concerned, because both still are scoped over by the tick. + -- + -- Note though that one might object to the "let" not being + -- covered by the tick any more. However, we are generally lax + -- with this - constant costs don't matter too much, and given + -- that the "let" was effectively merged we can view it as having + -- lost its identity anyway. 
+ -- + -- Also note that this scoping behaviour allows floating a tick + -- "upwards" in pretty much any situation. For example: + -- + -- case foo of x -> tick<...> bar + -- ==> + -- tick<...> case foo of x -> bar + -- + -- While this is always legal, we want to make a best effort to + -- only make us of this where it exposes transformation + -- opportunities. + | SoftScope + + -- | Cost centre scoping: We don't want any costs to move to other + -- cost-centre stacks. This means we not only want no code or cost + -- to get moved out of their cost centres, but we also object to + -- code getting associated with new cost-centre ticks - or + -- changing the order in which they get applied. + -- + -- A rule of thumb is that we don't want any code to gain new + -- annotations. However, there are notable exceptions, for + -- example: + -- + -- let f = \y -> foo in tick<...> ... (f x) ... + -- ==> + -- tick<...> ... foo[x/y] ... + -- + -- In-lining lambdas like this is always legal, because inlining a + -- function does not change the cost-centre stack when the + -- function is called. + | CostCentreScope + + deriving (Eq) + +-- | Returns the intended scoping rule for a Tickish +tickishScoped :: Tickish id -> TickishScoping +tickishScoped n@ProfNote{} + | profNoteScope n = CostCentreScope + | otherwise = NoScope +tickishScoped HpcTick{} = NoScope +tickishScoped Breakpoint{} = CostCentreScope + -- Breakpoints are scoped: eventually we're going to do call + -- stacks, but also this helps prevent the simplifier from moving + -- breakpoints around and changing their result type (see #1531). +tickishScoped SourceNote{} = SoftScope + +-- | Returns whether the tick scoping rule is at least as permissive +-- as the given scoping rule. +tickishScopesLike :: Tickish id -> TickishScoping -> Bool +tickishScopesLike t scope = tickishScoped t `like` scope + where NoScope `like` _ = True + _ `like` NoScope = False + SoftScope `like` _ = True + _ `like` SoftScope = False + CostCentreScope `like` _ = True + +-- | Returns @True@ for ticks that can be floated upwards easily even +-- where it might change execution counts, such as: +-- +-- Just (tick<...> foo) +-- ==> +-- tick<...> (Just foo) +-- +-- This is a combination of @tickishSoftScope@ and +-- @tickishCounts@. Note that in principle splittable ticks can become +-- floatable using @mkNoTick@ -- even though there's currently no +-- tickish for which that is the case. +tickishFloatable :: Tickish id -> Bool +tickishFloatable t = t `tickishScopesLike` SoftScope && not (tickishCounts t) + +-- | Returns @True@ for a tick that is both counting /and/ scoping and +-- can be split into its (tick, scope) parts using 'mkNoScope' and +-- 'mkNoTick' respectively. +tickishCanSplit :: Tickish id -> Bool +tickishCanSplit ProfNote{profNoteScope = True, profNoteCount = True} + = True +tickishCanSplit _ = False + +mkNoCount :: Tickish id -> Tickish id +mkNoCount n | not (tickishCounts n) = n + | not (tickishCanSplit n) = panic "mkNoCount: Cannot split!" +mkNoCount n@ProfNote{} = n {profNoteCount = False} +mkNoCount _ = panic "mkNoCount: Undefined split!" + +mkNoScope :: Tickish id -> Tickish id +mkNoScope n | tickishScoped n == NoScope = n + | not (tickishCanSplit n) = panic "mkNoScope: Cannot split!" +mkNoScope n@ProfNote{} = n {profNoteScope = False} +mkNoScope _ = panic "mkNoScope: Undefined split!" + +-- | Return @True@ if this source annotation compiles to some backend +-- code. 
Without this flag, the tickish is seen as a simple annotation +-- that does not have any associated evaluation code. +-- +-- What this means that we are allowed to disregard the tick if doing +-- so means that we can skip generating any code in the first place. A +-- typical example is top-level bindings: +-- +-- foo = tick<...> \y -> ... +-- ==> +-- foo = \y -> tick<...> ... +-- +-- Here there is just no operational difference between the first and +-- the second version. Therefore code generation should simply +-- translate the code as if it found the latter. +tickishIsCode :: Tickish id -> Bool +tickishIsCode SourceNote{} = False +tickishIsCode _tickish = True -- all the rest for now + + +-- | Governs the kind of expression that the tick gets placed on when +-- annotating for example using @mkTick@. If we find that we want to +-- put a tickish on an expression ruled out here, we try to float it +-- inwards until we find a suitable expression. +data TickishPlacement = + + -- | Place ticks exactly on run-time expressions. We can still + -- move the tick through pure compile-time constructs such as + -- other ticks, casts or type lambdas. This is the most + -- restrictive placement rule for ticks, as all tickishs have in + -- common that they want to track runtime processes. The only + -- legal placement rule for counting ticks. + PlaceRuntime + + -- | As @PlaceRuntime@, but we float the tick through all + -- lambdas. This makes sense where there is little difference + -- between annotating the lambda and annotating the lambda's code. + | PlaceNonLam + + -- | In addition to floating through lambdas, cost-centre style + -- tickishs can also be moved from constructors, non-function + -- variables and literals. For example: + -- + -- let x = scc<...> C (scc<...> y) (scc<...> 3) in ... + -- + -- Neither the constructor application, the variable or the + -- literal are likely to have any cost worth mentioning. And even + -- if y names a thunk, the call would not care about the + -- evaluation context. Therefore removing all annotations in the + -- above example is safe. + | PlaceCostCentre + + deriving (Eq) + +-- | Placement behaviour we want for the ticks +tickishPlace :: Tickish id -> TickishPlacement +tickishPlace n@ProfNote{} + | profNoteCount n = PlaceRuntime + | otherwise = PlaceCostCentre +tickishPlace HpcTick{} = PlaceRuntime +tickishPlace Breakpoint{} = PlaceRuntime +tickishPlace SourceNote{} = PlaceNonLam + +-- | Returns whether one tick "contains" the other one, therefore +-- making the second tick redundant. +tickishContains :: Eq b => Tickish b -> Tickish b -> Bool +tickishContains (SourceNote sp1 n1) (SourceNote sp2 n2) + = containsSpan sp1 sp2 && n1 == n2 + -- compare the String last +tickishContains t1 t2 + = t1 == t2 + +{- +************************************************************************ +* * + Orphans +* * +************************************************************************ +-} + +-- | Is this instance an orphan? If it is not an orphan, contains an 'OccName' +-- witnessing the instance's non-orphanhood. +-- See Note [Orphans] +data IsOrphan + = IsOrphan + | NotOrphan OccName -- The OccName 'n' witnesses the instance's non-orphanhood + -- In that case, the instance is fingerprinted as part + -- of the definition of 'n's definition + deriving Data + +-- | Returns true if 'IsOrphan' is orphan. +isOrphan :: IsOrphan -> Bool +isOrphan IsOrphan = True +isOrphan _ = False + +-- | Returns true if 'IsOrphan' is not an orphan. 
+notOrphan :: IsOrphan -> Bool +notOrphan NotOrphan{} = True +notOrphan _ = False + +chooseOrphanAnchor :: NameSet -> IsOrphan +-- Something (rule, instance) is relate to all the Names in this +-- list. Choose one of them to be an "anchor" for the orphan. We make +-- the choice deterministic to avoid gratuitous changes in the ABI +-- hash (#4012). Specifically, use lexicographic comparison of +-- OccName rather than comparing Uniques +-- +-- NB: 'minimum' use Ord, and (Ord OccName) works lexicographically +-- +chooseOrphanAnchor local_names + | isEmptyNameSet local_names = IsOrphan + | otherwise = NotOrphan (minimum occs) + where + occs = map nameOccName $ nonDetEltsUniqSet local_names + -- It's OK to use nonDetEltsUFM here, see comments above + +instance Binary IsOrphan where + put_ bh IsOrphan = putByte bh 0 + put_ bh (NotOrphan n) = do + putByte bh 1 + put_ bh n + get bh = do + h <- getByte bh + case h of + 0 -> return IsOrphan + _ -> do + n <- get bh + return $ NotOrphan n + +{- +Note [Orphans] +~~~~~~~~~~~~~~ +Class instances, rules, and family instances are divided into orphans +and non-orphans. Roughly speaking, an instance/rule is an orphan if +its left hand side mentions nothing defined in this module. Orphan-hood +has two major consequences + + * A module that contains orphans is called an "orphan module". If + the module being compiled depends (transitively) on an orphan + module M, then M.hi is read in regardless of whether M is otherwise + needed. This is to ensure that we don't miss any instance decls in + M. But it's painful, because it means we need to keep track of all + the orphan modules below us. + + * A non-orphan is not finger-printed separately. Instead, for + fingerprinting purposes it is treated as part of the entity it + mentions on the LHS. For example + data T = T1 | T2 + instance Eq T where .... + The instance (Eq T) is incorporated as part of T's fingerprint. + + In contrast, orphans are all fingerprinted together in the + mi_orph_hash field of the ModIface. + + See GHC.Iface.Utils.addFingerprints. + +Orphan-hood is computed + * For class instances: + when we make a ClsInst + (because it is needed during instance lookup) + + * For rules and family instances: + when we generate an IfaceRule (GHC.Iface.Utils.coreRuleToIfaceRule) + or IfaceFamInst (GHC.Iface.Utils.instanceToIfaceInst) +-} + +{- +************************************************************************ +* * +\subsection{Transformation rules} +* * +************************************************************************ + +The CoreRule type and its friends are dealt with mainly in GHC.Core.Rules, but +GHC.Core.FVs, GHC.Core.Subst, GHC.Core.Ppr, GHC.Core.Op.Tidy also inspect the +representation. +-} + +-- | Gathers a collection of 'CoreRule's. Maps (the name of) an 'Id' to its rules +type RuleBase = NameEnv [CoreRule] + -- The rules are unordered; + -- we sort out any overlaps on lookup + +-- | A full rule environment which we can apply rules from. Like a 'RuleBase', +-- but it also includes the set of visible orphans we use to filter out orphan +-- rules which are not visible (even though we can see them...) 
+data RuleEnv + = RuleEnv { re_base :: RuleBase + , re_visible_orphs :: ModuleSet + } + +mkRuleEnv :: RuleBase -> [Module] -> RuleEnv +mkRuleEnv rules vis_orphs = RuleEnv rules (mkModuleSet vis_orphs) + +emptyRuleEnv :: RuleEnv +emptyRuleEnv = RuleEnv emptyNameEnv emptyModuleSet + +-- | A 'CoreRule' is: +-- +-- * \"Local\" if the function it is a rule for is defined in the +-- same module as the rule itself. +-- +-- * \"Orphan\" if nothing on the LHS is defined in the same module +-- as the rule itself +data CoreRule + = Rule { + ru_name :: RuleName, -- ^ Name of the rule, for communication with the user + ru_act :: Activation, -- ^ When the rule is active + + -- Rough-matching stuff + -- see comments with InstEnv.ClsInst( is_cls, is_rough ) + ru_fn :: Name, -- ^ Name of the 'Id.Id' at the head of this rule + ru_rough :: [Maybe Name], -- ^ Name at the head of each argument to the left hand side + + -- Proper-matching stuff + -- see comments with InstEnv.ClsInst( is_tvs, is_tys ) + ru_bndrs :: [CoreBndr], -- ^ Variables quantified over + ru_args :: [CoreExpr], -- ^ Left hand side arguments + + -- And the right-hand side + ru_rhs :: CoreExpr, -- ^ Right hand side of the rule + -- Occurrence info is guaranteed correct + -- See Note [OccInfo in unfoldings and rules] + + -- Locality + ru_auto :: Bool, -- ^ @True@ <=> this rule is auto-generated + -- (notably by Specialise or SpecConstr) + -- @False@ <=> generated at the user's behest + -- See Note [Trimming auto-rules] in GHC.Iface.Tidy + -- for the sole purpose of this field. + + ru_origin :: !Module, -- ^ 'Module' the rule was defined in, used + -- to test if we should see an orphan rule. + + ru_orphan :: !IsOrphan, -- ^ Whether or not the rule is an orphan. + + ru_local :: Bool -- ^ @True@ iff the fn at the head of the rule is + -- defined in the same module as the rule + -- and is not an implicit 'Id' (like a record selector, + -- class operation, or data constructor). This + -- is different from 'ru_orphan', where a rule + -- can avoid being an orphan if *any* Name in + -- LHS of the rule was defined in the same + -- module as the rule. + } + + -- | Built-in rules are used for constant folding + -- and suchlike. They have no free variables. + -- A built-in rule is always visible (there is no such thing as + -- an orphan built-in rule.) + | BuiltinRule { + ru_name :: RuleName, -- ^ As above + ru_fn :: Name, -- ^ As above + ru_nargs :: Int, -- ^ Number of arguments that 'ru_try' consumes, + -- if it fires, including type arguments + ru_try :: RuleFun + -- ^ This function does the rewrite. It given too many + -- arguments, it simply discards them; the returned 'CoreExpr' + -- is just the rewrite of 'ru_fn' applied to the first 'ru_nargs' args + } + -- See Note [Extra args in rule matching] in GHC.Core.Rules + +type RuleFun = DynFlags -> InScopeEnv -> Id -> [CoreExpr] -> Maybe CoreExpr +type InScopeEnv = (InScopeSet, IdUnfoldingFun) + +type IdUnfoldingFun = Id -> Unfolding +-- A function that embodies how to unfold an Id if you need +-- to do that in the Rule. 
The reason we need to pass this info in +-- is that whether an Id is unfoldable depends on the simplifier phase + +isBuiltinRule :: CoreRule -> Bool +isBuiltinRule (BuiltinRule {}) = True +isBuiltinRule _ = False + +isAutoRule :: CoreRule -> Bool +isAutoRule (BuiltinRule {}) = False +isAutoRule (Rule { ru_auto = is_auto }) = is_auto + +-- | The number of arguments the 'ru_fn' must be applied +-- to before the rule can match on it +ruleArity :: CoreRule -> Int +ruleArity (BuiltinRule {ru_nargs = n}) = n +ruleArity (Rule {ru_args = args}) = length args + +ruleName :: CoreRule -> RuleName +ruleName = ru_name + +ruleModule :: CoreRule -> Maybe Module +ruleModule Rule { ru_origin } = Just ru_origin +ruleModule BuiltinRule {} = Nothing + +ruleActivation :: CoreRule -> Activation +ruleActivation (BuiltinRule { }) = AlwaysActive +ruleActivation (Rule { ru_act = act }) = act + +-- | The 'Name' of the 'Id.Id' at the head of the rule left hand side +ruleIdName :: CoreRule -> Name +ruleIdName = ru_fn + +isLocalRule :: CoreRule -> Bool +isLocalRule = ru_local + +-- | Set the 'Name' of the 'Id.Id' at the head of the rule left hand side +setRuleIdName :: Name -> CoreRule -> CoreRule +setRuleIdName nm ru = ru { ru_fn = nm } + +{- +************************************************************************ +* * + Unfoldings +* * +************************************************************************ + +The @Unfolding@ type is declared here to avoid numerous loops +-} + +-- | Records the /unfolding/ of an identifier, which is approximately the form the +-- identifier would have if we substituted its definition in for the identifier. +-- This type should be treated as abstract everywhere except in GHC.Core.Unfold +data Unfolding + = NoUnfolding -- ^ We have no information about the unfolding. + + | BootUnfolding -- ^ We have no information about the unfolding, because + -- this 'Id' came from an @hi-boot@ file. + -- See Note [Inlining and hs-boot files] in GHC.CoreToIface + -- for what this is used for. + + | OtherCon [AltCon] -- ^ It ain't one of these constructors. + -- @OtherCon xs@ also indicates that something has been evaluated + -- and hence there's no point in re-evaluating it. + -- @OtherCon []@ is used even for non-data-type values + -- to indicated evaluated-ness. Notably: + -- + -- > data C = C !(Int -> Int) + -- > case x of { C f -> ... } + -- + -- Here, @f@ gets an @OtherCon []@ unfolding. + + | DFunUnfolding { -- The Unfolding of a DFunId + -- See Note [DFun unfoldings] + -- df = /\a1..am. \d1..dn. MkD t1 .. tk + -- (op1 a1..am d1..dn) + -- (op2 a1..am d1..dn) + df_bndrs :: [Var], -- The bound variables [a1..m],[d1..dn] + df_con :: DataCon, -- The dictionary data constructor (never a newtype datacon) + df_args :: [CoreExpr] -- Args of the data con: types, superclasses and methods, + } -- in positional order + + | CoreUnfolding { -- An unfolding for an Id with no pragma, + -- or perhaps a NOINLINE pragma + -- (For NOINLINE, the phase, if any, is in the + -- InlinePragInfo for this Id.) 
+ uf_tmpl :: CoreExpr, -- Template; occurrence info is correct + uf_src :: UnfoldingSource, -- Where the unfolding came from + uf_is_top :: Bool, -- True <=> top level binding + uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard + -- a `seq` on this variable + uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function + -- Cached version of exprIsConLike + uf_is_work_free :: Bool, -- True <=> doesn't waste (much) work to expand + -- inside an inlining + -- Cached version of exprIsCheap + uf_expandable :: Bool, -- True <=> can expand in RULE matching + -- Cached version of exprIsExpandable + uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template. + } + -- ^ An unfolding with redundant cached information. Parameters: + -- + -- uf_tmpl: Template used to perform unfolding; + -- NB: Occurrence info is guaranteed correct: + -- see Note [OccInfo in unfoldings and rules] + -- + -- uf_is_top: Is this a top level binding? + -- + -- uf_is_value: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on + -- this variable + -- + -- uf_is_work_free: Does this waste only a little work if we expand it inside an inlining? + -- Basically this is a cached version of 'exprIsWorkFree' + -- + -- uf_guidance: Tells us about the /size/ of the unfolding template + + +------------------------------------------------ +data UnfoldingSource + = -- See also Note [Historical note: unfoldings for wrappers] + + InlineRhs -- The current rhs of the function + -- Replace uf_tmpl each time around + + | InlineStable -- From an INLINE or INLINABLE pragma + -- INLINE if guidance is UnfWhen + -- INLINABLE if guidance is UnfIfGoodArgs/UnfoldNever + -- (well, technically an INLINABLE might be made + -- UnfWhen if it was small enough, and then + -- it will behave like INLINE outside the current + -- module, but that is the way automatic unfoldings + -- work so it is consistent with the intended + -- meaning of INLINABLE). + -- + -- uf_tmpl may change, but only as a result of + -- gentle simplification, it doesn't get updated + -- to the current RHS during compilation as with + -- InlineRhs. + -- + -- See Note [InlineStable] + + | InlineCompulsory -- Something that *has* no binding, so you *must* inline it + -- Only a few primop-like things have this property + -- (see MkId.hs, calls to mkCompulsoryUnfolding). + -- Inline absolutely always, however boring the context. + + + +-- | 'UnfoldingGuidance' says when unfolding should take place +data UnfoldingGuidance + = UnfWhen { -- Inline without thinking about the *size* of the uf_tmpl + -- Used (a) for small *and* cheap unfoldings + -- (b) for INLINE functions + -- See Note [INLINE for small functions] in GHC.Core.Unfold + ug_arity :: Arity, -- Number of value arguments expected + + ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated + ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring + -- So True,True means "always" + } + + | UnfIfGoodArgs { -- Arose from a normal Id; the info here is the + -- result of a simple analysis of the RHS + + ug_args :: [Int], -- Discount if the argument is evaluated. + -- (i.e., a simplification will definitely + -- be possible). One elt of the list per *value* arg. + + ug_size :: Int, -- The "size" of the unfolding. + + ug_res :: Int -- Scrutinee discount: the discount to subtract if the thing is in + } -- a context (case (thing args) of ...), + -- (where there are the right number of arguments.) 
+
+  | UnfNever        -- The RHS is big, so don't inline it
+  deriving (Eq)
+
+{-
+Note [Historical note: unfoldings for wrappers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used to have a nice clever scheme in interface files for
+wrappers. A wrapper's unfolding can be reconstructed from its worker's
+id and its strictness. This decreased .hi file size (sometimes
+significantly, for modules like GHC.Classes with many high-arity w/w
+splits) and had a slight corresponding effect on compile times.
+
+However, when we added the second demand analysis, this scheme led to
+some Core lint errors. The second analysis could change the strictness
+signatures, which sometimes resulted in a wrapper's regenerated
+unfolding applying the wrapper to too many arguments.
+
+Instead of repairing the clever .hi scheme, we abandoned it in favor
+of simplicity. The .hi sizes are usually insignificant (excluding the
++1M for base libraries), and compile time barely increases (~+1% for
+nofib). The nicer upshot is that the UnfoldingSource no longer mentions
+an Id, so, eg, substitutions need not traverse them.
+
+
+Note [DFun unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~
+The Arity in a DFunUnfolding is the total number of args (type and value)
+that the DFun needs to produce a dictionary. That's not necessarily
+related to the ordinary arity of the dfun Id, esp if the class has
+one method, so the dictionary is represented by a newtype. Example
+
+  class C a where { op :: a -> Int }
+  instance C a => C [a] where op xs = op (head xs)
+
+The instance translates to
+
+  $dfCList :: forall a. C a => C [a]      -- Arity 2!
+  $dfCList = /\a.\d. $copList {a} d |> co
+
+  $copList :: forall a. C a => [a] -> Int  -- Arity 2!
+  $copList = /\a.\d.\xs. op {a} d (head xs)
+
+Now we might encounter (op (dfCList {ty} d) a1 a2)
+and we want the (op (dfCList {ty} d)) rule to fire, because $dfCList
+has all its arguments, even though its (value) arity is 2. That's
+why we record the number of expected arguments in the DFunUnfolding.
+
+Note that although it's an Arity, it's most convenient for it to give
+the *total* number of arguments, both type and value. See the use
+site in exprIsConApp_maybe.
+-}
+
+-- Constants for the UnfWhen constructor
+needSaturated, unSaturatedOk :: Bool
+needSaturated = False
+unSaturatedOk = True
+
+boringCxtNotOk, boringCxtOk :: Bool
+boringCxtOk = True
+boringCxtNotOk = False
+
+------------------------------------------------
+noUnfolding :: Unfolding
+-- ^ There is no known 'Unfolding'
+evaldUnfolding :: Unfolding
+-- ^ This unfolding marks the associated thing as being evaluated
+
+noUnfolding = NoUnfolding
+evaldUnfolding = OtherCon []
+
+-- | There is no known 'Unfolding', because this came from an
+-- hi-boot file.
+bootUnfolding :: Unfolding
+bootUnfolding = BootUnfolding
+
+mkOtherCon :: [AltCon] -> Unfolding
+mkOtherCon = OtherCon
+
+isStableSource :: UnfoldingSource -> Bool
+-- Keep the unfolding template
+isStableSource InlineCompulsory = True
+isStableSource InlineStable = True
+isStableSource InlineRhs = False
+
+-- | Retrieves the template of an unfolding: panics if none is known
+unfoldingTemplate :: Unfolding -> CoreExpr
+unfoldingTemplate = uf_tmpl
+
+-- | Retrieves the template of an unfolding if possible
+-- maybeUnfoldingTemplate is used mainly when specialising, and we do
+-- want to specialise DFuns, so it's important to return a template
+-- for DFunUnfoldings
+maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
+maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr })
+  = Just expr
+maybeUnfoldingTemplate (DFunUnfolding { df_bndrs = bndrs, df_con = con, df_args = args })
+  = Just (mkLams bndrs (mkApps (Var (dataConWorkId con)) args))
+maybeUnfoldingTemplate _
+  = Nothing
+
+-- | The constructors that the unfolding could never be:
+-- returns @[]@ if no information is available
+otherCons :: Unfolding -> [AltCon]
+otherCons (OtherCon cons) = cons
+otherCons _ = []
+
+-- | Determines if it is certainly the case that the unfolding will
+-- yield a value (something in HNF): returns @False@ if unsure
+isValueUnfolding :: Unfolding -> Bool
+  -- Returns False for OtherCon
+isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
+isValueUnfolding _ = False
+
+-- | Determines if it is possibly the case that the unfolding will
+-- yield a value. Unlike 'isValueUnfolding' it returns @True@
+-- for 'OtherCon'
+isEvaldUnfolding :: Unfolding -> Bool
+  -- Returns True for OtherCon
+isEvaldUnfolding (OtherCon _) = True
+isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald
+isEvaldUnfolding _ = False
+
+-- | @True@ if the unfolding is a constructor application, the application
+-- of a CONLIKE function or 'OtherCon'
+isConLikeUnfolding :: Unfolding -> Bool
+isConLikeUnfolding (OtherCon _) = True
+isConLikeUnfolding (CoreUnfolding { uf_is_conlike = con }) = con
+isConLikeUnfolding _ = False
+
+-- | Is the thing we will unfold into certainly cheap?
+isCheapUnfolding :: Unfolding -> Bool
+isCheapUnfolding (CoreUnfolding { uf_is_work_free = is_wf }) = is_wf
+isCheapUnfolding _ = False
+
+isExpandableUnfolding :: Unfolding -> Bool
+isExpandableUnfolding (CoreUnfolding { uf_expandable = is_expable }) = is_expable
+isExpandableUnfolding _ = False
+
+expandUnfolding_maybe :: Unfolding -> Maybe CoreExpr
+-- Expand an expandable unfolding; this is used in rule matching
+-- See Note [Expanding variables] in GHC.Core.Rules
+-- The key point here is that CONLIKE things can be expanded
+expandUnfolding_maybe (CoreUnfolding { uf_expandable = True, uf_tmpl = rhs }) = Just rhs
+expandUnfolding_maybe _ = Nothing
+
+isCompulsoryUnfolding :: Unfolding -> Bool
+isCompulsoryUnfolding (CoreUnfolding { uf_src = InlineCompulsory }) = True
+isCompulsoryUnfolding _ = False
+
+isStableUnfolding :: Unfolding -> Bool
+-- True of unfoldings that should not be overwritten
+-- by a CoreUnfolding for the RHS of a let-binding
+isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src
+isStableUnfolding (DFunUnfolding {}) = True
+isStableUnfolding _ = False
+
+-- | Only returns False if there is no unfolding information available at all
+hasSomeUnfolding :: Unfolding -> Bool
+hasSomeUnfolding NoUnfolding = False
+hasSomeUnfolding BootUnfolding = False
+hasSomeUnfolding _ = True
+
+isBootUnfolding :: Unfolding -> Bool
+isBootUnfolding BootUnfolding = True
+isBootUnfolding _ = False
+
+neverUnfoldGuidance :: UnfoldingGuidance -> Bool
+neverUnfoldGuidance UnfNever = True
+neverUnfoldGuidance _ = False
+
+isFragileUnfolding :: Unfolding -> Bool
+-- An unfolding is fragile if it mentions free variables or
+-- is otherwise subject to change. A robust one can be kept.
+-- See Note [Fragile unfoldings]
+isFragileUnfolding (CoreUnfolding {}) = True
+isFragileUnfolding (DFunUnfolding {}) = True
+isFragileUnfolding _ = False
+  -- NoUnfolding, BootUnfolding, OtherCon are all non-fragile
+
+canUnfold :: Unfolding -> Bool
+canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g)
+canUnfold _ = False
+
+{- Note [Fragile unfoldings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+An unfolding is "fragile" if it mentions free variables (and hence would
+need substitution) or might be affected by optimisation. The non-fragile
+ones are
+
+  NoUnfolding, BootUnfolding
+
+  OtherCon {}   If we know this binder (say a lambda binder) will be
+                bound to an evaluated thing, we want to retain that
+                info in simpleOptExpr; see #13077.
+
+We consider even a StableUnfolding as fragile, because it needs substitution.
+
+Note [InlineStable]
+~~~~~~~~~~~~~~~~~~~
+When you say
+  {-# INLINE f #-}
+  f x = <rhs>
+you intend that calls (f e) are replaced by <rhs>[e/x]. So we
+should capture (\x.<rhs>) in the Unfolding of 'f', and never meddle
+with it. Meanwhile, we can optimise <rhs> to our heart's content,
+leaving the original unfolding intact in the Unfolding of 'f'. For example
+  all xs = foldr (&&) True xs
+  any p = all . map p  {-# INLINE any #-}
+We optimise any's RHS fully, but leave the InlineRule saying "all . map p",
+which deforests well at the call site.
+
+So an INLINE pragma gives rise to an InlineRule, which captures the original RHS.
+
+Moreover, it's only used when 'f' is applied to the
+specified number of arguments; that is, the number of arguments on
+the LHS of the '=' sign in the original source definition.
+For example, (.) is now defined in the libraries like this
+  {-# INLINE (.) #-}
+  (.) f g = \x -> f (g x)
+so that it'll inline when applied to two arguments. 
If 'x' appeared +on the left, thus + (.) f g x = f (g x) +it'd only inline when applied to three arguments. This slightly-experimental +change was requested by Roman, but it seems to make sense. + +See also Note [Inlining an InlineRule] in GHC.Core.Unfold. + + +Note [OccInfo in unfoldings and rules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In unfoldings and rules, we guarantee that the template is occ-analysed, +so that the occurrence info on the binders is correct. This is important, +because the Simplifier does not re-analyse the template when using it. If +the occurrence info is wrong + - We may get more simplifier iterations than necessary, because + once-occ info isn't there + - More seriously, we may get an infinite loop if there's a Rec + without a loop breaker marked + + +************************************************************************ +* * + AltCon +* * +************************************************************************ +-} + +-- The Ord is needed for the FiniteMap used in the lookForConstructor +-- in SimplEnv. If you declared that lookForConstructor *ignores* +-- constructor-applications with LitArg args, then you could get +-- rid of this Ord. + +instance Outputable AltCon where + ppr (DataAlt dc) = ppr dc + ppr (LitAlt lit) = ppr lit + ppr DEFAULT = text "__DEFAULT" + +cmpAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Ordering +cmpAlt (con1, _, _) (con2, _, _) = con1 `cmpAltCon` con2 + +ltAlt :: (AltCon, a, b) -> (AltCon, a, b) -> Bool +ltAlt a1 a2 = (a1 `cmpAlt` a2) == LT + +cmpAltCon :: AltCon -> AltCon -> Ordering +-- ^ Compares 'AltCon's within a single list of alternatives +-- DEFAULT comes out smallest, so that sorting by AltCon puts +-- alternatives in the order required: see Note [Case expression invariants] +cmpAltCon DEFAULT DEFAULT = EQ +cmpAltCon DEFAULT _ = LT + +cmpAltCon (DataAlt d1) (DataAlt d2) = dataConTag d1 `compare` dataConTag d2 +cmpAltCon (DataAlt _) DEFAULT = GT +cmpAltCon (LitAlt l1) (LitAlt l2) = l1 `compare` l2 +cmpAltCon (LitAlt _) DEFAULT = GT + +cmpAltCon con1 con2 = WARN( True, text "Comparing incomparable AltCons" <+> + ppr con1 <+> ppr con2 ) + LT + +{- +************************************************************************ +* * +\subsection{Useful synonyms} +* * +************************************************************************ + +Note [CoreProgram] +~~~~~~~~~~~~~~~~~~ +The top level bindings of a program, a CoreProgram, are represented as +a list of CoreBind + + * Later bindings in the list can refer to earlier ones, but not vice + versa. So this is OK + NonRec { x = 4 } + Rec { p = ...q...x... + ; q = ...p...x } + Rec { f = ...p..x..f.. } + NonRec { g = ..f..q...x.. } + But it would NOT be ok for 'f' to refer to 'g'. + + * The occurrence analyser does strongly-connected component analysis + on each Rec binding, and splits it into a sequence of smaller + bindings where possible. So the program typically starts life as a + single giant Rec, which is then dependency-analysed into smaller + chunks. 
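+
+   As an illustrative sketch (not itself part of this module; prog and the
+   rhs_* names are stand-ins for the bindings shown above), such a program
+   is simply a list of Bind values:
+
+       prog :: CoreProgram
+       prog = [ NonRec x rhs_x
+              , Rec [ (p, rhs_p), (q, rhs_q) ]   -- may mention x
+              , Rec [ (f, rhs_f) ]               -- may mention p, x, f
+              , NonRec g rhs_g ]                 -- may mention f, q, x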
+-} + +-- If you edit this type, you may need to update the GHC formalism +-- See Note [GHC Formalism] in GHC.Core.Lint +type CoreProgram = [CoreBind] -- See Note [CoreProgram] + +-- | The common case for the type of binders and variables when +-- we are manipulating the Core language within GHC +type CoreBndr = Var +-- | Expressions where binders are 'CoreBndr's +type CoreExpr = Expr CoreBndr +-- | Argument expressions where binders are 'CoreBndr's +type CoreArg = Arg CoreBndr +-- | Binding groups where binders are 'CoreBndr's +type CoreBind = Bind CoreBndr +-- | Case alternatives where binders are 'CoreBndr's +type CoreAlt = Alt CoreBndr + +{- +************************************************************************ +* * +\subsection{Tagging} +* * +************************************************************************ +-} + +-- | Binders are /tagged/ with a t +data TaggedBndr t = TB CoreBndr t -- TB for "tagged binder" + +type TaggedBind t = Bind (TaggedBndr t) +type TaggedExpr t = Expr (TaggedBndr t) +type TaggedArg t = Arg (TaggedBndr t) +type TaggedAlt t = Alt (TaggedBndr t) + +instance Outputable b => Outputable (TaggedBndr b) where + ppr (TB b l) = char '<' <> ppr b <> comma <> ppr l <> char '>' + +deTagExpr :: TaggedExpr t -> CoreExpr +deTagExpr (Var v) = Var v +deTagExpr (Lit l) = Lit l +deTagExpr (Type ty) = Type ty +deTagExpr (Coercion co) = Coercion co +deTagExpr (App e1 e2) = App (deTagExpr e1) (deTagExpr e2) +deTagExpr (Lam (TB b _) e) = Lam b (deTagExpr e) +deTagExpr (Let bind body) = Let (deTagBind bind) (deTagExpr body) +deTagExpr (Case e (TB b _) ty alts) = Case (deTagExpr e) b ty (map deTagAlt alts) +deTagExpr (Tick t e) = Tick t (deTagExpr e) +deTagExpr (Cast e co) = Cast (deTagExpr e) co + +deTagBind :: TaggedBind t -> CoreBind +deTagBind (NonRec (TB b _) rhs) = NonRec b (deTagExpr rhs) +deTagBind (Rec prs) = Rec [(b, deTagExpr rhs) | (TB b _, rhs) <- prs] + +deTagAlt :: TaggedAlt t -> CoreAlt +deTagAlt (con, bndrs, rhs) = (con, [b | TB b _ <- bndrs], deTagExpr rhs) + +{- +************************************************************************ +* * +\subsection{Core-constructing functions with checking} +* * +************************************************************************ +-} + +-- | Apply a list of argument expressions to a function expression in a nested fashion. Prefer to +-- use 'GHC.Core.Make.mkCoreApps' if possible +mkApps :: Expr b -> [Arg b] -> Expr b +-- | Apply a list of type argument expressions to a function expression in a nested fashion +mkTyApps :: Expr b -> [Type] -> Expr b +-- | Apply a list of coercion argument expressions to a function expression in a nested fashion +mkCoApps :: Expr b -> [Coercion] -> Expr b +-- | Apply a list of type or value variables to a function expression in a nested fashion +mkVarApps :: Expr b -> [Var] -> Expr b +-- | Apply a list of argument expressions to a data constructor in a nested fashion. 
Prefer to +-- use 'GHC.Core.Make.mkCoreConApps' if possible +mkConApp :: DataCon -> [Arg b] -> Expr b + +mkApps f args = foldl' App f args +mkCoApps f args = foldl' (\ e a -> App e (Coercion a)) f args +mkVarApps f vars = foldl' (\ e a -> App e (varToCoreExpr a)) f vars +mkConApp con args = mkApps (Var (dataConWorkId con)) args + +mkTyApps f args = foldl' (\ e a -> App e (mkTyArg a)) f args + +mkConApp2 :: DataCon -> [Type] -> [Var] -> Expr b +mkConApp2 con tys arg_ids = Var (dataConWorkId con) + `mkApps` map Type tys + `mkApps` map varToCoreExpr arg_ids + +mkTyArg :: Type -> Expr b +mkTyArg ty + | Just co <- isCoercionTy_maybe ty = Coercion co + | otherwise = Type ty + +-- | Create a machine integer literal expression of type @Int#@ from an @Integer@. +-- If you want an expression of type @Int@ use 'GHC.Core.Make.mkIntExpr' +mkIntLit :: DynFlags -> Integer -> Expr b +-- | Create a machine integer literal expression of type @Int#@ from an @Int@. +-- If you want an expression of type @Int@ use 'GHC.Core.Make.mkIntExpr' +mkIntLitInt :: DynFlags -> Int -> Expr b + +mkIntLit dflags n = Lit (mkLitInt dflags n) +mkIntLitInt dflags n = Lit (mkLitInt dflags (toInteger n)) + +-- | Create a machine word literal expression of type @Word#@ from an @Integer@. +-- If you want an expression of type @Word@ use 'GHC.Core.Make.mkWordExpr' +mkWordLit :: DynFlags -> Integer -> Expr b +-- | Create a machine word literal expression of type @Word#@ from a @Word@. +-- If you want an expression of type @Word@ use 'GHC.Core.Make.mkWordExpr' +mkWordLitWord :: DynFlags -> Word -> Expr b + +mkWordLit dflags w = Lit (mkLitWord dflags w) +mkWordLitWord dflags w = Lit (mkLitWord dflags (toInteger w)) + +mkWord64LitWord64 :: Word64 -> Expr b +mkWord64LitWord64 w = Lit (mkLitWord64 (toInteger w)) + +mkInt64LitInt64 :: Int64 -> Expr b +mkInt64LitInt64 w = Lit (mkLitInt64 (toInteger w)) + +-- | Create a machine character literal expression of type @Char#@. +-- If you want an expression of type @Char@ use 'GHC.Core.Make.mkCharExpr' +mkCharLit :: Char -> Expr b +-- | Create a machine string literal expression of type @Addr#@. +-- If you want an expression of type @String@ use 'GHC.Core.Make.mkStringExpr' +mkStringLit :: String -> Expr b + +mkCharLit c = Lit (mkLitChar c) +mkStringLit s = Lit (mkLitString s) + +-- | Create a machine single precision literal expression of type @Float#@ from a @Rational@. +-- If you want an expression of type @Float@ use 'GHC.Core.Make.mkFloatExpr' +mkFloatLit :: Rational -> Expr b +-- | Create a machine single precision literal expression of type @Float#@ from a @Float@. +-- If you want an expression of type @Float@ use 'GHC.Core.Make.mkFloatExpr' +mkFloatLitFloat :: Float -> Expr b + +mkFloatLit f = Lit (mkLitFloat f) +mkFloatLitFloat f = Lit (mkLitFloat (toRational f)) + +-- | Create a machine double precision literal expression of type @Double#@ from a @Rational@. +-- If you want an expression of type @Double@ use 'GHC.Core.Make.mkDoubleExpr' +mkDoubleLit :: Rational -> Expr b +-- | Create a machine double precision literal expression of type @Double#@ from a @Double@. +-- If you want an expression of type @Double@ use 'GHC.Core.Make.mkDoubleExpr' +mkDoubleLitDouble :: Double -> Expr b + +mkDoubleLit d = Lit (mkLitDouble d) +mkDoubleLitDouble d = Lit (mkLitDouble (toRational d)) + +-- | Bind all supplied binding groups over an expression in a nested let expression. Assumes +-- that the rhs satisfies the let/app invariant. 
Prefer to use 'GHC.Core.Make.mkCoreLets' if
+-- possible, which does guarantee the invariant
+mkLets :: [Bind b] -> Expr b -> Expr b
+-- | Bind all supplied binders over an expression in a nested lambda expression. Prefer to
+-- use 'GHC.Core.Make.mkCoreLams' if possible
+mkLams :: [b] -> Expr b -> Expr b
+
+mkLams binders body = foldr Lam body binders
+mkLets binds body = foldr mkLet body binds
+
+mkLet :: Bind b -> Expr b -> Expr b
+-- The desugarer sometimes generates an empty Rec group
+-- which Lint rejects, so we kill it off right away
+mkLet (Rec []) body = body
+mkLet bind body = Let bind body
+
+-- | @mkLetNonRec bndr rhs body@ wraps @body@ in a @let@ binding @bndr@.
+mkLetNonRec :: b -> Expr b -> Expr b -> Expr b
+mkLetNonRec b rhs body = Let (NonRec b rhs) body
+
+-- | @mkLetRec binds body@ wraps @body@ in a @let rec@ with the given set of
+-- @binds@ if binds is non-empty.
+mkLetRec :: [(b, Expr b)] -> Expr b -> Expr b
+mkLetRec [] body = body
+mkLetRec bs body = Let (Rec bs) body
+
+-- | Create a binding group where a type variable is bound to a type. Per "GHC.Core#type_let",
+-- this can only be used to bind something in a non-recursive @let@ expression
+mkTyBind :: TyVar -> Type -> CoreBind
+mkTyBind tv ty = NonRec tv (Type ty)
+
+-- | Create a binding group where a coercion variable is bound to a coercion;
+-- this can only be used to bind something in a non-recursive @let@ expression
+mkCoBind :: CoVar -> Coercion -> CoreBind
+mkCoBind cv co = NonRec cv (Coercion co)
+
+-- | Convert a binder into either a 'Var' or 'Type' 'Expr' appropriately
+varToCoreExpr :: CoreBndr -> Expr b
+varToCoreExpr v | isTyVar v = Type (mkTyVarTy v)
+                | isCoVar v = Coercion (mkCoVarCo v)
+                | otherwise = ASSERT( isId v ) Var v
+
+varsToCoreExprs :: [CoreBndr] -> [Expr b]
+varsToCoreExprs vs = map varToCoreExpr vs
+
+{-
+************************************************************************
+*                                                                      *
+            Getting a result type
+*                                                                      *
+************************************************************************
+
+These are defined here to avoid a module loop between GHC.Core.Utils and GHC.Core.FVs
+
+-}
+
+applyTypeToArg :: Type -> CoreExpr -> Type
+-- ^ Determines the type resulting from applying an expression with given type
+-- to a given argument expression
+applyTypeToArg fun_ty arg = piResultTy fun_ty (exprToType arg)
+
+-- | If the expression is a 'Type', converts. Otherwise,
+-- panics. NB: This does /not/ convert 'Coercion' to 'CoercionTy'.
+exprToType :: CoreExpr -> Type
+exprToType (Type ty) = ty
+exprToType _bad = pprPanic "exprToType" empty
+
+-- | If the expression is a 'Coercion', converts.
+exprToCoercion_maybe :: CoreExpr -> Maybe Coercion
+exprToCoercion_maybe (Coercion co) = Just co
+exprToCoercion_maybe _ = Nothing
+
+{-
+************************************************************************
+*                                                                      *
+\subsection{Simple access functions}
+*                                                                      *
+************************************************************************
+-}
+
+-- | Extract every variable bound by this group
+bindersOf :: Bind b -> [b]
+-- If you edit this function, you may need to update the GHC formalism
+-- See Note [GHC Formalism] in GHC.Core.Lint
+bindersOf (NonRec binder _) = [binder]
+bindersOf (Rec pairs) = [binder | (binder, _) <- pairs]
+
+-- | 'bindersOf' applied to a list of binding groups
+bindersOfBinds :: [Bind b] -> [b]
+bindersOfBinds binds = foldr ((++) . 
bindersOf) [] binds + +rhssOfBind :: Bind b -> [Expr b] +rhssOfBind (NonRec _ rhs) = [rhs] +rhssOfBind (Rec pairs) = [rhs | (_,rhs) <- pairs] + +rhssOfAlts :: [Alt b] -> [Expr b] +rhssOfAlts alts = [e | (_,_,e) <- alts] + +-- | Collapse all the bindings in the supplied groups into a single +-- list of lhs\/rhs pairs suitable for binding in a 'Rec' binding group +flattenBinds :: [Bind b] -> [(b, Expr b)] +flattenBinds (NonRec b r : binds) = (b,r) : flattenBinds binds +flattenBinds (Rec prs1 : binds) = prs1 ++ flattenBinds binds +flattenBinds [] = [] + +-- | We often want to strip off leading lambdas before getting down to +-- business. Variants are 'collectTyBinders', 'collectValBinders', +-- and 'collectTyAndValBinders' +collectBinders :: Expr b -> ([b], Expr b) +collectTyBinders :: CoreExpr -> ([TyVar], CoreExpr) +collectValBinders :: CoreExpr -> ([Id], CoreExpr) +collectTyAndValBinders :: CoreExpr -> ([TyVar], [Id], CoreExpr) +-- | Strip off exactly N leading lambdas (type or value). Good for use with +-- join points. +collectNBinders :: Int -> Expr b -> ([b], Expr b) + +collectBinders expr + = go [] expr + where + go bs (Lam b e) = go (b:bs) e + go bs e = (reverse bs, e) + +collectTyBinders expr + = go [] expr + where + go tvs (Lam b e) | isTyVar b = go (b:tvs) e + go tvs e = (reverse tvs, e) + +collectValBinders expr + = go [] expr + where + go ids (Lam b e) | isId b = go (b:ids) e + go ids body = (reverse ids, body) + +collectTyAndValBinders expr + = (tvs, ids, body) + where + (tvs, body1) = collectTyBinders expr + (ids, body) = collectValBinders body1 + +collectNBinders orig_n orig_expr + = go orig_n [] orig_expr + where + go 0 bs expr = (reverse bs, expr) + go n bs (Lam b e) = go (n-1) (b:bs) e + go _ _ _ = pprPanic "collectNBinders" $ int orig_n + +-- | Takes a nested application expression and returns the function +-- being applied and the arguments to which it is applied +collectArgs :: Expr b -> (Expr b, [Arg b]) +collectArgs expr + = go expr [] + where + go (App f a) as = go f (a:as) + go e as = (e, as) + +-- | Attempt to remove the last N arguments of a function call. +-- Strip off any ticks or coercions encountered along the way and any +-- at the end. +stripNArgs :: Word -> Expr a -> Maybe (Expr a) +stripNArgs !n (Tick _ e) = stripNArgs n e +stripNArgs n (Cast f _) = stripNArgs n f +stripNArgs 0 e = Just e +stripNArgs n (App f _) = stripNArgs (n - 1) f +stripNArgs _ _ = Nothing + +-- | Like @collectArgs@, but also collects looks through floatable +-- ticks if it means that we can find more arguments. +collectArgsTicks :: (Tickish Id -> Bool) -> Expr b + -> (Expr b, [Arg b], [Tickish Id]) +collectArgsTicks skipTick expr + = go expr [] [] + where + go (App f a) as ts = go f (a:as) ts + go (Tick t e) as ts + | skipTick t = go e as (t:ts) + go e as ts = (e, as, reverse ts) + + +{- +************************************************************************ +* * +\subsection{Predicates} +* * +************************************************************************ + +At one time we optionally carried type arguments through to runtime. +@isRuntimeVar v@ returns if (Lam v _) really becomes a lambda at runtime, +i.e. if type applications are actual lambdas because types are kept around +at runtime. Similarly isRuntimeArg. +-} + +-- | Will this variable exist at runtime? +isRuntimeVar :: Var -> Bool +isRuntimeVar = isId + +-- | Will this argument expression exist at runtime? 
+isRuntimeArg :: CoreExpr -> Bool +isRuntimeArg = isValArg + +-- | Returns @True@ for value arguments, false for type args +-- NB: coercions are value arguments (zero width, to be sure, +-- like State#, but still value args). +isValArg :: Expr b -> Bool +isValArg e = not (isTypeArg e) + +-- | Returns @True@ iff the expression is a 'Type' or 'Coercion' +-- expression at its top level +isTyCoArg :: Expr b -> Bool +isTyCoArg (Type {}) = True +isTyCoArg (Coercion {}) = True +isTyCoArg _ = False + +-- | Returns @True@ iff the expression is a 'Coercion' +-- expression at its top level +isCoArg :: Expr b -> Bool +isCoArg (Coercion {}) = True +isCoArg _ = False + +-- | Returns @True@ iff the expression is a 'Type' expression at its +-- top level. Note this does NOT include 'Coercion's. +isTypeArg :: Expr b -> Bool +isTypeArg (Type {}) = True +isTypeArg _ = False + +-- | The number of binders that bind values rather than types +valBndrCount :: [CoreBndr] -> Int +valBndrCount = count isId + +-- | The number of argument expressions that are values rather than types at their top level +valArgCount :: [Arg b] -> Int +valArgCount = count isValArg + +{- +************************************************************************ +* * +\subsection{Annotated core} +* * +************************************************************************ +-} + +-- | Annotated core: allows annotation at every node in the tree +type AnnExpr bndr annot = (annot, AnnExpr' bndr annot) + +-- | A clone of the 'Expr' type but allowing annotation at every tree node +data AnnExpr' bndr annot + = AnnVar Id + | AnnLit Literal + | AnnLam bndr (AnnExpr bndr annot) + | AnnApp (AnnExpr bndr annot) (AnnExpr bndr annot) + | AnnCase (AnnExpr bndr annot) bndr Type [AnnAlt bndr annot] + | AnnLet (AnnBind bndr annot) (AnnExpr bndr annot) + | AnnCast (AnnExpr bndr annot) (annot, Coercion) + -- Put an annotation on the (root of) the coercion + | AnnTick (Tickish Id) (AnnExpr bndr annot) + | AnnType Type + | AnnCoercion Coercion + +-- | A clone of the 'Alt' type but allowing annotation at every tree node +type AnnAlt bndr annot = (AltCon, [bndr], AnnExpr bndr annot) + +-- | A clone of the 'Bind' type but allowing annotation at every tree node +data AnnBind bndr annot + = AnnNonRec bndr (AnnExpr bndr annot) + | AnnRec [(bndr, AnnExpr bndr annot)] + +-- | Takes a nested application expression and returns the function +-- being applied and the arguments to which it is applied +collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a]) +collectAnnArgs expr + = go expr [] + where + go (_, AnnApp f a) as = go f (a:as) + go e as = (e, as) + +collectAnnArgsTicks :: (Tickish Var -> Bool) -> AnnExpr b a + -> (AnnExpr b a, [AnnExpr b a], [Tickish Var]) +collectAnnArgsTicks tickishOk expr + = go expr [] [] + where + go (_, AnnApp f a) as ts = go f (a:as) ts + go (_, AnnTick t e) as ts | tickishOk t + = go e as (t:ts) + go e as ts = (e, as, reverse ts) + +deAnnotate :: AnnExpr bndr annot -> Expr bndr +deAnnotate (_, e) = deAnnotate' e + +deAnnotate' :: AnnExpr' bndr annot -> Expr bndr +deAnnotate' (AnnType t) = Type t +deAnnotate' (AnnCoercion co) = Coercion co +deAnnotate' (AnnVar v) = Var v +deAnnotate' (AnnLit lit) = Lit lit +deAnnotate' (AnnLam binder body) = Lam binder (deAnnotate body) +deAnnotate' (AnnApp fun arg) = App (deAnnotate fun) (deAnnotate arg) +deAnnotate' (AnnCast e (_,co)) = Cast (deAnnotate e) co +deAnnotate' (AnnTick tick body) = Tick tick (deAnnotate body) + +deAnnotate' (AnnLet bind body) + = Let (deAnnBind bind) (deAnnotate body) 
+deAnnotate' (AnnCase scrut v t alts)
+  = Case (deAnnotate scrut) v t (map deAnnAlt alts)
+
+deAnnAlt :: AnnAlt bndr annot -> Alt bndr
+deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs)
+
+deAnnBind :: AnnBind b annot -> Bind b
+deAnnBind (AnnNonRec var rhs) = NonRec var (deAnnotate rhs)
+deAnnBind (AnnRec pairs) = Rec [(v,deAnnotate rhs) | (v,rhs) <- pairs]
+
+-- | As 'collectBinders' but for 'AnnExpr' rather than 'Expr'
+collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
+collectAnnBndrs e
+  = collect [] e
+  where
+    collect bs (_, AnnLam b body) = collect (b:bs) body
+    collect bs body               = (reverse bs, body)
+
+-- | As 'collectNBinders' but for 'AnnExpr' rather than 'Expr'
+collectNAnnBndrs :: Int -> AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot)
+collectNAnnBndrs orig_n e
+  = collect orig_n [] e
+  where
+    collect 0 bs body               = (reverse bs, body)
+    collect n bs (_, AnnLam b body) = collect (n-1) (b:bs) body
+    collect _ _  _                  = pprPanic "collectNAnnBndrs" $ int orig_n
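+
+-- A small usage sketch (illustrative only, not part of the GHC sources):
+-- it shows how the smart constructors defined earlier in this module pair
+-- up with the collect* helpers. Using a plain String binder type keeps the
+-- example self-contained, since no Vars or Uniques are needed.
+exampleRoundTrip :: (([String], Expr String), (Expr String, [Arg String]))
+exampleRoundTrip = (collectBinders lam, collectArgs app)
+  where
+    lam = mkLams ["x", "y"] (mkCharLit 'a')           -- \x y -> 'a'#
+    app = mkApps lam [mkCharLit 'b', mkCharLit 'c']   -- lam applied to 'b'# and 'c'#
+-- collectBinders lam yields (["x","y"], Lit ...), and collectArgs app yields
+-- (lam, [Lit ..., Lit ...]), undoing the nesting built by mkLams and mkApps.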