summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/basicTypes/Module.lhs6
-rw-r--r--compiler/basicTypes/Name.lhs5
-rw-r--r--compiler/cmm/CmmExpr.hs5
-rw-r--r--compiler/cmm/CmmPipeline.hs (renamed from compiler/cmm/CmmCPS.hs)27
-rw-r--r--compiler/cmm/CmmRewriteAssignments.hs608
-rw-r--r--compiler/cmm/CmmSpillReload.hs483
-rw-r--r--compiler/cmm/cmm-notes27
-rw-r--r--compiler/codeGen/CgPrimOp.hs49
-rw-r--r--compiler/codeGen/StgCmmPrim.hs60
-rw-r--r--compiler/ghc.cabal.in6
-rw-r--r--compiler/ghci/Linker.lhs41
-rw-r--r--compiler/hsSyn/Convert.lhs100
-rw-r--r--compiler/iface/IfaceEnv.lhs2
-rw-r--r--compiler/main/DynFlags.hs28
-rw-r--r--compiler/main/DynamicLoading.hs150
-rw-r--r--compiler/main/GhcPlugins.hs83
-rw-r--r--compiler/main/HscMain.lhs53
-rw-r--r--compiler/nativeGen/Size.hs16
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs90
-rw-r--r--compiler/prelude/PrelNames.lhs18
-rw-r--r--compiler/rename/RnBinds.lhs10
-rw-r--r--compiler/rename/RnEnv.lhs35
-rw-r--r--compiler/rename/RnPat.lhs15
-rw-r--r--compiler/simplCore/CoreMonad.lhs261
-rw-r--r--compiler/simplCore/SimplCore.lhs298
-rw-r--r--compiler/typecheck/TcCanonical.lhs31
-rw-r--r--compiler/vectorise/Vectorise/Builtins.hs55
-rw-r--r--compiler/vectorise/Vectorise/Builtins/Initialise.hs188
-rw-r--r--compiler/vectorise/Vectorise/Builtins/Modules.hs10
-rw-r--r--compiler/vectorise/Vectorise/Builtins/Prelude.hs209
-rw-r--r--compiler/vectorise/Vectorise/Env.hs17
-rw-r--r--compiler/vectorise/Vectorise/Monad.hs5
-rwxr-xr-xvalidate11
33 files changed, 1773 insertions, 1229 deletions
diff --git a/compiler/basicTypes/Module.lhs b/compiler/basicTypes/Module.lhs
index 89b3eddfd7..6e566a23ad 100644
--- a/compiler/basicTypes/Module.lhs
+++ b/compiler/basicTypes/Module.lhs
@@ -39,7 +39,8 @@ module Module
dphSeqPackageId,
dphParPackageId,
mainPackageId,
-
+ thisGhcPackageId,
+
-- * The Module type
Module,
modulePackageId, moduleName,
@@ -342,7 +343,7 @@ packageIdString = unpackFS . packageIdFS
integerPackageId, primPackageId,
basePackageId, rtsPackageId,
thPackageId, dphSeqPackageId, dphParPackageId,
- mainPackageId :: PackageId
+ mainPackageId, thisGhcPackageId :: PackageId
primPackageId = fsToPackageId (fsLit "ghc-prim")
integerPackageId = fsToPackageId (fsLit cIntegerLibrary)
basePackageId = fsToPackageId (fsLit "base")
@@ -350,6 +351,7 @@ rtsPackageId = fsToPackageId (fsLit "rts")
thPackageId = fsToPackageId (fsLit "template-haskell")
dphSeqPackageId = fsToPackageId (fsLit "dph-seq")
dphParPackageId = fsToPackageId (fsLit "dph-par")
+thisGhcPackageId = fsToPackageId (fsLit ("ghc-" ++ cProjectVersion))
-- | This is the package Id for the current program. It is the default
-- package Id if you don't specify a package name. We don't add this prefix
diff --git a/compiler/basicTypes/Name.lhs b/compiler/basicTypes/Name.lhs
index a2b42a278e..e88e4a1b02 100644
--- a/compiler/basicTypes/Name.lhs
+++ b/compiler/basicTypes/Name.lhs
@@ -244,7 +244,10 @@ isSystemName _ = False
-- | Create a name which is (for now at least) local to the current module and hence
-- does not need a 'Module' to disambiguate it from other 'Name's
mkInternalName :: Unique -> OccName -> SrcSpan -> Name
-mkInternalName uniq occ loc = Name { n_uniq = getKeyFastInt uniq, n_sort = Internal, n_occ = occ, n_loc = loc }
+mkInternalName uniq occ loc = Name { n_uniq = getKeyFastInt uniq
+ , n_sort = Internal
+ , n_occ = occ
+ , n_loc = loc }
-- NB: You might worry that after lots of huffing and
-- puffing we might end up with two local names with distinct
-- uniques, but the same OccName. Indeed we can, but that's ok
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 869bc1b4ac..b8cd3280e8 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -10,7 +10,7 @@ module CmmExpr
, DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
, RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet
- , regUsedIn
+ , regUsedIn, regSlot
, Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
, module CmmMachOp
, module CmmType
@@ -267,6 +267,9 @@ isStackSlotOf :: CmmExpr -> LocalReg -> Bool
isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r'
isStackSlotOf _ _ = False
+regSlot :: LocalReg -> CmmExpr
+regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
+
-----------------------------------------------------------------------------
-- Stack slot use information for expressions and other types [_$_]
-----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmPipeline.hs
index 35eabb3317..a63413cf53 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -2,11 +2,11 @@
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course
-module CmmCPS (
- -- | Converts C-- with full proceedures and parameters
- -- to a CPS transformed C-- with the stack made manifest.
- -- Well, sort of.
- protoCmmCPS
+module CmmPipeline (
+ -- | Converts C-- with an implicit stack and native C-- calls into
+ -- optimized, CPS converted and native-call-less C--. The latter
+ -- C-- can be used to generate assembly.
+ cmmPipeline
) where
import CLabel
@@ -16,7 +16,9 @@ import CmmBuildInfoTables
import CmmCommonBlockElim
import CmmProcPoint
import CmmSpillReload
+import CmmRewriteAssignments
import CmmStackLayout
+import CmmContFlowOpt
import OptimizationFuel
import DynFlags
@@ -30,7 +32,7 @@ import Outputable
import StaticFlags
-----------------------------------------------------------------------------
--- |Top level driver for the CPS pass
+-- | Top level driver for C-- pipeline
-----------------------------------------------------------------------------
-- There are two complications here:
-- 1. We need to compile the procedures in two stages because we need
@@ -45,20 +47,27 @@ import StaticFlags
-- 2. We need to thread the module's SRT around when the SRT tables
-- are computed for each procedure.
-- The SRT needs to be threaded because it is grown lazily.
-protoCmmCPS :: HscEnv -- Compilation env including
+-- 3. We run control flow optimizations twice, once before any pipeline
+-- work is done, and once again at the very end on all of the
+-- resulting C-- blocks. EZY: It's unclear whether or not whether
+-- we actually need to do the initial pass.
+cmmPipeline :: HscEnv -- Compilation env including
-- dynamic flags: -dcmm-lint -ddump-cps-cmm
-> (TopSRT, [Cmm]) -- SRT table and accumulating list of compiled procs
-> Cmm -- Input C-- with Procedures
-> IO (TopSRT, [Cmm]) -- Output CPS transformed C--
-protoCmmCPS hsc_env (topSRT, rst) (Cmm tops) =
+cmmPipeline hsc_env (topSRT, rst) prog =
do let dflags = hsc_dflags hsc_env
+ (Cmm tops) = runCmmContFlowOpts prog
showPass dflags "CPSZ"
(cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops
let topCAFEnv = mkTopCAFInfo (concat cafEnvs)
(topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops
let cmms = Cmm (reverse (concat tops))
dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms)
- return (topSRT, cmms : rst)
+ -- SRT is not affected by control flow optimization pass
+ let prog' = map runCmmContFlowOpts (cmms : rst)
+ return (topSRT, prog')
{- [Note global fuel]
~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs
new file mode 100644
index 0000000000..56045d514c
--- /dev/null
+++ b/compiler/cmm/CmmRewriteAssignments.hs
@@ -0,0 +1,608 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
+#if __GLASGOW_HASKELL__ < 701
+-- GHC 7.0.1 improved incomplete pattern warnings with GADTs
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+#endif
+
+-- This module implements generalized code motion for assignments to
+-- local registers, inlining and sinking when possible. It also does
+-- some amount of rewriting for stores to register slots, which are
+-- effectively equivalent to local registers.
+module CmmRewriteAssignments
+ ( rewriteAssignments
+ ) where
+
+import Cmm
+import CmmExpr
+import OptimizationFuel
+import StgCmmUtils
+
+import Control.Monad
+import UniqFM
+import Unique
+import BlockId
+
+import Compiler.Hoopl hiding (Unique)
+import Data.Maybe
+import Prelude hiding (succ, zip)
+
+----------------------------------------------------------------
+--- Main function
+
+rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+rewriteAssignments g = do
+ -- Because we need to act on forwards and backwards information, we
+ -- first perform usage analysis and bake this information into the
+ -- graph (backwards transform), and then do a forwards transform
+ -- to actually perform inlining and sinking.
+ g' <- annotateUsage g
+ g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
+ analRewFwd assignmentLattice assignmentTransfer assignmentRewrite
+ return (modifyGraph eraseRegUsage g'')
+
+----------------------------------------------------------------
+--- Usage information
+
+-- We decorate all register assignments with approximate usage
+-- information, that is, the maximum number of times the register is
+-- referenced while it is live along all outgoing control paths.
+-- This analysis provides a precise upper bound for usage, so if a
+-- register is never referenced, we can remove it, as that assignment is
+-- dead.
+--
+-- This analysis is very similar to liveness analysis; we just keep a
+-- little extra info. (Maybe we should move it to CmmLive, and subsume
+-- the old liveness analysis.)
+--
+-- There are a few subtleties here:
+--
+-- - If a register goes dead, and then becomes live again, the usages
+-- of the disjoint live range don't count towards the original range.
+--
+-- a = 1; // used once
+-- b = a;
+-- a = 2; // used once
+-- c = a;
+--
+-- - A register may be used multiple times, but these all reside in
+-- different control paths, such that any given execution only uses
+-- it once. In that case, the usage count may still be 1.
+--
+-- a = 1; // used once
+-- if (b) {
+-- c = a + 3;
+-- } else {
+-- c = a + 1;
+-- }
+--
+-- This policy corresponds to an inlining strategy that does not
+-- duplicate computation but may increase binary size.
+--
+-- - If we naively implement a usage count, we have a counting to
+-- infinity problem across joins. Furthermore, knowing that
+-- something is used 2 or more times in one runtime execution isn't
+-- particularly useful for optimizations (inlining may be beneficial,
+-- but there's no way of knowing that without register pressure
+-- information.)
+--
+-- while (...) {
+-- // first iteration, b used once
+-- // second iteration, b used twice
+-- // third iteration ...
+-- a = b;
+-- }
+-- // b used zero times
+--
+-- There is an orthogonal question, which is that for every runtime
+-- execution, the register may be used only once, but if we inline it
+-- in every conditional path, the binary size might increase a lot.
+-- But tracking this information would be tricky, because it violates
+-- the finite lattice restriction Hoopl requires for termination;
+-- we'd thus need to supply an alternate proof, which is probably
+-- something we should defer until we actually have an optimization
+-- that would take advantage of this. (This might also interact
+-- strangely with liveness information.)
+--
+-- a = ...;
+-- // a is used one time, but in X different paths
+-- case (b) of
+-- 1 -> ... a ...
+-- 2 -> ... a ...
+-- 3 -> ... a ...
+-- ...
+--
+-- - Memory stores to local register slots (CmmStore (CmmStackSlot
+-- (LocalReg _) 0) _) have similar behavior to local registers,
+-- in that these locations are all disjoint from each other. Thus,
+-- we attempt to inline them too. Note that because these are only
+-- generated as part of the spilling process, most of the time this
+-- will refer to a local register and the assignment will immediately
+-- die on the subsequent call. However, if we manage to replace that
+-- local register with a memory location, it means that we've managed
+-- to preserve a value on the stack without having to move it to
+-- another memory location again! We collect usage information just
+-- to be safe in case extra computation is involved.
+
+data RegUsage = SingleUse | ManyUse
+ deriving (Ord, Eq, Show)
+-- Absence in map = ZeroUse
+
+{-
+-- minBound is bottom, maxBound is top, least-upper-bound is max
+-- ToDo: Put this in Hoopl. Note that this isn't as useful as I
+-- originally hoped, because you usually want to leave out the bottom
+-- element when you have things like this put in maps. Maybe f is
+-- useful on its own as a combining function.
+boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
+boundedOrdLattice n = DataflowLattice n minBound f
+ where f _ (OldFact x) (NewFact y)
+ | x >= y = (NoChange, x)
+ | otherwise = (SomeChange, y)
+-}
+
+-- Custom node type we'll rewrite to. CmmAssign nodes to local
+-- registers are replaced with AssignLocal nodes.
+data WithRegUsage n e x where
+ -- Plain will not contain CmmAssign nodes immediately after
+ -- transformation, but as we rewrite assignments, we may have
+ -- assignments here: these are assignments that should not be
+ -- rewritten!
+ Plain :: n e x -> WithRegUsage n e x
+ AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
+
+instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
+ foldRegsUsed f z (Plain n) = foldRegsUsed f z n
+ foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
+
+instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
+ foldRegsDefd f z (Plain n) = foldRegsDefd f z n
+ foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
+
+instance NonLocal n => NonLocal (WithRegUsage n) where
+ entryLabel (Plain n) = entryLabel n
+ successors (Plain n) = successors n
+
+liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
+liftRegUsage = mapGraph Plain
+
+eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
+eraseRegUsage = mapGraph f
+ where f :: WithRegUsage CmmNode e x -> CmmNode e x
+ f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
+ f (Plain n) = n
+
+type UsageMap = UniqFM RegUsage
+
+usageLattice :: DataflowLattice UsageMap
+usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
+ where f _ (OldFact x) (NewFact y)
+ | x >= y = (NoChange, x)
+ | otherwise = (SomeChange, y)
+
+-- We reuse the names 'gen' and 'kill', although we're doing something
+-- slightly different from the Dragon Book
+usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
+usageTransfer = mkBTransfer3 first middle last
+ where first _ f = f
+ middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
+ middle n f = gen_kill n f
+ last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
+ -- Checking for CmmCall/CmmForeignCall is unnecessary, because
+ -- spills/reloads have already occurred by the time we do this
+ -- analysis.
+ -- XXX Deprecated warning is puzzling: what label are we
+ -- supposed to use?
+ -- ToDo: With a bit more cleverness here, we can avoid
+ -- disappointment and heartbreak associated with the inability
+ -- to inline into CmmCall and CmmForeignCall by
+ -- over-estimating the usage to be ManyUse.
+ last n f = gen_kill n (joinOutFacts usageLattice n f)
+ gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ gen_kill a = gen a . kill a
+ gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ gen a f = foldRegsUsed increaseUsage f a
+ kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ kill a f = foldRegsDefd delFromUFM f a
+ increaseUsage f r = addToUFM_C combine f r SingleUse
+ where combine _ _ = ManyUse
+
+usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
+usageRewrite = mkBRewrite3 first middle last
+ where first _ _ = return Nothing
+ middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
+ middle (Plain (CmmAssign (CmmLocal l) e)) f
+ = return . Just
+ $ case lookupUFM f l of
+ Nothing -> emptyGraph
+ Just usage -> mkMiddle (AssignLocal l e usage)
+ middle _ _ = return Nothing
+ last _ _ = return Nothing
+
+type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
+annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
+annotateUsage vanilla_g =
+ let g = modifyGraph liftRegUsage vanilla_g
+ in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
+ analRewBwd usageLattice usageTransfer usageRewrite
+
+----------------------------------------------------------------
+--- Assignment tracking
+
+-- The idea is to maintain a map of local registers do expressions,
+-- such that the value of that register is the same as the value of that
+-- expression at any given time. We can then do several things,
+-- as described by Assignment.
+
+-- Assignment describes the various optimizations that are valid
+-- at a given point in the program.
+data Assignment =
+-- This assignment can always be inlined. It is cheap or single-use.
+ AlwaysInline CmmExpr
+-- This assignment should be sunk down to its first use. (This will
+-- increase code size if the register is used in multiple control flow
+-- paths, but won't increase execution time, and the reduction of
+-- register pressure is worth it, I think.)
+ | AlwaysSink CmmExpr
+-- We cannot safely optimize occurrences of this local register. (This
+-- corresponds to top in the lattice structure.)
+ | NeverOptimize
+
+-- Extract the expression that is being assigned to
+xassign :: Assignment -> Maybe CmmExpr
+xassign (AlwaysInline e) = Just e
+xassign (AlwaysSink e) = Just e
+xassign NeverOptimize = Nothing
+
+-- Extracts the expression, but only if they're the same constructor
+xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
+xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
+xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
+xassign2 _ = Nothing
+
+-- Note: We'd like to make decisions about "not optimizing" as soon as
+-- possible, because this will make running the transfer function more
+-- efficient.
+type AssignmentMap = UniqFM Assignment
+
+assignmentLattice :: DataflowLattice AssignmentMap
+assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
+ where add _ (OldFact old) (NewFact new)
+ = case (old, new) of
+ (NeverOptimize, _) -> (NoChange, NeverOptimize)
+ (_, NeverOptimize) -> (SomeChange, NeverOptimize)
+ (xassign2 -> Just (e, e'))
+ | e == e' -> (NoChange, old)
+ | otherwise -> (SomeChange, NeverOptimize)
+ _ -> (SomeChange, NeverOptimize)
+
+-- Deletes sinks from assignment map, because /this/ is the place
+-- where it will be sunk to.
+deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
+deleteSinks n m = foldRegsUsed (adjustUFM f) m n
+ where f (AlwaysSink _) = NeverOptimize
+ f old = old
+
+-- Invalidates any expressions that use a register.
+invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
+-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
+invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
+ where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+{- This requires the entire spine of the map to be continually rebuilt,
+ - which causes crazy memory usage!
+invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
+ where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
+ invalidateUsers' _ old = old
+-}
+
+-- Note [foldUFM performance]
+-- These calls to fold UFM no longer leak memory, but they do cause
+-- pretty killer amounts of allocation. So they'll be something to
+-- optimize; we need an algorithmic change to prevent us from having to
+-- traverse the /entire/ map continually.
+
+middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
+
+-- Algorithm for annotated assignments:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Add the assignment to our list of valid local assignments with
+-- the correct optimization policy.
+-- 3. Look for all assignments that reference that register and
+-- invalidate them.
+middleAssignment n@(AssignLocal r e usage) assign
+ = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
+ where add m = addToUFM m r
+ $ case usage of
+ SingleUse -> AlwaysInline e
+ ManyUse -> decide e
+ decide CmmLit{} = AlwaysInline e
+ decide CmmReg{} = AlwaysInline e
+ decide CmmLoad{} = AlwaysSink e
+ decide CmmStackSlot{} = AlwaysSink e
+ decide CmmMachOp{} = AlwaysSink e
+ -- We'll always inline simple operations on the global
+ -- registers, to reduce register pressure: Sp - 4 or Hp - 8
+ -- EZY: Justify this optimization more carefully.
+ decide CmmRegOff{} = AlwaysInline e
+
+-- Algorithm for unannotated assignments of global registers:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Look for all assignments that reference this register and
+-- invalidate them.
+middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
+ = invalidateUsersOf reg . deleteSinks n $ assign
+
+-- Algorithm for unannotated assignments of *local* registers: do
+-- nothing (it's a reload, so no state should have changed)
+middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
+
+-- Algorithm for stores:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Look for all assignments that load from memory locations that
+-- were clobbered by this store and invalidate them.
+middleAssignment (Plain n@(CmmStore lhs rhs)) assign
+ = let m = deleteSinks n assign
+ in foldUFM_Directly f m m -- [foldUFM performance]
+ where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+{- Also leaky
+ = mapUFM_Directly p . deleteSinks n $ assign
+ -- ToDo: There's a missed opportunity here: even if a memory
+ -- access we're attempting to sink gets clobbered at some
+ -- location, it's still /better/ to sink it to right before the
+ -- point where it gets clobbered. How might we do this?
+ -- Unfortunately, it's too late to change the assignment...
+ where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
+ p _ old = old
+-}
+
+-- Assumption: Unsafe foreign calls don't clobber memory
+-- Since foreign calls clobber caller saved registers, we need
+-- invalidate any assignments that reference those global registers.
+-- This is kind of expensive. (One way to optimize this might be to
+-- store extra information about expressions that allow this and other
+-- checks to be done cheaply.)
+middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
+ = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
+ where deleteCallerSaves m = foldUFM_Directly f m m
+ f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+ g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
+ g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
+ g _ b = b
+
+middleAssignment (Plain (CmmComment {})) assign
+ = assign
+
+-- Assumptions:
+-- * Writes using Hp do not overlap with any other memory locations
+-- (An important invariant being relied on here is that we only ever
+-- use Hp to allocate values on the heap, which appears to be the
+-- case given hpReg usage, and that our heap writing code doesn't
+-- do anything stupid like overlapping writes.)
+-- * Stack slots do not overlap with any other memory locations
+-- * Stack slots for different areas do not overlap
+-- * Stack slots within the same area and different offsets may
+-- overlap; we need to do a size check (see 'overlaps').
+-- * Register slots only overlap with themselves. (But this shouldn't
+-- happen in practice, because we'll fail to inline a reload across
+-- the next spill.)
+-- * Non stack-slot stores always conflict with each other. (This is
+-- not always the case; we could probably do something special for Hp)
+clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
+ -> (Unique, CmmExpr) -- (register, expression) that may be clobbered
+ -> Bool
+clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
+clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
+-- ToDo: Also catch MachOp case
+clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
+ | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
+clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+ = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
+ f (CmmLoad e _) = containsStackSlot e
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+ -- Maybe there's an invariant broken if this actually ever
+ -- returns True
+ containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
+ containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
+ containsStackSlot (CmmStackSlot{}) = True
+ containsStackSlot _ = False
+clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
+ f _ = False
+clobbers _ (_, e) = f e
+ where f (CmmLoad (CmmStackSlot _ _) _) = False
+ f (CmmLoad{}) = True -- conservative
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+
+-- Check for memory overlapping.
+-- Diagram:
+-- 4 8 12
+-- s -w- o
+-- [ I32 ]
+-- [ F64 ]
+-- s' -w'- o'
+type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+overlaps :: CallSubArea -> CallSubArea -> Bool
+overlaps (a, _, _) (a', _, _) | a /= a' = False
+overlaps (_, o, w) (_, o', w') =
+ let s = o - w
+ s' = o' - w'
+ in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK
+
+lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
+lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)]
+lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)]
+lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
+
+-- Invalidates any expressions that have volatile contents: essentially,
+-- all terminals volatile except for literals and loads of stack slots
+-- that do not correspond to the call area for 'k' (the current call
+-- area is volatile because overflow return parameters may be written
+-- there.)
+-- Note: mapUFM could be expensive, but hopefully block boundaries
+-- aren't too common. If it is a problem, replace with something more
+-- clever.
+invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
+invalidateVolatile k m = mapUFM p m
+ where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
+ where exp CmmLit{} = True
+ exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
+ | k' == k = False
+ exp (CmmLoad (CmmStackSlot _ _) _) = True
+ exp (CmmMachOp _ es) = and (map exp es)
+ exp _ = False
+ p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
+
+assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
+assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
+
+-- Note [Soundness of inlining]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- In the Hoopl paper, the soundness condition on rewrite functions is
+-- described as follows:
+--
+-- "If it replaces a node n by a replacement graph g, then g must
+-- be observationally equivalent to n under the assumptions
+-- expressed by the incoming dataflow fact f. Moreover, analysis of
+-- g must produce output fact(s) that are at least as informative
+-- as the fact(s) produced by applying the transfer function to n."
+--
+-- We consider the second condition in more detail here. It says given
+-- the rewrite R(n, f) = g, then for any incoming fact f' consistent
+-- with f (f' >= f), then running the transfer function T(f', n) <= T(f', g).
+-- For inlining this is not necessarily the case:
+--
+-- n = "x = a + 2"
+-- f = f' = {a = y}
+-- g = "x = y + 2"
+-- T(f', n) = {x = a + 2, a = y}
+-- T(f', g) = {x = y + 2, a = y}
+--
+-- y + 2 and a + 2 are not obviously comparable, and a naive
+-- implementation of the lattice would say they are incomparable.
+-- At best, this means we may be over-conservative, at worst, it means
+-- we may not terminate.
+--
+-- However, in the original Lerner-Grove-Chambers paper, soundness and
+-- termination are separated, and only equivalence of facts is required
+-- for soundness. Monotonicity of the transfer function is not required
+-- for termination (as the calculation of least-upper-bound prevents
+-- this from being a problem), but it means we won't necessarily find
+-- the least-fixed point.
+
+-- Note [Coherency of annotations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Is it possible for our usage annotations to become invalid after we
+-- start performing transformations? As the usage info only provides
+-- an upper bound, we only need to consider cases where the usages of
+-- a register may increase due to transformations--e.g. any reference
+-- to a local register in an AlwaysInline or AlwaysSink instruction, whose
+-- originating assignment was single use (we don't care about the
+-- many use case, because it is the top of the lattice). But such a
+-- case is not possible, because we always inline any single use
+-- register. QED.
+--
+-- TODO: A useful lint option would be to check this invariant that
+-- there is never a local register in the assignment map that is
+-- single-use.
+
+-- Note [Soundness of store rewriting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Its soundness depends on the invariant that no assignment is made to
+-- the local register before its store is accessed. This is clearly
+-- true with unoptimized spill-reload code, and as the store will always
+-- be rewritten first (if possible), there is no chance of it being
+-- propagated down before getting written (possibly with incorrect
+-- values from the assignment map, due to reassignment of the local
+-- register.) This is probably not locally sound.
+
+assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
+assignmentRewrite = mkFRewrite3 first middle last
+ where
+ first _ _ = return Nothing
+ middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
+ middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
+ middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
+ last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
+ -- Tuple is (inline?, reloads for sinks)
+ precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
+ precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
+ where f (i, l) r = case lookupUFM assign r of
+ Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
+ Just (AlwaysInline _) -> (True, l)
+ Just NeverOptimize -> (i, l)
+ -- This case can show up when we have
+ -- limited optimization fuel.
+ Nothing -> (i, l)
+ rewrite :: AssignmentMap
+ -> (Bool, [WithRegUsage CmmNode O O])
+ -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
+ -> CmmNode O x
+ -> Maybe (Graph (WithRegUsage CmmNode) O x)
+ rewrite _ (False, []) _ _ = Nothing
+ -- Note [CmmCall Inline Hack]
+ -- Conservative hack: don't do any inlining on what will
+ -- be translated into an OldCmm CmmCalls, since the code
+ -- produced here tends to be unproblematic and I need to write
+ -- lint passes to ensure that we don't put anything in the
+ -- arguments that could be construed as a global register by
+ -- some later translation pass. (For example, slots will turn
+ -- into dereferences of Sp). See [Register parameter passing].
+ -- ToDo: Fix this up to only bug out if all inlines were for
+ -- CmmExprs with global registers (we can't use the
+ -- straightforward mapExpDeep call, in this case.) ToDo: We miss
+ -- an opportunity here, where all possible inlinings should
+ -- instead be sunk.
+ rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
+ rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
+
+ rewriteLocal :: AssignmentMap
+ -> (Bool, [WithRegUsage CmmNode O O])
+ -> LocalReg -> CmmExpr -> RegUsage
+ -> Maybe (Graph (WithRegUsage CmmNode) O O)
+ rewriteLocal _ (False, []) _ _ _ = Nothing
+ rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
+ where n' = AssignLocal l e' u
+ e' = if i then wrapRecExp (inlineExp assign) e else e
+ -- inlinable check omitted, since we can always inline into
+ -- assignments.
+
+ inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
+ inline False _ n = n
+ inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
+ inline True assign n = mapExpDeep (inlineExp assign) n
+
+ inlineExp assign old@(CmmReg (CmmLocal r))
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) -> x
+ _ -> old
+ inlineExp assign old@(CmmRegOff (CmmLocal r) i)
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) ->
+ case x of
+ (CmmRegOff r' i') -> CmmRegOff r' (i + i')
+ _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+ where rep = typeWidth (localRegType r)
+ _ -> old
+ -- See Note [Soundness of store rewriting]
+ inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) -> x
+ _ -> old
+ inlineExp _ old = old
+
+ inlinable :: CmmNode e x -> Bool
+ inlinable (CmmCall{}) = False
+ inlinable (CmmForeignCall{}) = False
+ inlinable (CmmUnsafeForeignCall{}) = False
+ inlinable _ = True
+
+-- ToDo: Outputable instance for UsageMap and AssignmentMap
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 4f24238638..1dbfbb051b 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -1,12 +1,11 @@
-{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts, ViewPatterns #-}
+{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-#if __GLASGOW_HASKELL__ >= 701
+#if __GLASGOW_HASKELL__ < 701
-- GHC 7.0.1 improved incomplete pattern warnings with GADTs
-{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
#endif
module CmmSpillReload
@@ -15,7 +14,6 @@ module CmmSpillReload
--, insertSpillsAndReloads --- XXX todo check live-in at entry against formals
, dualLivenessWithInsertion
- , rewriteAssignments
, removeDeadAssignmentsAndReloads
)
where
@@ -25,14 +23,11 @@ import Cmm
import CmmExpr
import CmmLive
import OptimizationFuel
-import StgCmmUtils
import Control.Monad
import Outputable hiding (empty)
import qualified Outputable as PP
import UniqSet
-import UniqFM
-import Unique
import Compiler.Hoopl hiding (Unique)
import Data.Maybe
@@ -52,8 +47,15 @@ A variable can be expected to be live in a register, live on the
stack, or both. This analysis ensures that spills and reloads are
inserted as needed to make sure that every live variable needed
after a call is available on the stack. Spills are pushed back to
-their reaching definitions, but reloads are dropped wherever needed
-and will have to be sunk by a later forward transformation.
+their reaching definitions, but reloads are dropped immediately after
+we return from a call and will have to be sunk by a later forward
+transformation.
+
+Note that we offer no guarantees about the consistency of the value
+in memory and the value in the register, except that they are
+equal across calls/procpoints. If the variable is changed, this
+mapping breaks: but as the original value of the register may still
+be useful in a different context, the memory location is not updated.
-}
data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet }
@@ -178,9 +180,6 @@ insertSpillAndReloadRewrites graph procPoints = deepBwdRw3 first middle nothing
nothing _ _ = return Nothing
-regSlot :: LocalReg -> CmmExpr
-regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
-
spill, reload :: LocalReg -> CmmNode O O
spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
@@ -202,462 +201,6 @@ removeDeadAssignmentsAndReloads procPoints g =
nothing _ _ = return Nothing
-----------------------------------------------------------------
---- Usage information
-
--- We decorate all register assignments with usage information,
--- that is, the maximum number of times the register is referenced
--- while it is live along all outgoing control paths. There are a few
--- subtleties here:
---
--- - If a register goes dead, and then becomes live again, the usages
--- of the disjoint live range don't count towards the original range.
---
--- a = 1; // used once
--- b = a;
--- a = 2; // used once
--- c = a;
---
--- - A register may be used multiple times, but these all reside in
--- different control paths, such that any given execution only uses
--- it once. In that case, the usage count may still be 1.
---
--- a = 1; // used once
--- if (b) {
--- c = a + 3;
--- } else {
--- c = a + 1;
--- }
---
--- This policy corresponds to an inlining strategy that does not
--- duplicate computation but may increase binary size.
---
--- - If we naively implement a usage count, we have a counting to
--- infinity problem across joins. Furthermore, knowing that
--- something is used 2 or more times in one runtime execution isn't
--- particularly useful for optimizations (inlining may be beneficial,
--- but there's no way of knowing that without register pressure
--- information.)
---
--- while (...) {
--- // first iteration, b used once
--- // second iteration, b used twice
--- // third iteration ...
--- a = b;
--- }
--- // b used zero times
---
--- There is an orthogonal question, which is that for every runtime
--- execution, the register may be used only once, but if we inline it
--- in every conditional path, the binary size might increase a lot.
--- But tracking this information would be tricky, because it violates
--- the finite lattice restriction Hoopl requires for termination;
--- we'd thus need to supply an alternate proof, which is probably
--- something we should defer until we actually have an optimization
--- that would take advantage of this. (This might also interact
--- strangely with liveness information.)
---
--- a = ...;
--- // a is used one time, but in X different paths
--- case (b) of
--- 1 -> ... a ...
--- 2 -> ... a ...
--- 3 -> ... a ...
--- ...
---
--- This analysis is very similar to liveness analysis; we just keep a
--- little extra info. (Maybe we should move it to CmmLive, and subsume
--- the old liveness analysis.)
-
-data RegUsage = SingleUse | ManyUse
- deriving (Ord, Eq, Show)
--- Absence in map = ZeroUse
-
-{-
--- minBound is bottom, maxBound is top, least-upper-bound is max
--- ToDo: Put this in Hoopl. Note that this isn't as useful as I
--- originally hoped, because you usually want to leave out the bottom
--- element when you have things like this put in maps. Maybe f is
--- useful on its own as a combining function.
-boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
-boundedOrdLattice n = DataflowLattice n minBound f
- where f _ (OldFact x) (NewFact y)
- | x >= y = (NoChange, x)
- | otherwise = (SomeChange, y)
--}
-
--- Custom node type we'll rewrite to. CmmAssign nodes to local
--- registers are replaced with AssignLocal nodes.
-data WithRegUsage n e x where
- -- Plain will not contain CmmAssign nodes immediately after
- -- transformation, but as we rewrite assignments, we may have
- -- assignments here: these are assignments that should not be
- -- rewritten!
- Plain :: n e x -> WithRegUsage n e x
- AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
-
-instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
- foldRegsUsed f z (Plain n) = foldRegsUsed f z n
- foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
-
-instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
- foldRegsDefd f z (Plain n) = foldRegsDefd f z n
- foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
-
-instance NonLocal n => NonLocal (WithRegUsage n) where
- entryLabel (Plain n) = entryLabel n
- successors (Plain n) = successors n
-
-liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
-liftRegUsage = mapGraph Plain
-
-eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
-eraseRegUsage = mapGraph f
- where f :: WithRegUsage CmmNode e x -> CmmNode e x
- f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
- f (Plain n) = n
-
-type UsageMap = UniqFM RegUsage
-
-usageLattice :: DataflowLattice UsageMap
-usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
- where f _ (OldFact x) (NewFact y)
- | x >= y = (NoChange, x)
- | otherwise = (SomeChange, y)
-
--- We reuse the names 'gen' and 'kill', although we're doing something
--- slightly different from the Dragon Book
-usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
-usageTransfer = mkBTransfer3 first middle last
- where first _ f = f
- middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
- middle n f = gen_kill n f
- last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
- -- Checking for CmmCall/CmmForeignCall is unnecessary, because
- -- spills/reloads have already occurred by the time we do this
- -- analysis.
- -- XXX Deprecated warning is puzzling: what label are we
- -- supposed to use?
- -- ToDo: With a bit more cleverness here, we can avoid
- -- disappointment and heartbreak associated with the inability
- -- to inline into CmmCall and CmmForeignCall by
- -- over-estimating the usage to be ManyUse.
- last n f = gen_kill n (joinOutFacts usageLattice n f)
- gen_kill a = gen a . kill a
- gen a f = foldRegsUsed increaseUsage f a
- kill a f = foldRegsDefd delFromUFM f a
- increaseUsage f r = addToUFM_C combine f r SingleUse
- where combine _ _ = ManyUse
-
-usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
-usageRewrite = mkBRewrite3 first middle last
- where first _ _ = return Nothing
- middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
- middle (Plain (CmmAssign (CmmLocal l) e)) f
- = return . Just
- $ case lookupUFM f l of
- Nothing -> emptyGraph
- Just usage -> mkMiddle (AssignLocal l e usage)
- middle _ _ = return Nothing
- last _ _ = return Nothing
-
-type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
-annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
-annotateUsage vanilla_g =
- let g = modifyGraph liftRegUsage vanilla_g
- in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
- analRewBwd usageLattice usageTransfer usageRewrite
-
-----------------------------------------------------------------
---- Assignment tracking
-
--- The idea is to maintain a map of local registers do expressions,
--- such that the value of that register is the same as the value of that
--- expression at any given time. We can then do several things,
--- as described by Assignment.
-
--- Assignment describes the various optimizations that are valid
--- at a given point in the program.
-data Assignment =
--- This assignment can always be inlined. It is cheap or single-use.
- AlwaysInline CmmExpr
--- This assignment should be sunk down to its first use. (This will
--- increase code size if the register is used in multiple control flow
--- paths, but won't increase execution time, and the reduction of
--- register pressure is worth it.)
- | AlwaysSink CmmExpr
--- We cannot safely optimize occurrences of this local register. (This
--- corresponds to top in the lattice structure.)
- | NeverOptimize
-
--- Extract the expression that is being assigned to
-xassign :: Assignment -> Maybe CmmExpr
-xassign (AlwaysInline e) = Just e
-xassign (AlwaysSink e) = Just e
-xassign NeverOptimize = Nothing
-
--- Extracts the expression, but only if they're the same constructor
-xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
-xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
-xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
-xassign2 _ = Nothing
-
--- Note: We'd like to make decisions about "not optimizing" as soon as
--- possible, because this will make running the transfer function more
--- efficient.
-type AssignmentMap = UniqFM Assignment
-
-assignmentLattice :: DataflowLattice AssignmentMap
-assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
- where add _ (OldFact old) (NewFact new)
- = case (old, new) of
- (NeverOptimize, _) -> (NoChange, NeverOptimize)
- (_, NeverOptimize) -> (SomeChange, NeverOptimize)
- (xassign2 -> Just (e, e'))
- | e == e' -> (NoChange, old)
- | otherwise -> (SomeChange, NeverOptimize)
- _ -> (SomeChange, NeverOptimize)
-
--- Deletes sinks from assignment map, because /this/ is the place
--- where it will be sunk to.
-deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
-deleteSinks n m = foldRegsUsed (adjustUFM f) m n
- where f (AlwaysSink _) = NeverOptimize
- f old = old
-
--- Invalidates any expressions that use a register.
-invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
--- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
-invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
- where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
-{- This requires the entire spine of the map to be continually rebuilt,
- - which causes crazy memory usage!
-invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
- where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
- invalidateUsers' _ old = old
--}
-
--- Note [foldUFM performance]
--- These calls to fold UFM no longer leak memory, but they do cause
--- pretty killer amounts of allocation. So they'll be something to
--- optimize; we need an algorithmic change to prevent us from having to
--- traverse the /entire/ map continually.
-
-middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
-
--- Algorithm for annotated assignments:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Add the assignment to our list of valid local assignments with
--- the correct optimization policy.
--- 3. Look for all assignments that reference that register and
--- invalidate them.
-middleAssignment n@(AssignLocal r e usage) assign
- = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
- where add m = addToUFM m r
- $ case usage of
- SingleUse -> AlwaysInline e
- ManyUse -> decide e
- decide CmmLit{} = AlwaysInline e
- decide CmmReg{} = AlwaysInline e
- decide CmmLoad{} = AlwaysSink e
- decide CmmStackSlot{} = AlwaysSink e
- decide CmmMachOp{} = AlwaysSink e
- -- We'll always inline simple operations on the global
- -- registers, to reduce register pressure: Sp - 4 or Hp - 8
- -- EZY: Justify this optimization more carefully.
- decide CmmRegOff{} = AlwaysInline e
-
--- Algorithm for unannotated assignments of global registers:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Look for all assignments that reference this register and
--- invalidate them.
-middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
- = invalidateUsersOf reg . deleteSinks n $ assign
-
--- Algorithm for unannotated assignments of *local* registers: do
--- nothing (it's a reload, so no state should have changed)
-middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
-
--- Algorithm for stores:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Look for all assignments that load from memory locations that
--- were clobbered by this store and invalidate them.
-middleAssignment (Plain n@(CmmStore lhs rhs)) assign
- = let m = deleteSinks n assign
- in foldUFM_Directly f m m -- [foldUFM performance]
- where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
-{- Also leaky
- = mapUFM_Directly p . deleteSinks n $ assign
- -- ToDo: There's a missed opportunity here: even if a memory
- -- access we're attempting to sink gets clobbered at some
- -- location, it's still /better/ to sink it to right before the
- -- point where it gets clobbered. How might we do this?
- -- Unfortunately, it's too late to change the assignment...
- where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
- p _ old = old
--}
-
--- Assumption: Unsafe foreign calls don't clobber memory
--- Since foreign calls clobber caller saved registers, we need
--- invalidate any assignments that reference those global registers.
--- This is kind of expensive. (One way to optimize this might be to
--- store extra information about expressions that allow this and other
--- checks to be done cheaply.)
-middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
- = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
- where deleteCallerSaves m = foldUFM_Directly f m m
- f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
- g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
- g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
- g _ b = b
-
-middleAssignment (Plain (CmmComment {})) assign
- = assign
-
--- Assumptions:
--- * Writes using Hp do not overlap with any other memory locations
--- (An important invariant being relied on here is that we only ever
--- use Hp to allocate values on the heap, which appears to be the
--- case given hpReg usage, and that our heap writing code doesn't
--- do anything stupid like overlapping writes.)
--- * Stack slots do not overlap with any other memory locations
--- * Stack slots for different areas do not overlap
--- * Stack slots within the same area and different offsets may
--- overlap; we need to do a size check (see 'overlaps').
--- * Register slots only overlap with themselves. (But this shouldn't
--- happen in practice, because we'll fail to inline a reload across
--- the next spill.)
--- * Non stack-slot stores always conflict with each other. (This is
--- not always the case; we could probably do something special for Hp)
-clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
- -> (Unique, CmmExpr) -- (register, expression) that may be clobbered
- -> Bool
-clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
-clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
--- ToDo: Also catch MachOp case
-clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
- | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
- = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
- f (CmmLoad e _) = containsStackSlot e
- f (CmmMachOp _ es) = or (map f es)
- f _ = False
- -- Maybe there's an invariant broken if this actually ever
- -- returns True
- containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
- containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
- containsStackSlot (CmmStackSlot{}) = True
- containsStackSlot _ = False
-clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
- f _ = False
-clobbers _ (_, e) = f e
- where f (CmmLoad (CmmStackSlot _ _) _) = False
- f (CmmLoad{}) = True -- conservative
- f (CmmMachOp _ es) = or (map f es)
- f _ = False
-
--- Check for memory overlapping.
--- Diagram:
--- 4 8 12
--- s -w- o
--- [ I32 ]
--- [ F64 ]
--- s' -w'- o'
-type CallSubArea = (AreaId, Int, Int) -- area, offset, width
-overlaps :: CallSubArea -> CallSubArea -> Bool
-overlaps (a, _, _) (a', _, _) | a /= a' = False
-overlaps (_, o, w) (_, o', w') =
- let s = o - w
- s' = o' - w'
- in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK
-
-lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
--- Variables are dead across calls, so invalidating all mappings is justified
-lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, mapUFM (const NeverOptimize) assign)]
-lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, mapUFM (const NeverOptimize) assign)]
-lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
-
-assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
-assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
-
-assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
-assignmentRewrite = mkFRewrite3 first middle last
- where
- first _ _ = return Nothing
- middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
- middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
- middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) mkMiddle l e u
- last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
- -- Tuple is (inline?, reloads)
- precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
- where f (i, l) r = case lookupUFM assign r of
- Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
- Just (AlwaysInline _) -> (True, l)
- Just NeverOptimize -> (i, l)
- -- This case can show up when we have
- -- limited optimization fuel.
- Nothing -> (i, l)
- rewrite _ (False, []) _ _ = Nothing
- -- Note [CmmCall Inline Hack]
- -- Conservative hack: don't do any inlining on what will
- -- be translated into an OldCmm CmmCalls, since the code
- -- produced here tends to be unproblematic and I need to write
- -- lint passes to ensure that we don't put anything in the
- -- arguments that could be construed as a global register by
- -- some later translation pass. (For example, slots will turn
- -- into dereferences of Sp). See [Register parameter passing].
- -- ToDo: Fix this up to only bug out if all inlines were for
- -- CmmExprs with global registers (we can't use the
- -- straightforward mapExpDeep call, in this case.) ToDo: We miss
- -- an opportunity here, where all possible inlinings should
- -- instead be sunk.
- rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
- rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
-
- rewriteLocal _ (False, []) _ _ _ _ = Nothing
- rewriteLocal assign (i, xs) mk l e u = Just $ mkMiddles xs <*> mk n'
- where n' = AssignLocal l e' u
- e' = if i then wrapRecExp (inlineExp assign) e else e
- -- inlinable check omitted, since we can always inline into
- -- assignments.
-
- inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
- inline False _ n = n
- inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
- inline True assign n = mapExpDeep (inlineExp assign) n
-
- inlineExp assign old@(CmmReg (CmmLocal r))
- = case lookupUFM assign r of
- Just (AlwaysInline x) -> x
- _ -> old
- inlineExp assign old@(CmmRegOff (CmmLocal r) i)
- = case lookupUFM assign r of
- Just (AlwaysInline x) ->
- case x of
- (CmmRegOff r' i') -> CmmRegOff r' (i + i')
- _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
- where rep = typeWidth (localRegType r)
- _ -> old
- inlineExp _ old = old
-
- inlinable :: CmmNode e x -> Bool
- inlinable (CmmCall{}) = False
- inlinable (CmmForeignCall{}) = False
- inlinable (CmmUnsafeForeignCall{}) = False
- inlinable _ = True
-
-rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
-rewriteAssignments g = do
- g' <- annotateUsage g
- g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
- analRewFwd assignmentLattice assignmentTransfer assignmentRewrite
- return (modifyGraph eraseRegUsage g'')
-
---------------------
-- prettyprinting
@@ -675,8 +218,6 @@ instance Outputable DualLive where
if isEmptyUniqSet stack then PP.empty
else (ppr_regs "live on stack =" stack)]
--- ToDo: Outputable instance for UsageMap and AssignmentMap
-
my_trace :: String -> SDoc -> a -> a
my_trace = if False then pprTrace else \_ _ a -> a
diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes
index 546f9aeb4c..4a87911ec5 100644
--- a/compiler/cmm/cmm-notes
+++ b/compiler/cmm/cmm-notes
@@ -8,9 +8,6 @@ More notes (June 11)
or parameterise FCode over its envt; the CgState part seem useful for both
-* Move top and tail calls to runCmmContFlowOpts from HscMain to CmmCps.cpsTop
- (and rename the latter!)
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
@@ -251,7 +248,7 @@ CmmCvt.hs Conversion between old and new Cmm reps
CmmOpt.hs Hopefully-redundant optimiser
-------- Stuff to keep ------------
-CmmCPS.hs Driver for new pipeline
+CmmPipeline.hs Driver for new pipeline
CmmLive.hs Liveness analysis, dead code elim
CmmProcPoint.hs Identifying and splitting out proc-points
@@ -298,24 +295,24 @@ BlockId.hs BlockId, BlockEnv, BlockSet
type RawCmm = GenCmm CmmStatic [CmmStatic] (ListGraph CmmStmt)
* HscMain.tryNewCodeGen
- - STG->Cmm: StgCmm.codeGen (new codegen)
- - Optimise: CmmContFlowOpt (simple optimisations, very self contained)
- - Cps convert: CmmCPS.protoCmmCPS
- - Optimise: CmmContFlowOpt again
- - Convert: CmmCvt.cmmOfZgraph (convert to old rep) very self contained
+ - STG->Cmm: StgCmm.codeGen (new codegen)
+ - Optimize and CPS: CmmPipeline.cmmPipeline
+ - Convert: CmmCvt.cmmOfZgraph (convert to old rep) very self contained
* StgCmm.hs The new STG -> Cmm conversion code generator
Lots of modules StgCmmXXX
----------------------------------------------------
- CmmCPS.protoCmmCPS The new pipeline
+ CmmPipeline.cmmPipeline The new pipeline
----------------------------------------------------
-CmmCPS.protoCmmCPS:
- 1. Do cpsTop for each procedures separately
- 2. Build SRT representation; this spans multiple procedures
- (unless split-objs)
+CmmPipeline.cmmPipeline:
+ 1. Do control flow optimization
+ 2. Do cpsTop for each procedures separately
+ 3. Build SRT representation; this spans multiple procedures
+ (unless split-objs)
+ 4. Do control flow optimization on all resulting procedures
cpsTop:
* CmmCommonBlockElim.elimCommonBlocks:
@@ -453,7 +450,7 @@ a dominator analysis, using the Dataflow Engine.
f's keep-alive refs to include h1.
* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a
- CmmInfoTable attached to each CmmProc. CmmCPS.toTops actually does
+ CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does
the attaching, right at the end of the pipeline. The C_SRT part
gives offsets within a single, shared table of closure pointers.
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
index 99e5c26077..f47fbe39c2 100644
--- a/compiler/codeGen/CgPrimOp.hs
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -655,7 +655,8 @@ doCopyArrayOp = emitCopyArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
- copy _src _dst = emitMemcpyCall
+ copy _src _dst dst_p src_p bytes live =
+ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)) live
-- | Takes a source 'MutableArray#', an offset in the source array, a
-- destination 'MutableArray#', an offset into the destination array,
@@ -670,8 +671,8 @@ doCopyMutableArrayOp = emitCopyArray copy
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes live =
emitIfThenElse (cmmEqWord src dst)
- (emitMemmoveCall dst_p src_p bytes live)
- (emitMemcpyCall dst_p src_p bytes live)
+ (emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)) live)
+ (emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)) live)
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> StgLiveVars -> Code)
@@ -737,11 +738,13 @@ emitCloneArray info_p res_r src0 src_off0 n0 live = do
src_p <- assignTemp $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize)
src_off
- emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize) live
+ emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize)
+ (CmmLit (mkIntCLit wORD_SIZE)) live
emitMemsetCall (cmmOffsetExprW dst_p n)
(CmmLit (mkIntCLit 1))
(card_words `cmmMulWord` wordSize)
+ (CmmLit (mkIntCLit wORD_SIZE))
live
stmtC $ CmmAssign (CmmLocal res_r) arr
where
@@ -761,65 +764,63 @@ emitSetCards dst_start dst_cards_start n live = do
(CmmLit (mkIntCLit 1))
((card (dst_start `cmmAddWord` n) `cmmSubWord` start_card)
`cmmAddWord` CmmLit (mkIntCLit 1))
+ (CmmLit (mkIntCLit wORD_SIZE))
live
where
-- Convert an element index to a card index
card i = i `cmmUShrWord` (CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS))
-- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
-emitMemcpyCall dst src n live = do
+emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars
+ -> Code
+emitMemcpyCall dst src n align live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
- (CmmCallee memcpy CCallConv)
+ (CmmPrim MO_Memcpy)
[ (CmmHinted dst AddrHint)
, (CmmHinted src AddrHint)
, (CmmHinted n NoHint)
+ , (CmmHinted align NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
- where
- memcpy = CmmLit (CmmLabel (mkForeignLabel (fsLit "memcpy") Nothing
- ForeignLabelInExternalPackage IsFunction))
-- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
-emitMemmoveCall dst src n live = do
+emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars
+ -> Code
+emitMemmoveCall dst src n align live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
- (CmmCallee memmove CCallConv)
+ (CmmPrim MO_Memmove)
[ (CmmHinted dst AddrHint)
, (CmmHinted src AddrHint)
, (CmmHinted n NoHint)
+ , (CmmHinted align NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
- where
- memmove = CmmLit (CmmLabel (mkForeignLabel (fsLit "memmove") Nothing
- ForeignLabelInExternalPackage IsFunction))
--- | Emit a call to @memset@. The second argument must fit inside an
--- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
-emitMemsetCall dst c n live = do
+-- | Emit a call to @memset@. The second argument must be a word but
+-- its value must fit inside an unsigned char.
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> StgLiveVars
+ -> Code
+emitMemsetCall dst c n align live = do
vols <- getVolatileRegs live
emitForeignCall' PlayRisky
[{-no results-}]
- (CmmCallee memset CCallConv)
+ (CmmPrim MO_Memset)
[ (CmmHinted dst AddrHint)
, (CmmHinted c NoHint)
, (CmmHinted n NoHint)
+ , (CmmHinted align NoHint)
]
(Just vols)
NoC_SRT -- No SRT b/c we do PlayRisky
CmmMayReturn
- where
- memset = CmmLit (CmmLabel (mkForeignLabel (fsLit "memset") Nothing
- ForeignLabelInExternalPackage IsFunction))
-- | Emit a call to @allocate@.
emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> StgLiveVars -> Code
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index e6dbcec7f9..2cf72270aa 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -730,7 +730,9 @@ doCopyArrayOp = emitCopyArray copy
where
-- Copy data (we assume the arrays aren't overlapping since
-- they're of different types)
- copy _src _dst = emitMemcpyCall
+ copy _src _dst dst_p src_p bytes =
+ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
+
-- | Takes a source 'MutableArray#', an offset in the source array, a
-- destination 'MutableArray#', an offset into the destination array,
@@ -745,8 +747,8 @@ doCopyMutableArrayOp = emitCopyArray copy
-- TODO: Optimize branch for common case of no aliasing.
copy src dst dst_p src_p bytes = do
[moveCall, cpyCall] <- forkAlts [
- getCode $ emitMemmoveCall dst_p src_p bytes,
- getCode $ emitMemcpyCall dst_p src_p bytes
+ getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)),
+ getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
]
emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
@@ -811,11 +813,12 @@ emitCloneArray info_p res_r src0 src_off0 n0 = do
src_p <- assignTempE $ cmmOffsetExprW (cmmOffsetB src arrPtrsHdrSize)
src_off
- emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize)
+ emitMemcpyCall dst_p src_p (n `cmmMulWord` wordSize) (CmmLit (mkIntCLit wORD_SIZE))
emitMemsetCall (cmmOffsetExprW dst_p n)
(CmmLit (mkIntCLit 1))
(card_words `cmmMulWord` wordSize)
+ (CmmLit (mkIntCLit wORD_SIZE))
emit $ mkAssign (CmmLocal res_r) arr
where
arrPtrsHdrSizeW = CmmLit $ mkIntCLit $ fixedHdrSize +
@@ -834,52 +837,35 @@ emitSetCards dst_start dst_cards_start n = do
(CmmLit (mkIntCLit 1))
((card (dst_start `cmmAddWord` n) `cmmSubWord` start_card)
`cmmAddWord` CmmLit (mkIntCLit 1))
+ (CmmLit (mkIntCLit wORD_SIZE))
where
-- Convert an element index to a card index
card i = i `cmmUShrWord` (CmmLit (mkIntCLit mUT_ARR_PTRS_CARD_BITS))
-- | Emit a call to @memcpy@.
-emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-emitMemcpyCall dst src n = do
- emitCCall
+emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemcpyCall dst src n align = do
+ emitPrimCall
[ {-no results-} ]
- memcpy
- [ (dst, AddrHint)
- , (src, AddrHint)
- , (n, NoHint)
- ]
- where
- memcpy = CmmLit (CmmLabel (mkForeignLabel (fsLit "memcpy") Nothing
- ForeignLabelInExternalPackage IsFunction))
+ MO_Memcpy
+ [ dst, src, n, align ]
-- | Emit a call to @memmove@.
-emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-emitMemmoveCall dst src n = do
- emitCCall
+emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemmoveCall dst src n align = do
+ emitPrimCall
[ {- no results -} ]
- memmove
- [ (dst, AddrHint)
- , (src, AddrHint)
- , (n, NoHint)
- ]
- where
- memmove = CmmLit (CmmLabel (mkForeignLabel (fsLit "memmove") Nothing
- ForeignLabelInExternalPackage IsFunction))
+ MO_Memmove
+ [ dst, src, n, align ]
-- | Emit a call to @memset@. The second argument must fit inside an
-- unsigned char.
-emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
-emitMemsetCall dst c n = do
- emitCCall
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitMemsetCall dst c n align = do
+ emitPrimCall
[ {- no results -} ]
- memset
- [ (dst, AddrHint)
- , (c, NoHint)
- , (n, NoHint)
- ]
- where
- memset = CmmLit (CmmLabel (mkForeignLabel (fsLit "memset") Nothing
- ForeignLabelInExternalPackage IsFunction))
+ MO_Memset
+ [ dst, c, n, align ]
-- | Emit a call to @allocate@.
emitAllocateCall :: LocalReg -> CmmExpr -> CmmExpr -> FCode ()
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index b3d9f0cd2a..2711c1b20e 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -182,7 +182,7 @@ Library
CLabel
Cmm
CmmBuildInfoTables
- CmmCPS
+ CmmPipeline
CmmCallConv
CmmCommonBlockElim
CmmContFlowOpt
@@ -199,6 +199,7 @@ Library
CmmParse
CmmProcPoint
CmmSpillReload
+ CmmRewriteAssignments
CmmStackLayout
CmmType
CmmUtils
@@ -313,6 +314,8 @@ Library
Finder
GHC
GhcMake
+ GhcPlugins
+ DynamicLoading
HeaderInfo
HscMain
HscStats
@@ -455,7 +458,6 @@ Library
Vectorise.Builtins.Base
Vectorise.Builtins.Initialise
Vectorise.Builtins.Modules
- Vectorise.Builtins.Prelude
Vectorise.Builtins
Vectorise.Monad.Base
Vectorise.Monad.Naming
diff --git a/compiler/ghci/Linker.lhs b/compiler/ghci/Linker.lhs
index eaf452199e..ef349ebb10 100644
--- a/compiler/ghci/Linker.lhs
+++ b/compiler/ghci/Linker.lhs
@@ -15,8 +15,8 @@ module Linker ( HValue, getHValue, showLinkerState,
linkExpr, unload, withExtendedLinkEnv,
extendLinkEnv, deleteFromLinkEnv,
extendLoadedPkgs,
- linkPackages,initDynLinker,
- dataConInfoPtrToName
+ linkPackages,initDynLinker,linkModule,
+ dataConInfoPtrToName, lessUnsafeCoerce
) where
#include "HsVersions.h"
@@ -55,6 +55,8 @@ import Constants
import FastString
import Config
+import GHC.Exts (unsafeCoerce#)
+
-- Standard libraries
import Control.Monad
@@ -264,6 +266,7 @@ dataConInfoPtrToName x = do
-- Throws a 'ProgramError' if loading fails or the name cannot be found.
getHValue :: HscEnv -> Name -> IO HValue
getHValue hsc_env name = do
+ initDynLinker (hsc_dflags hsc_env)
pls <- modifyMVar v_PersistentLinkerState $ \pls -> do
if (isExternalName name) then do
(pls', ok) <- linkDependencies hsc_env pls noSrcSpan [nameModule name]
@@ -277,6 +280,7 @@ linkDependencies :: HscEnv -> PersistentLinkerState
-> SrcSpan -> [Module]
-> IO (PersistentLinkerState, SuccessFlag)
linkDependencies hsc_env pls span needed_mods = do
+-- initDynLinker (hsc_dflags hsc_env)
let hpt = hsc_HPT hsc_env
dflags = hsc_dflags hsc_env
-- The interpreter and dynamic linker can only handle object code built
@@ -696,6 +700,38 @@ getLinkDeps hsc_env hpt pls maybe_normal_osuf span mods
adjust_ul _ _ = panic "adjust_ul"
\end{code}
+%************************************************************************
+%* *
+ Loading a single module
+%* *
+%************************************************************************
+\begin{code}
+
+-- | Link a single module
+linkModule :: HscEnv -> Module -> IO ()
+linkModule hsc_env mod = do
+ initDynLinker (hsc_dflags hsc_env)
+ modifyMVar v_PersistentLinkerState $ \pls -> do
+ (pls', ok) <- linkDependencies hsc_env pls noSrcSpan [mod]
+ if (failed ok) then ghcError (ProgramError "could not link module")
+ else return (pls',())
+
+-- | Coerce a value as usual, but:
+--
+-- 1) Evaluate it immediately to get a segfault early if the coercion was wrong
+--
+-- 2) Wrap it in some debug messages at verbosity 3 or higher so we can see what happened
+-- if it /does/ segfault
+lessUnsafeCoerce :: DynFlags -> String -> a -> IO b
+lessUnsafeCoerce dflags context what = do
+ debugTraceMsg dflags 3 $ (ptext $ sLit "Coercing a value in") <+> (text context) <> (ptext $ sLit "...")
+ output <- evaluate (unsafeCoerce# what)
+ debugTraceMsg dflags 3 $ ptext $ sLit "Successfully evaluated coercion"
+ return output
+
+
+
+\end{code}
%************************************************************************
%* *
@@ -997,6 +1033,7 @@ linkPackages :: DynFlags -> [PackageId] -> IO ()
linkPackages dflags new_pkgs = do
-- It's probably not safe to try to load packages concurrently, so we take
-- a lock.
+ initDynLinker dflags
modifyMVar_ v_PersistentLinkerState $ \pls -> do
linkPackages' dflags new_pkgs pls
diff --git a/compiler/hsSyn/Convert.lhs b/compiler/hsSyn/Convert.lhs
index 8d79afe7fd..7b0d8c4f0d 100644
--- a/compiler/hsSyn/Convert.lhs
+++ b/compiler/hsSyn/Convert.lhs
@@ -831,13 +831,17 @@ thRdrName :: OccName.NameSpace -> String -> TH.NameFlavour -> RdrName
-- which will give confusing error messages later
--
-- The strict applications ensure that any buried exceptions get forced
-thRdrName _ occ (TH.NameG th_ns pkg mod) = thOrigRdrName occ th_ns pkg mod
-thRdrName ctxt_ns occ (TH.NameL uniq) = nameRdrName $! (((Name.mkInternalName $! (mk_uniq uniq)) $! (mk_occ ctxt_ns occ)) noSrcSpan)
-thRdrName ctxt_ns occ (TH.NameQ mod) = (mkRdrQual $! (mk_mod mod)) $! (mk_occ ctxt_ns occ)
-thRdrName ctxt_ns occ (TH.NameU uniq) = mkRdrUnqual $! (mk_uniq_occ ctxt_ns occ uniq)
-thRdrName ctxt_ns occ TH.NameS
- | Just name <- isBuiltInOcc ctxt_ns occ = nameRdrName $! name
- | otherwise = mkRdrUnqual $! (mk_occ ctxt_ns occ)
+thRdrName ctxt_ns th_occ th_name
+ = case th_name of
+ TH.NameG th_ns pkg mod -> thOrigRdrName th_occ th_ns pkg mod
+ TH.NameQ mod -> (mkRdrQual $! mk_mod mod) $! occ
+ TH.NameL uniq -> nameRdrName $! (((Name.mkInternalName $! mk_uniq uniq) $! occ) noSrcSpan)
+ TH.NameU uniq -> nameRdrName $! (((Name.mkSystemName $! mk_uniq uniq) $! occ))
+ TH.NameS | Just name <- isBuiltInOcc ctxt_ns th_occ -> nameRdrName $! name
+ | otherwise -> mkRdrUnqual $! occ
+ where
+ occ :: OccName.OccName
+ occ = mk_occ ctxt_ns th_occ
thOrigRdrName :: String -> TH.NameSpace -> PkgName -> ModName -> RdrName
thOrigRdrName occ th_ns pkg mod = (mkOrig $! (mkModule (mk_pkg pkg) (mk_mod mod))) $! (mk_occ (mk_ghc_ns th_ns) occ)
@@ -873,14 +877,9 @@ isBuiltInOcc ctxt_ns occ
| OccName.isTcClsNameSpace ctxt_ns = Name.getName (tupleTyCon Boxed n)
| otherwise = Name.getName (tupleCon Boxed n)
-mk_uniq_occ :: OccName.NameSpace -> String -> Int# -> OccName.OccName
-mk_uniq_occ ns occ uniq
- = OccName.mkOccName ns (occ ++ '[' : shows (mk_uniq uniq) "]")
- -- See Note [Unique OccNames from Template Haskell]
-
-- The packing and unpacking is rather turgid :-(
mk_occ :: OccName.NameSpace -> String -> OccName.OccName
-mk_occ ns occ = OccName.mkOccNameFS ns (mkFastString occ)
+mk_occ ns occ = OccName.mkOccName ns occ
mk_ghc_ns :: TH.NameSpace -> OccName.NameSpace
mk_ghc_ns TH.DataName = OccName.dataName
@@ -897,17 +896,64 @@ mk_uniq :: Int# -> Unique
mk_uniq u = mkUniqueGrimily (I# u)
\end{code}
-Note [Unique OccNames from Template Haskell]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The idea here is to make a name that
- a) the user could not possibly write (it has a "["
- and letters or digits from the unique)
- b) cannot clash with another NameU
-Previously I generated an Exact RdrName with mkInternalName. This
-works fine for local binders, but does not work at all for top-level
-binders, which must have External Names, since they are rapidly baked
-into data constructors and the like. Baling out and generating an
-unqualified RdrName here is the simple solution
-
-See also Note [Suppressing uniques in OccNames] in OccName, which
-suppresses the unique when opt_SuppressUniques is on.
+Note [Binders in Template Haskell]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this TH term construction:
+ do { x1 <- TH.newName "x" -- newName :: String -> Q TH.Name
+ ; x2 <- TH.newName "x" -- Builds a NameU
+ ; x3 <- TH.newName "x"
+
+ ; let x = mkName "x" -- mkName :: String -> TH.Name
+ -- Builds a NameL
+
+ ; return (LamE (..pattern [x1,x2]..) $
+ LamE (VarPat x3) $
+ ..tuple (x1,x2,x3,x)) }
+
+It represents the term \[x1,x2]. \x3. (x1,x2,x3,x)
+
+a) We don't want to complain about "x" being bound twice in
+ the pattern [x1,x2]
+b) We don't want x3 to shadow the x1,x2
+c) We *do* want 'x' (dynamically bound with mkName) to bind
+ to the innermost binding of "x", namely x3.. (In this
+d) When pretty printing, we want to print a unique with x1,x2
+ etc, else they'll all print as "x" which isn't very helpful
+
+When we convert all this to HsSyn, the TH.Names are converted with
+thRdrName. To achieve (b) we want the binders to be Exact RdrNames.
+Achieving (a) is a bit awkward, because
+ - We must check for duplicate and shadowed names on Names,
+ not RdrNames, *after* renaming.
+ See Note [Collect binders only after renaming] in HsUtils
+
+ - But to achieve (a) we must distinguish between the Exact
+ RdrNames arising from TH and the Unqual RdrNames that would
+ come from a user writing \[x,x] -> blah
+
+So in Convert (here) we translate
+ TH Name RdrName
+ --------------------------------------------------------
+ NameU (arising from newName) --> Exact (Name{ System })
+ NameS (arising from mkName) --> Unqual
+
+Notice that the NameUs generate *System* Names. Then, when
+figuring out shadowing and duplicates, we can filter out
+System Names.
+
+This use of System Names fits with other uses of System Names, eg for
+temporary variables "a". Since there are lots of things called "a" we
+usually want to print the name with the unique, and that is indeed
+the way System Names are printed.
+
+There's a small complication of course. For data types and
+classes we'll now have system Names in the binding positions
+for constructors, TyCons etc. For example
+ [d| data T = MkT Int |]
+when we splice in and Convert to HsSyn RdrName, we'll get
+ data (Exact (system Name "T")) = (Exact (system Name "MkT")) ...
+So RnEnv.newGlobalBinder we spot Exact RdrNames that wrap a
+non-External Name, and make an External name for. (Remember,
+constructors and the like need External Names.) Oddly, the
+*occurrences* will continue to be that (non-External) System Name,
+but that will come out in the wash.
diff --git a/compiler/iface/IfaceEnv.lhs b/compiler/iface/IfaceEnv.lhs
index 0e30f31280..36024ebb91 100644
--- a/compiler/iface/IfaceEnv.lhs
+++ b/compiler/iface/IfaceEnv.lhs
@@ -13,7 +13,7 @@ module IfaceEnv (
ifaceExportNames,
-- Name-cache stuff
- allocateGlobalBinder, initNameCache,
+ allocateGlobalBinder, initNameCache, updNameCache,
getNameCache, mkNameCacheUpdater, NameCacheUpdater
) where
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index b49b860a9b..167177703e 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -163,6 +163,7 @@ data DynFlag
| Opt_D_dump_occur_anal
| Opt_D_dump_parsed
| Opt_D_dump_rn
+ | Opt_D_dump_core_pipeline -- TODO FIXME: dump after simplifier stats
| Opt_D_dump_simpl
| Opt_D_dump_simpl_iterations
| Opt_D_dump_simpl_phases
@@ -469,6 +470,10 @@ data DynFlags = DynFlags {
hpcDir :: String, -- ^ Path to store the .mix files
+ -- Plugins
+ pluginModNames :: [ModuleName],
+ pluginModNameOpts :: [(ModuleName,String)],
+
settings :: Settings,
-- For ghc -M
@@ -788,6 +793,9 @@ defaultDynFlags mySettings =
hcSuf = phaseInputExt HCc,
hiSuf = "hi",
+ pluginModNames = [],
+ pluginModNameOpts = [],
+
outputFile = Nothing,
outputHi = Nothing,
dynLibLoader = SystemDependent,
@@ -979,6 +987,16 @@ setHcSuf f d = d{ hcSuf = f}
setOutputFile f d = d{ outputFile = f}
setOutputHi f d = d{ outputHi = f}
+addPluginModuleName :: String -> DynFlags -> DynFlags
+addPluginModuleName name d = d { pluginModNames = (mkModuleName name) : (pluginModNames d) }
+
+addPluginModuleNameOption :: String -> DynFlags -> DynFlags
+addPluginModuleNameOption optflag d = d { pluginModNameOpts = (mkModuleName m, option) : (pluginModNameOpts d) }
+ where (m, rest) = break (== ':') optflag
+ option = case rest of
+ [] -> "" -- should probably signal an error
+ (_:plug_opt) -> plug_opt -- ignore the ':' from break
+
parseDynLibLoaderMode f d =
case splitAt 8 f of
("deploy", "") -> d{ dynLibLoader = Deployable }
@@ -1319,6 +1337,7 @@ dynamic_flags = [
, Flag "ddump-occur-anal" (setDumpFlag Opt_D_dump_occur_anal)
, Flag "ddump-parsed" (setDumpFlag Opt_D_dump_parsed)
, Flag "ddump-rn" (setDumpFlag Opt_D_dump_rn)
+ , Flag "ddump-core-pipeline" (setDumpFlag Opt_D_dump_core_pipeline)
, Flag "ddump-simpl" (setDumpFlag Opt_D_dump_simpl)
, Flag "ddump-simpl-iterations" (setDumpFlag Opt_D_dump_simpl_iterations)
, Flag "ddump-simpl-phases" (OptPrefix setDumpSimplPhases)
@@ -1377,7 +1396,11 @@ dynamic_flags = [
, Flag "Wnot" (NoArg (do { mapM_ unSetDynFlag minusWallOpts
; deprecate "Use -w instead" }))
, Flag "w" (NoArg (mapM_ unSetDynFlag minuswRemovesOpts))
-
+
+ ------ Plugin flags ------------------------------------------------
+ , Flag "fplugin" (hasArg addPluginModuleName)
+ , Flag "fplugin-opt" (hasArg addPluginModuleNameOption)
+
------ Optimisation flags ------------------------------------------
, Flag "O" (noArgM (setOptLevel 1))
, Flag "Onot" (noArgM (\dflags -> do deprecate "Use -O0 instead"
@@ -1688,7 +1711,8 @@ xFlags = [
( "ExplicitForAll", Opt_ExplicitForAll, nop ),
( "AlternativeLayoutRule", Opt_AlternativeLayoutRule, nop ),
( "AlternativeLayoutRuleTransitional",Opt_AlternativeLayoutRuleTransitional, nop ),
- ( "DatatypeContexts", Opt_DatatypeContexts, nop ),
+ ( "DatatypeContexts", Opt_DatatypeContexts,
+ \ turn_on -> when turn_on $ deprecate "It was widely considered a misfeature, and has been removed from the Haskell language." ),
( "NondecreasingIndentation", Opt_NondecreasingIndentation, nop ),
( "RelaxedLayout", Opt_RelaxedLayout, nop ),
( "MonoLocalBinds", Opt_MonoLocalBinds, nop ),
diff --git a/compiler/main/DynamicLoading.hs b/compiler/main/DynamicLoading.hs
new file mode 100644
index 0000000000..5c7f6c7f0a
--- /dev/null
+++ b/compiler/main/DynamicLoading.hs
@@ -0,0 +1,150 @@
+-- | Dynamically lookup up values from modules and loading them.
+module DynamicLoading (
+#ifdef GHCI
+ -- * Force loading information
+ forceLoadModuleInterfaces,
+ forceLoadNameModuleInterface,
+ forceLoadTyCon,
+
+ -- * Finding names
+ lookupRdrNameInModule,
+
+ -- * Loading values
+ getValueSafely,
+ lessUnsafeCoerce
+#endif
+ ) where
+
+#ifdef GHCI
+import Linker ( linkModule, getHValue, lessUnsafeCoerce )
+import OccName ( occNameSpace )
+import Name ( nameOccName )
+import SrcLoc ( noSrcSpan )
+import Finder ( findImportedModule, cannotFindModule )
+import DriverPhases ( HscSource(HsSrcFile) )
+import TcRnDriver ( getModuleExports )
+import TcRnMonad ( initTc, initIfaceTcRn )
+import LoadIface ( loadUserInterface )
+import RdrName ( RdrName, Provenance(..), ImportSpec(..), ImpDeclSpec(..), ImpItemSpec(..),
+ mkGlobalRdrEnv, lookupGRE_RdrName, gre_name, rdrNameSpace )
+import RnNames ( gresFromAvails )
+import PrelNames ( iNTERACTIVE )
+
+import HscTypes ( HscEnv(..), FindResult(..), lookupTypeHscEnv )
+import TypeRep ( TyThing(..), pprTyThingCategory )
+import Type ( Type, eqType )
+import TyCon ( TyCon )
+import Name ( Name, nameModule_maybe )
+import Id ( idType )
+import Module ( Module, ModuleName )
+import Panic ( GhcException(..), throwGhcException )
+import FastString
+import Outputable
+
+import Data.Maybe ( mapMaybe )
+
+
+-- | Force the interfaces for the given modules to be loaded. The 'SDoc' parameter is used
+-- for debugging (@-ddump-if-trace@) only: it is shown as the reason why the module is being loaded.
+forceLoadModuleInterfaces :: HscEnv -> SDoc -> [Module] -> IO ()
+forceLoadModuleInterfaces hsc_env doc modules
+ = (initTc hsc_env HsSrcFile False iNTERACTIVE $ initIfaceTcRn $ mapM_ (loadUserInterface False doc) modules) >> return ()
+
+-- | Force the interface for the module containing the name to be loaded. The 'SDoc' parameter is used
+-- for debugging (@-ddump-if-trace@) only: it is shown as the reason why the module is being loaded.
+forceLoadNameModuleInterface :: HscEnv -> SDoc -> Name -> IO ()
+forceLoadNameModuleInterface hsc_env reason name = do
+ let name_modules = mapMaybe nameModule_maybe [name]
+ forceLoadModuleInterfaces hsc_env reason name_modules
+
+-- | Load the 'TyCon' associated with the given name, come hell or high water. Fails if:
+--
+-- * The interface could not be loaded
+-- * The name is not that of a 'TyCon'
+-- * The name did not exist in the loaded module
+forceLoadTyCon :: HscEnv -> Name -> IO TyCon
+forceLoadTyCon hsc_env con_name = do
+ forceLoadNameModuleInterface hsc_env (ptext (sLit "contains a name used in an invocation of loadTyConTy")) con_name
+
+ mb_con_thing <- lookupTypeHscEnv hsc_env con_name
+ case mb_con_thing of
+ Nothing -> throwCmdLineErrorS $ missingTyThingError con_name
+ Just (ATyCon tycon) -> return tycon
+ Just con_thing -> throwCmdLineErrorS $ wrongTyThingError con_name con_thing
+
+-- | Loads the value corresponding to a 'Name' if that value has the given 'Type'. This only provides limited safety
+-- in that it is up to the user to ensure that that type corresponds to the type you try to use the return value at!
+--
+-- If the value found was not of the correct type, returns @Nothing@. Any other condition results in an exception:
+--
+-- * If we could not load the names module
+-- * If the thing being loaded is not a value
+-- * If the Name does not exist in the module
+-- * If the link failed
+
+getValueSafely :: HscEnv -> Name -> Type -> IO (Maybe a)
+getValueSafely hsc_env val_name expected_type = do
+ forceLoadNameModuleInterface hsc_env (ptext (sLit "contains a name used in an invocation of getValueSafely")) val_name
+
+ -- Now look up the names for the value and type constructor in the type environment
+ mb_val_thing <- lookupTypeHscEnv hsc_env val_name
+ case mb_val_thing of
+ Nothing -> throwCmdLineErrorS $ missingTyThingError val_name
+ Just (AnId id) -> do
+ -- Check the value type in the interface against the type recovered from the type constructor
+ -- before finally casting the value to the type we assume corresponds to that constructor
+ if expected_type `eqType` idType id
+ then do
+ -- Link in the module that contains the value, if it has such a module
+ case nameModule_maybe val_name of
+ Just mod -> do linkModule hsc_env mod
+ return ()
+ Nothing -> return ()
+ -- Find the value that we just linked in and cast it given that we have proved it's type
+ hval <- getHValue hsc_env val_name
+ value <- lessUnsafeCoerce (hsc_dflags hsc_env) "getValueSafely" hval
+ return $ Just value
+ else return Nothing
+ Just val_thing -> throwCmdLineErrorS $ wrongTyThingError val_name val_thing
+
+-- | Finds the 'Name' corresponding to the given 'RdrName' in the context of the 'ModuleName'. Returns @Nothing@ if no
+-- such 'Name' could be found. Any other condition results in an exception:
+--
+-- * If the module could not be found
+-- * If we could not determine the imports of the module
+lookupRdrNameInModule :: HscEnv -> ModuleName -> RdrName -> IO (Maybe Name)
+lookupRdrNameInModule hsc_env mod_name rdr_name = do
+ -- First find the package the module resides in by searching exposed packages and home modules
+ found_module <- findImportedModule hsc_env mod_name Nothing
+ case found_module of
+ Found _ mod -> do
+ -- Find the exports of the module
+ (_, mb_avail_info) <- getModuleExports hsc_env mod
+ case mb_avail_info of
+ Just avail_info -> do
+ -- Try and find the required name in the exports
+ let decl_spec = ImpDeclSpec { is_mod = mod_name, is_as = mod_name, is_qual = False, is_dloc = noSrcSpan }
+ provenance = Imported [ImpSpec decl_spec ImpAll]
+ env = mkGlobalRdrEnv (gresFromAvails provenance avail_info)
+ case [name | gre <- lookupGRE_RdrName rdr_name env, let name = gre_name gre, rdrNameSpace rdr_name == occNameSpace (nameOccName name)] of
+ [name] -> return (Just name)
+ [] -> return Nothing
+ _ -> panic "lookupRdrNameInModule"
+ Nothing -> throwCmdLineErrorS $ hsep [ptext (sLit "Could not determine the exports of the module"), ppr mod_name]
+ err -> throwCmdLineErrorS $ cannotFindModule dflags mod_name err
+ where
+ dflags = hsc_dflags hsc_env
+
+
+wrongTyThingError :: Name -> TyThing -> SDoc
+wrongTyThingError name got_thing = hsep [ptext (sLit "The name"), ppr name, ptext (sLit "is not that of a value but rather a"), pprTyThingCategory got_thing]
+
+missingTyThingError :: Name -> SDoc
+missingTyThingError name = hsep [ptext (sLit "The name"), ppr name, ptext (sLit "is not in the type environment: are you sure it exists?")]
+
+throwCmdLineErrorS :: SDoc -> IO a
+throwCmdLineErrorS = throwCmdLineError . showSDoc
+
+throwCmdLineError :: String -> IO a
+throwCmdLineError = throwGhcException . CmdLineError
+#endif
diff --git a/compiler/main/GhcPlugins.hs b/compiler/main/GhcPlugins.hs
new file mode 100644
index 0000000000..0fc87f0fd0
--- /dev/null
+++ b/compiler/main/GhcPlugins.hs
@@ -0,0 +1,83 @@
+{-# OPTIONS_GHC -fno-warn-duplicate-exports #-}
+
+-- | This module is not used by GHC itself. Rather, it exports all of
+-- the functions and types you are likely to need when writing a
+-- plugin for GHC. So authors of plugins can probably get away simply
+-- with saying "import GhcPlugins".
+--
+-- Particularly interesting modules for plugin writers include
+-- "CoreSyn" and "CoreMonad".
+module GhcPlugins(
+ module CoreMonad,
+ module RdrName, module OccName, module Name, module Var, module Id, module IdInfo,
+ module CoreSyn, module Literal, module DataCon,
+ module CoreUtils, module MkCore, module CoreFVs, module CoreSubst,
+ module Rules, module Annotations,
+ module DynFlags, module Packages,
+ module Module, module Type, module TyCon, module Coercion,
+ module TysWiredIn, module HscTypes, module BasicTypes,
+ module VarSet, module VarEnv, module NameSet, module NameEnv,
+ module UniqSet, module UniqFM, module FiniteMap,
+ module Util, module Serialized, module SrcLoc, module Outputable,
+ module UniqSupply, module Unique, module FastString, module FastTypes
+ ) where
+
+-- Plugin stuff itself
+import CoreMonad
+
+-- Variable naming
+import RdrName
+import OccName hiding ( varName {- conflicts with Var.varName -} )
+import Name hiding ( varName {- reexport from OccName, conflicts with Var.varName -} )
+import Var
+import Id hiding ( lazySetIdInfo, setIdExported, setIdNotExported {- all three conflict with Var -} )
+import IdInfo
+
+-- Core
+import CoreSyn
+import Literal
+import DataCon
+import CoreUtils
+import MkCore
+import CoreFVs
+import CoreSubst
+
+-- Core "extras"
+import Rules
+import Annotations
+
+-- Pipeline-related stuff
+import DynFlags
+import Packages
+
+-- Important GHC types
+import Module
+import Type hiding {- conflict with CoreSubst -}
+ ( substTy, extendTvSubst, extendTvSubstList, isInScope )
+import Coercion hiding {- conflict with CoreSubst -}
+ ( substTy, extendTvSubst, substCo, substTyVarBndr, lookupTyVar )
+import TyCon
+import TysWiredIn
+import HscTypes
+import BasicTypes hiding ( Version {- conflicts with Packages.Version -} )
+
+-- Collections and maps
+import VarSet
+import VarEnv
+import NameSet
+import NameEnv
+import UniqSet
+import UniqFM
+-- Conflicts with UniqFM:
+--import LazyUniqFM
+import FiniteMap
+
+-- Common utilities
+import Util
+import Serialized
+import SrcLoc
+import Outputable
+import UniqSupply
+import Unique ( Unique, Uniquable(..) )
+import FastString
+import FastTypes
diff --git a/compiler/main/HscMain.lhs b/compiler/main/HscMain.lhs
index 6542a06147..a120926717 100644
--- a/compiler/main/HscMain.lhs
+++ b/compiler/main/HscMain.lhs
@@ -118,7 +118,7 @@ import OldCmm ( Cmm )
import PprCmm ( pprCmms )
import CmmParse ( parseCmmFile )
import CmmBuildInfoTables
-import CmmCPS
+import CmmPipeline
import CmmInfo
import OptimizationFuel ( initOptFuelState )
import CmmCvt
@@ -967,34 +967,27 @@ hscCompileCmmFile hsc_env filename
-------------------- Stuff for new code gen ---------------------
tryNewCodeGen :: HscEnv -> Module -> [TyCon]
- -> CollectedCCs
- -> [(StgBinding,[(Id,[Id])])]
- -> HpcInfo
- -> IO [Cmm]
+ -> CollectedCCs
+ -> [(StgBinding,[(Id,[Id])])]
+ -> HpcInfo
+ -> IO [Cmm]
tryNewCodeGen hsc_env this_mod data_tycons
- cost_centre_info stg_binds hpc_info =
- do { let dflags = hsc_dflags hsc_env
+ cost_centre_info stg_binds hpc_info =
+ do { let dflags = hsc_dflags hsc_env
; prog <- StgCmm.codeGen dflags this_mod data_tycons
- cost_centre_info stg_binds hpc_info
- ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen"
- (pprCmms prog)
-
- ; prog <- return $ map runCmmContFlowOpts prog
- -- Control flow optimisation
+ cost_centre_info stg_binds hpc_info
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Cmm produced by new codegen"
+ (pprCmms prog)
-- We are building a single SRT for the entire module, so
-- we must thread it through all the procedures as we cps-convert them.
; us <- mkSplitUniqSupply 'S'
- ; let topSRT = initUs_ us emptySRT
- ; (topSRT, prog) <- foldM (protoCmmCPS hsc_env) (topSRT, []) prog
- -- The main CPS conversion
-
- ; prog <- return $ map runCmmContFlowOpts (srtToData topSRT : prog)
- -- Control flow optimisation, again
+ ; let initTopSRT = initUs_ us emptySRT
+ ; (topSRT, prog) <- foldM (cmmPipeline hsc_env) (initTopSRT, []) prog
- ; let prog' = map cmmOfZgraph prog
- ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (ppr prog')
- ; return prog' }
+ ; let prog' = map cmmOfZgraph (srtToData topSRT : prog)
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "Output Cmm" (ppr prog')
+ ; return prog' }
optionallyConvertAndOrCPS :: HscEnv -> [Cmm] -> IO [Cmm]
@@ -1014,15 +1007,17 @@ testCmmConversion hsc_env cmm =
dumpIfSet_dyn dflags Opt_D_dump_cvt_cmm "C-- pre-conversion" (ppr cmm)
--continuationC <- cmmCPS dflags abstractC >>= cmmToRawCmm
us <- mkSplitUniqSupply 'C'
- let cvtm = runCmmContFlowOpts `liftM` cmmToZgraph cmm
- let zgraph = initUs_ us cvtm
- us <- mkSplitUniqSupply 'S'
- let topSRT = initUs_ us emptySRT
- (_, [cps_zgraph]) <- protoCmmCPS hsc_env (topSRT, []) zgraph
- let chosen_graph = if dopt Opt_RunCPSZ dflags then cps_zgraph else zgraph
+ let zgraph = initUs_ us (cmmToZgraph cmm)
+ chosen_graph <-
+ if dopt Opt_RunCPSZ dflags
+ then do us <- mkSplitUniqSupply 'S'
+ let topSRT = initUs_ us emptySRT
+ (_, [zgraph]) <- cmmPipeline hsc_env (topSRT, []) zgraph
+ return zgraph
+ else return (runCmmContFlowOpts zgraph)
dumpIfSet_dyn dflags Opt_D_dump_cmmz "C-- Zipper Graph" (ppr chosen_graph)
showPass dflags "Convert from Z back to Cmm"
- let cvt = cmmOfZgraph $ runCmmContFlowOpts $ chosen_graph
+ let cvt = cmmOfZgraph chosen_graph
dumpIfSet_dyn dflags Opt_D_dump_cvt_cmm "C-- post-conversion" (ppr cvt)
return cvt
diff --git a/compiler/nativeGen/Size.hs b/compiler/nativeGen/Size.hs
index 6b5b1aff59..5d939d7d98 100644
--- a/compiler/nativeGen/Size.hs
+++ b/compiler/nativeGen/Size.hs
@@ -12,12 +12,13 @@
-- properly. eg SPARC doesn't care about FF80.
--
module Size (
- Size(..),
- intSize,
- floatSize,
- isFloatSize,
- cmmTypeSize,
- sizeToWidth
+ Size(..),
+ intSize,
+ floatSize,
+ isFloatSize,
+ cmmTypeSize,
+ sizeToWidth,
+ sizeInBytes
)
where
@@ -99,5 +100,6 @@ sizeToWidth size
FF32 -> W32
FF64 -> W64
FF80 -> W80
-
+sizeInBytes :: Size -> Int
+sizeInBytes = widthInBytes . sizeToWidth
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 090136085c..3898f27863 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -55,13 +55,13 @@ import Constants ( wORD_SIZE )
import DynFlags
import Control.Monad ( mapAndUnzipM )
+import Data.Bits
import Data.Maybe ( catMaybes )
import Data.Int
#if WORD_SIZE_IN_BITS==32
import Data.Maybe ( fromJust )
import Data.Word
-import Data.Bits
#endif
sse2Enabled :: NatM Bool
@@ -1504,6 +1504,89 @@ genCCall
-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+-- Unroll memcpy calls if the source and destination pointers are at
+-- least DWORD aligned and the number of bytes to copy isn't too
+-- large. Otherwise, call C's memcpy.
+genCCall (CmmPrim MO_Memcpy) _ [CmmHinted dst _, CmmHinted src _,
+ CmmHinted (CmmLit (CmmInt n _)) _,
+ CmmHinted (CmmLit (CmmInt align _)) _]
+ | n <= maxInlineSizeThreshold && align .&. 3 == 0 = do
+ code_dst <- getAnyReg dst
+ dst_r <- getNewRegNat size
+ code_src <- getAnyReg src
+ src_r <- getNewRegNat size
+ tmp_r <- getNewRegNat size
+ return $ code_dst dst_r `appOL` code_src src_r `appOL`
+ go dst_r src_r tmp_r n
+ where
+ size = if align .&. 4 /= 0 then II32 else archWordSize
+
+ sizeBytes = fromIntegral (sizeInBytes size)
+
+ go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
+ go dst src tmp i
+ | i >= sizeBytes =
+ unitOL (MOV size (OpAddr src_addr) (OpReg tmp)) `appOL`
+ unitOL (MOV size (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ go dst src tmp (i - sizeBytes)
+ -- Deal with remaining bytes.
+ | i >= 4 = -- Will never happen on 32-bit
+ unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
+ unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ go dst src tmp (i - 4)
+ | i >= 2 =
+ unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
+ unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ go dst src tmp (i - 2)
+ | i >= 1 =
+ unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
+ unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
+ go dst src tmp (i - 1)
+ | otherwise = nilOL
+ where
+ src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
+ (ImmInteger (n - i))
+ dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
+ (ImmInteger (n - i))
+
+genCCall (CmmPrim MO_Memset) _ [CmmHinted dst _,
+ CmmHinted (CmmLit (CmmInt c _)) _,
+ CmmHinted (CmmLit (CmmInt n _)) _,
+ CmmHinted (CmmLit (CmmInt align _)) _]
+ | n <= maxInlineSizeThreshold && align .&. 3 == 0 = do
+ code_dst <- getAnyReg dst
+ dst_r <- getNewRegNat size
+ return $ code_dst dst_r `appOL` go dst_r n
+ where
+ (size, val) = case align .&. 3 of
+ 2 -> (II16, c2)
+ 0 -> (II32, c4)
+ _ -> (II8, c)
+ c2 = c `shiftL` 8 .|. c
+ c4 = c2 `shiftL` 16 .|. c2
+
+ sizeBytes = fromIntegral (sizeInBytes size)
+
+ go :: Reg -> Integer -> OrdList Instr
+ go dst i
+ -- TODO: Add movabs instruction and support 64-bit sets.
+ | i >= sizeBytes = -- This might be smaller than the below sizes
+ unitOL (MOV size (OpImm (ImmInteger val)) (OpAddr dst_addr)) `appOL`
+ go dst (i - sizeBytes)
+ | i >= 4 = -- Will never happen on 32-bit
+ unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr dst_addr)) `appOL`
+ go dst (i - 4)
+ | i >= 2 =
+ unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr dst_addr)) `appOL`
+ go dst (i - 2)
+ | i >= 1 =
+ unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr dst_addr)) `appOL`
+ go dst (i - 1)
+ | otherwise = nilOL
+ where
+ dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
+ (ImmInteger (n - i))
+
#if i386_TARGET_ARCH
genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
@@ -1874,6 +1957,11 @@ genCCall = panic "X86.genCCAll: not defined"
#endif /* x86_64_TARGET_ARCH */
+-- | We're willing to inline and unroll memcpy/memset calls that touch
+-- at most these many bytes. This threshold is the same as the one
+-- used by GCC and LLVM.
+maxInlineSizeThreshold :: Integer
+maxInlineSizeThreshold = 128
outOfLineCmmOp :: CallishMachOp -> Maybe HintedCmmFormal -> [HintedCmmActual] -> NatM InstrBlock
outOfLineCmmOp mop res args
diff --git a/compiler/prelude/PrelNames.lhs b/compiler/prelude/PrelNames.lhs
index d226cbebdc..4fd23ee712 100644
--- a/compiler/prelude/PrelNames.lhs
+++ b/compiler/prelude/PrelNames.lhs
@@ -219,6 +219,9 @@ basicKnownKeyNames
-- The Either type
, eitherTyConName, leftDataConName, rightDataConName
+ -- Plugins
+ , pluginTyConName
+
-- dotnet interop
, objectTyConName, marshalObjectName, unmarshalObjectName
, marshalStringName, unmarshalStringName, checkDotnetResName
@@ -371,6 +374,12 @@ mkBaseModule m = mkModule basePackageId (mkModuleNameFS m)
mkBaseModule_ :: ModuleName -> Module
mkBaseModule_ m = mkModule basePackageId m
+mkThisGhcModule :: FastString -> Module
+mkThisGhcModule m = mkModule thisGhcPackageId (mkModuleNameFS m)
+
+mkThisGhcModule_ :: ModuleName -> Module
+mkThisGhcModule_ m = mkModule thisGhcPackageId m
+
mkMainModule :: FastString -> Module
mkMainModule m = mkModule mainPackageId (mkModuleNameFS m)
@@ -973,6 +982,12 @@ marshalObjectName = varQual dOTNET (fsLit "marshalObject") marshalObjectIdKey
marshalStringName = varQual dOTNET (fsLit "marshalString") marshalStringIdKey
unmarshalStringName = varQual dOTNET (fsLit "unmarshalString") unmarshalStringIdKey
checkDotnetResName = varQual dOTNET (fsLit "checkResult") checkDotnetResNameIdKey
+
+-- plugins
+cORE_MONAD :: Module
+cORE_MONAD = mkThisGhcModule (fsLit "CoreMonad")
+pluginTyConName :: Name
+pluginTyConName = tcQual cORE_MONAD (fsLit "Plugin") pluginTyConKey
\end{code}
%************************************************************************
@@ -1193,6 +1208,9 @@ csel1CoercionTyConKey = mkPreludeTyConUnique 99
csel2CoercionTyConKey = mkPreludeTyConUnique 100
cselRCoercionTyConKey = mkPreludeTyConUnique 101
+pluginTyConKey :: Unique
+pluginTyConKey = mkPreludeTyConUnique 102
+
unknownTyConKey, unknown1TyConKey, unknown2TyConKey, unknown3TyConKey,
opaqueTyConKey :: Unique
unknownTyConKey = mkPreludeTyConUnique 129
diff --git a/compiler/rename/RnBinds.lhs b/compiler/rename/RnBinds.lhs
index 3052a314fd..86acfa46b0 100644
--- a/compiler/rename/RnBinds.lhs
+++ b/compiler/rename/RnBinds.lhs
@@ -251,7 +251,13 @@ rnLocalValBindsLHS :: MiniFixityEnv
-> HsValBinds RdrName
-> RnM ([Name], HsValBindsLR Name RdrName)
rnLocalValBindsLHS fix_env binds
- = do { -- Do error checking: we need to check for dups here because we
+ = do { binds' <- rnValBindsLHS (localRecNameMaker fix_env) binds
+
+ -- Check for duplicates and shadowing
+ -- Must do this *after* renaming the patterns
+ -- See Note [Collect binders only after renaming] in HsUtils
+
+ -- We need to check for dups here because we
-- don't don't bind all of the variables from the ValBinds at once
-- with bindLocatedLocals any more.
--
@@ -265,10 +271,10 @@ rnLocalValBindsLHS fix_env binds
-- import A(f)
-- g = let f = ... in f
-- should.
- ; binds' <- rnValBindsLHS (localRecNameMaker fix_env) binds
; let bound_names = collectHsValBinders binds'
; envs <- getRdrEnvs
; checkDupAndShadowedNames envs bound_names
+
; return (bound_names, binds') }
-- renames the left-hand sides
diff --git a/compiler/rename/RnEnv.lhs b/compiler/rename/RnEnv.lhs
index 4492b52a60..58df462532 100644
--- a/compiler/rename/RnEnv.lhs
+++ b/compiler/rename/RnEnv.lhs
@@ -35,11 +35,11 @@ module RnEnv (
#include "HsVersions.h"
import LoadIface ( loadInterfaceForName, loadSrcInterface )
-import IfaceEnv ( lookupOrig, newGlobalBinder, newIPName )
+import IfaceEnv ( lookupOrig, newGlobalBinder, newIPName, updNameCache, extendNameCache )
import HsSyn
import RdrHsSyn ( extractHsTyRdrTyVars )
import RdrName
-import HscTypes ( availNames, ModIface(..), FixItem(..), lookupFixity)
+import HscTypes ( NameCache(..), availNames, ModIface(..), FixItem(..), lookupFixity)
import TcEnv ( tcLookupDataCon, tcLookupField, isBrackStage )
import TcRnMonad
import Id ( isRecordSelector )
@@ -90,12 +90,19 @@ newTopSrcBinder (L loc rdr_name)
-- very confused indeed. This test rejects code like
-- data T = (,) Int Int
-- unless we are in GHC.Tup
- ASSERT2( isExternalName name, ppr name )
- do { this_mod <- getModule
- ; unless (this_mod == nameModule name)
- (addErrAt loc (badOrigBinding rdr_name))
- ; return name }
-
+ if isExternalName name then
+ do { this_mod <- getModule
+ ; unless (this_mod == nameModule name)
+ (addErrAt loc (badOrigBinding rdr_name))
+ ; return name }
+ else -- See Note [Binders in Template Haskell] in Convert.hs
+ do { let occ = nameOccName name
+ ; occ `seq` return () -- c.f. seq in newGlobalBinder
+ ; this_mod <- getModule
+ ; updNameCache $ \ ns ->
+ let name' = mkExternalName (nameUnique name) this_mod occ loc
+ ns' = ns { nsNames = extendNameCache (nsNames ns) this_mod occ name' }
+ in (ns', name') }
| Just (rdr_mod, rdr_occ) <- isOrig_maybe rdr_name
= do { this_mod <- getModule
@@ -939,18 +946,20 @@ extendTyVarEnvFVRn tyvars thing_inside = bindLocalNamesFV tyvars thing_inside
-------------------------------------
checkDupRdrNames :: [Located RdrName] -> RnM ()
+-- Check for duplicated names in a binding group
checkDupRdrNames rdr_names_w_loc
- = -- Check for duplicated names in a binding group
- mapM_ (dupNamesErr getLoc) dups
+ = mapM_ (dupNamesErr getLoc) dups
where
(_, dups) = removeDups (\n1 n2 -> unLoc n1 `compare` unLoc n2) rdr_names_w_loc
checkDupNames :: [Name] -> RnM ()
+-- Check for duplicated names in a binding group
checkDupNames names
- = -- Check for duplicated names in a binding group
- mapM_ (dupNamesErr nameSrcSpan) dups
+ = mapM_ (dupNamesErr nameSrcSpan) dups
where
- (_, dups) = removeDups (\n1 n2 -> nameOccName n1 `compare` nameOccName n2) names
+ (_, dups) = removeDups (\n1 n2 -> nameOccName n1 `compare` nameOccName n2) $
+ filterOut isSystemName names
+ -- See Note [Binders in Template Haskell] in Convert
---------------------
checkDupAndShadowedRdrNames :: [Located RdrName] -> RnM ()
diff --git a/compiler/rename/RnPat.lhs b/compiler/rename/RnPat.lhs
index 844a1f90c2..3a60066342 100644
--- a/compiler/rename/RnPat.lhs
+++ b/compiler/rename/RnPat.lhs
@@ -229,12 +229,15 @@ rnPats ctxt pats thing_inside
; bindPatSigTyVarsFV (collectSigTysFromPats pats) $
unCpsRn (rnLPatsAndThen (matchNameMaker ctxt) pats) $ \ pats' -> do
{ -- Check for duplicated and shadowed names
- -- Because we don't bind the vars all at once, we can't
- -- check incrementally for duplicates;
- -- Nor can we check incrementally for shadowing, else we'll
- -- complain *twice* about duplicates e.g. f (x,x) = ...
- ; let names = collectPatsBinders pats'
- ; addErrCtxt doc_pat $ checkDupAndShadowedNames envs_before names
+ -- Must do this *after* renaming the patterns
+ -- See Note [Collect binders only after renaming] in HsUtils
+ -- Because we don't bind the vars all at once, we can't
+ -- check incrementally for duplicates;
+ -- Nor can we check incrementally for shadowing, else we'll
+ -- complain *twice* about duplicates e.g. f (x,x) = ...
+ ; addErrCtxt doc_pat $
+ checkDupAndShadowedNames envs_before $
+ collectPatsBinders pats'
; thing_inside pats' } }
where
doc_pat = ptext (sLit "In") <+> pprMatchContext ctxt
diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs
index 6ddcff2b26..8e6ec5c870 100644
--- a/compiler/simplCore/CoreMonad.lhs
+++ b/compiler/simplCore/CoreMonad.lhs
@@ -8,10 +8,16 @@
module CoreMonad (
-- * Configuration of the core-to-core passes
- CoreToDo(..),
+ CoreToDo(..), runWhen, runMaybe,
SimplifierMode(..),
FloatOutSwitches(..),
- getCoreToDo, dumpSimplPhase,
+ dumpSimplPhase,
+
+ defaultGentleSimplToDo,
+
+ -- * Plugins
+ PluginPass, Plugin(..), CommandLineOption,
+ defaultPlugin, bindsOnlyPass,
-- * Counting
SimplCount, doSimplTick, doFreeSimplTick, simplCountN,
@@ -198,6 +204,7 @@ showLintWarnings _ = True
%************************************************************************
\begin{code}
+
data CoreToDo -- These are diff core-to-core passes,
-- which may be invoked in any order,
-- as many times as you like.
@@ -205,7 +212,7 @@ data CoreToDo -- These are diff core-to-core passes,
= CoreDoSimplify -- The core-to-core simplifier.
Int -- Max iterations
SimplifierMode
-
+ | CoreDoPluginPass String PluginPass
| CoreDoFloatInwards
| CoreDoFloatOutwards FloatOutSwitches
| CoreLiberateCase
@@ -229,8 +236,12 @@ data CoreToDo -- These are diff core-to-core passes,
| CoreTidy
| CorePrep
+\end{code}
+
+\begin{code}
coreDumpFlag :: CoreToDo -> Maybe DynFlag
coreDumpFlag (CoreDoSimplify {}) = Just Opt_D_dump_simpl_phases
+coreDumpFlag (CoreDoPluginPass {}) = Just Opt_D_dump_core_pipeline
coreDumpFlag CoreDoFloatInwards = Just Opt_D_verbose_core2core
coreDumpFlag (CoreDoFloatOutwards {}) = Just Opt_D_verbose_core2core
coreDumpFlag CoreLiberateCase = Just Opt_D_verbose_core2core
@@ -255,6 +266,7 @@ instance Outputable CoreToDo where
ppr (CoreDoSimplify n md) = ptext (sLit "Simplifier")
<+> ppr md
<+> ptext (sLit "max-iterations=") <> int n
+ ppr (CoreDoPluginPass s _) = ptext (sLit "Core plugin: ") <+> text s
ppr CoreDoFloatInwards = ptext (sLit "Float inwards")
ppr (CoreDoFloatOutwards f) = ptext (sLit "Float out") <> parens (ppr f)
ppr CoreLiberateCase = ptext (sLit "Liberate case")
@@ -327,200 +339,17 @@ pprFloatOutSwitches sw
[ ptext (sLit "Lam =") <+> ppr (floatOutLambdas sw)
, ptext (sLit "Consts =") <+> ppr (floatOutConstants sw)
, ptext (sLit "PAPs =") <+> ppr (floatOutPartialApplications sw) ])
-\end{code}
-
-%************************************************************************
-%* *
- Generating the main optimisation pipeline
-%* *
-%************************************************************************
-
-\begin{code}
-getCoreToDo :: DynFlags -> [CoreToDo]
-getCoreToDo dflags
- = core_todo
- where
- opt_level = optLevel dflags
- phases = simplPhases dflags
- max_iter = maxSimplIterations dflags
- rule_check = ruleCheck dflags
- strictness = dopt Opt_Strictness dflags
- full_laziness = dopt Opt_FullLaziness dflags
- do_specialise = dopt Opt_Specialise dflags
- do_float_in = dopt Opt_FloatIn dflags
- cse = dopt Opt_CSE dflags
- spec_constr = dopt Opt_SpecConstr dflags
- liberate_case = dopt Opt_LiberateCase dflags
- static_args = dopt Opt_StaticArgumentTransformation dflags
- rules_on = dopt Opt_EnableRewriteRules dflags
- eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
-
- maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
-
- maybe_strictness_before phase
- = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
-
- base_mode = SimplMode { sm_phase = panic "base_mode"
- , sm_names = []
- , sm_rules = rules_on
- , sm_eta_expand = eta_expand_on
- , sm_inline = True
- , sm_case_case = True }
-
- simpl_phase phase names iter
- = CoreDoPasses
- $ [ maybe_strictness_before phase
- , CoreDoSimplify iter
- (base_mode { sm_phase = Phase phase
- , sm_names = names })
-
- , maybe_rule_check (Phase phase) ]
-
- -- Vectorisation can introduce a fair few common sub expressions involving
- -- DPH primitives. For example, see the Reverse test from dph-examples.
- -- We need to eliminate these common sub expressions before their definitions
- -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
- -- so we also run simpl_gently to inline them.
- ++ (if dopt Opt_Vectorise dflags && phase == 3
- then [CoreCSE, simpl_gently]
- else [])
-
- vectorisation
- = runWhen (dopt Opt_Vectorise dflags) $
- CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
-
- -- By default, we have 2 phases before phase 0.
-
- -- Want to run with inline phase 2 after the specialiser to give
- -- maximum chance for fusion to work before we inline build/augment
- -- in phase 1. This made a difference in 'ansi' where an
- -- overloaded function wasn't inlined till too late.
-
- -- Need phase 1 so that build/augment get
- -- inlined. I found that spectral/hartel/genfft lost some useful
- -- strictness in the function sumcode' if augment is not inlined
- -- before strictness analysis runs
- simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
- | phase <- [phases, phases-1 .. 1] ]
-
-
- -- initial simplify: mk specialiser happy: minimum effort please
- simpl_gently = CoreDoSimplify max_iter
- (base_mode { sm_phase = InitialPhase
+-- | A reasonably gentle simplification pass for doing "obvious" simplifications
+defaultGentleSimplToDo :: CoreToDo
+defaultGentleSimplToDo = CoreDoSimplify 4 -- 4 is the default maxSimpleIterations
+ (SimplMode { sm_phase = InitialPhase
, sm_names = ["Gentle"]
- , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
+ , sm_rules = True -- Note [RULEs enabled in SimplGently]
, sm_inline = False
- , sm_case_case = False })
- -- Don't do case-of-case transformations.
- -- This makes full laziness work better
-
- core_todo =
- if opt_level == 0 then
- [vectorisation,
- simpl_phase 0 ["final"] max_iter]
- else {- opt_level >= 1 -} [
-
- -- We want to do the static argument transform before full laziness as it
- -- may expose extra opportunities to float things outwards. However, to fix
- -- up the output of the transformation we need at do at least one simplify
- -- after this before anything else
- runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
-
- -- We run vectorisation here for now, but we might also try to run
- -- it later
- vectorisation,
-
- -- initial simplify: mk specialiser happy: minimum effort please
- simpl_gently,
-
- -- Specialisation is best done before full laziness
- -- so that overloaded functions have all their dictionary lambdas manifest
- runWhen do_specialise CoreDoSpecialising,
-
- runWhen full_laziness $
- CoreDoFloatOutwards FloatOutSwitches {
- floatOutLambdas = Just 0,
- floatOutConstants = True,
- floatOutPartialApplications = False },
- -- Was: gentleFloatOutSwitches
- --
- -- I have no idea why, but not floating constants to
- -- top level is very bad in some cases.
- --
- -- Notably: p_ident in spectral/rewrite
- -- Changing from "gentle" to "constantsOnly"
- -- improved rewrite's allocation by 19%, and
- -- made 0.0% difference to any other nofib
- -- benchmark
- --
- -- Not doing floatOutPartialApplications yet, we'll do
- -- that later on when we've had a chance to get more
- -- accurate arity information. In fact it makes no
- -- difference at all to performance if we do it here,
- -- but maybe we save some unnecessary to-and-fro in
- -- the simplifier.
-
- runWhen do_float_in CoreDoFloatInwards,
-
- simpl_phases,
-
- -- Phase 0: allow all Ids to be inlined now
- -- This gets foldr inlined before strictness analysis
-
- -- At least 3 iterations because otherwise we land up with
- -- huge dead expressions because of an infelicity in the
- -- simpifier.
- -- let k = BIG in foldr k z xs
- -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
- -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
- -- Don't stop now!
- simpl_phase 0 ["main"] (max max_iter 3),
-
- runWhen strictness (CoreDoPasses [
- CoreDoStrictness,
- CoreDoWorkerWrapper,
- CoreDoGlomBinds,
- simpl_phase 0 ["post-worker-wrapper"] max_iter
- ]),
-
- runWhen full_laziness $
- CoreDoFloatOutwards FloatOutSwitches {
- floatOutLambdas = floatLamArgs dflags,
- floatOutConstants = True,
- floatOutPartialApplications = True },
- -- nofib/spectral/hartel/wang doubles in speed if you
- -- do full laziness late in the day. It only happens
- -- after fusion and other stuff, so the early pass doesn't
- -- catch it. For the record, the redex is
- -- f_el22 (f_el21 r_midblock)
-
-
- runWhen cse CoreCSE,
- -- We want CSE to follow the final full-laziness pass, because it may
- -- succeed in commoning up things floated out by full laziness.
- -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
-
- runWhen do_float_in CoreDoFloatInwards,
-
- maybe_rule_check (Phase 0),
-
- -- Case-liberation for -O2. This should be after
- -- strictness analysis and the simplification which follows it.
- runWhen liberate_case (CoreDoPasses [
- CoreLiberateCase,
- simpl_phase 0 ["post-liberate-case"] max_iter
- ]), -- Run the simplifier after LiberateCase to vastly
- -- reduce the possiblility of shadowing
- -- Reason: see Note [Shadowing] in SpecConstr.lhs
-
- runWhen spec_constr CoreDoSpecConstr,
-
- maybe_rule_check (Phase 0),
-
- -- Final clean-up simplification:
- simpl_phase 0 ["final"] max_iter
- ]
+ , sm_eta_expand = False
+ , sm_case_case = False
+ })
-- The core-to-core pass ordering is derived from the DynFlags:
runWhen :: Bool -> CoreToDo -> CoreToDo
@@ -531,6 +360,7 @@ runMaybe :: Maybe a -> (a -> CoreToDo) -> CoreToDo
runMaybe (Just x) f = f x
runMaybe Nothing _ = CoreDoNothing
+
dumpSimplPhase :: DynFlags -> SimplifierMode -> Bool
dumpSimplPhase dflags mode
| Just spec_string <- shouldDumpSimplPhase dflags
@@ -579,6 +409,47 @@ to switch off those rules until after floating.
%************************************************************************
%* *
+ Types for Plugins
+%* *
+%************************************************************************
+
+\begin{code}
+-- | Command line options gathered from the -PModule.Name:stuff syntax are given to you as this type
+type CommandLineOption = String
+
+-- | 'Plugin' is the core compiler plugin data type. Try to avoid
+-- constructing one of these directly, and just modify some fields of
+-- 'defaultPlugin' instead: this is to try and preserve source-code
+-- compatability when we add fields to this.
+--
+-- Nonetheless, this API is preliminary and highly likely to change in the future.
+data Plugin = Plugin {
+ installCoreToDos :: [CommandLineOption] -> [CoreToDo] -> CoreM [CoreToDo]
+ -- ^ Modify the Core pipeline that will be used for compilation.
+ -- This is called as the Core pipeline is built for every module
+ -- being compiled, and plugins get the opportunity to modify
+ -- the pipeline in a nondeterministic order.
+ }
+
+-- | Default plugin: does nothing at all! For compatability reasons you should base all your
+-- plugin definitions on this default value.
+defaultPlugin :: Plugin
+defaultPlugin = Plugin {
+ installCoreToDos = const return
+ }
+
+-- | A description of the plugin pass itself
+type PluginPass = ModGuts -> CoreM ModGuts
+
+bindsOnlyPass :: ([CoreBind] -> CoreM [CoreBind]) -> ModGuts -> CoreM ModGuts
+bindsOnlyPass pass guts
+ = do { binds' <- pass (mg_binds guts)
+ ; return (guts { mg_binds = binds' }) }
+\end{code}
+
+
+%************************************************************************
+%* *
Counting and logging
%* *
%************************************************************************
@@ -955,7 +826,6 @@ liftIOWithCount what = liftIO what >>= (\(count, x) -> addSimplCount count >> re
%************************************************************************
\begin{code}
-
getHscEnv :: CoreM HscEnv
getHscEnv = read cr_hsc_env
@@ -979,7 +849,6 @@ getOrigNameCache :: CoreM OrigNameCache
getOrigNameCache = do
nameCacheRef <- fmap hsc_NC getHscEnv
liftIO $ fmap nsNames $ readIORef nameCacheRef
-
\end{code}
diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs
index 59aba4b030..34ffacb208 100644
--- a/compiler/simplCore/SimplCore.lhs
+++ b/compiler/simplCore/SimplCore.lhs
@@ -8,7 +8,7 @@ module SimplCore ( core2core, simplifyExpr ) where
#include "HsVersions.h"
-import DynFlags ( DynFlags, DynFlag(..), dopt )
+import DynFlags
import CoreSyn
import CoreSubst
import HscTypes
@@ -29,7 +29,7 @@ import FloatIn ( floatInwards )
import FloatOut ( floatOutwards )
import FamInstEnv
import Id
-import BasicTypes
+import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma )
import VarSet
import VarEnv
import LiberateCase ( liberateCase )
@@ -45,6 +45,16 @@ import Util
import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
import Outputable
import Control.Monad
+
+#ifdef GHCI
+import Type ( mkTyConTy )
+import RdrName ( mkRdrQual )
+import OccName ( mkVarOcc )
+import PrelNames ( pluginTyConName )
+import DynamicLoading ( forceLoadTyCon, lookupRdrNameInModule, getValueSafely )
+import Module ( ModuleName )
+import Panic
+#endif
\end{code}
%************************************************************************
@@ -57,9 +67,18 @@ import Control.Monad
core2core :: HscEnv -> ModGuts -> IO ModGuts
core2core hsc_env guts
= do { us <- mkSplitUniqSupply 's'
- ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod $
- doCorePasses (getCoreToDo dflags) guts
-
+ -- make sure all plugins are loaded
+
+ ; let builtin_passes = getCoreToDo dflags
+ ;
+ ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base us mod $
+ do { all_passes <- addPluginPasses dflags builtin_passes
+ ; runCorePasses all_passes guts }
+
+{--
+ ; Err.dumpIfSet_dyn dflags Opt_D_dump_core_pipeline
+ "Plugin information" "" -- TODO FIXME: dump plugin info
+--}
; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
"Grand total simplifier statistics"
(pprSimplCount stats)
@@ -75,16 +94,262 @@ core2core hsc_env guts
-- consume the ModGuts to find the module) but somewhat ugly because mg_module may
-- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
-- would mean our cached value would go out of date.
+\end{code}
+
+
+%************************************************************************
+%* *
+ Generating the main optimisation pipeline
+%* *
+%************************************************************************
+
+\begin{code}
+getCoreToDo :: DynFlags -> [CoreToDo]
+getCoreToDo dflags
+ = core_todo
+ where
+ opt_level = optLevel dflags
+ phases = simplPhases dflags
+ max_iter = maxSimplIterations dflags
+ rule_check = ruleCheck dflags
+ strictness = dopt Opt_Strictness dflags
+ full_laziness = dopt Opt_FullLaziness dflags
+ do_specialise = dopt Opt_Specialise dflags
+ do_float_in = dopt Opt_FloatIn dflags
+ cse = dopt Opt_CSE dflags
+ spec_constr = dopt Opt_SpecConstr dflags
+ liberate_case = dopt Opt_LiberateCase dflags
+ static_args = dopt Opt_StaticArgumentTransformation dflags
+ rules_on = dopt Opt_EnableRewriteRules dflags
+ eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
+
+ maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
+
+ maybe_strictness_before phase
+ = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
+
+ base_mode = SimplMode { sm_phase = panic "base_mode"
+ , sm_names = []
+ , sm_rules = rules_on
+ , sm_eta_expand = eta_expand_on
+ , sm_inline = True
+ , sm_case_case = True }
+
+ simpl_phase phase names iter
+ = CoreDoPasses
+ $ [ maybe_strictness_before phase
+ , CoreDoSimplify iter
+ (base_mode { sm_phase = Phase phase
+ , sm_names = names })
+
+ , maybe_rule_check (Phase phase) ]
+
+ -- Vectorisation can introduce a fair few common sub expressions involving
+ -- DPH primitives. For example, see the Reverse test from dph-examples.
+ -- We need to eliminate these common sub expressions before their definitions
+ -- are inlined in phase 2. The CSE introduces lots of v1 = v2 bindings,
+ -- so we also run simpl_gently to inline them.
+ ++ (if dopt Opt_Vectorise dflags && phase == 3
+ then [CoreCSE, simpl_gently]
+ else [])
+
+ vectorisation
+ = runWhen (dopt Opt_Vectorise dflags) $
+ CoreDoPasses [ simpl_gently, CoreDoVectorisation ]
+
+ -- By default, we have 2 phases before phase 0.
+
+ -- Want to run with inline phase 2 after the specialiser to give
+ -- maximum chance for fusion to work before we inline build/augment
+ -- in phase 1. This made a difference in 'ansi' where an
+ -- overloaded function wasn't inlined till too late.
+
+ -- Need phase 1 so that build/augment get
+ -- inlined. I found that spectral/hartel/genfft lost some useful
+ -- strictness in the function sumcode' if augment is not inlined
+ -- before strictness analysis runs
+ simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
+ | phase <- [phases, phases-1 .. 1] ]
+
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently = CoreDoSimplify max_iter
+ (base_mode { sm_phase = InitialPhase
+ , sm_names = ["Gentle"]
+ , sm_rules = rules_on -- Note [RULEs enabled in SimplGently]
+ , sm_inline = False
+ , sm_case_case = False })
+ -- Don't do case-of-case transformations.
+ -- This makes full laziness work better
+
+ core_todo =
+ if opt_level == 0 then
+ [vectorisation,
+ simpl_phase 0 ["final"] max_iter]
+ else {- opt_level >= 1 -} [
+
+ -- We want to do the static argument transform before full laziness as it
+ -- may expose extra opportunities to float things outwards. However, to fix
+ -- up the output of the transformation we need at do at least one simplify
+ -- after this before anything else
+ runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
+
+ -- We run vectorisation here for now, but we might also try to run
+ -- it later
+ vectorisation,
+
+ -- initial simplify: mk specialiser happy: minimum effort please
+ simpl_gently,
+
+ -- Specialisation is best done before full laziness
+ -- so that overloaded functions have all their dictionary lambdas manifest
+ runWhen do_specialise CoreDoSpecialising,
+
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = Just 0,
+ floatOutConstants = True,
+ floatOutPartialApplications = False },
+ -- Was: gentleFloatOutSwitches
+ --
+ -- I have no idea why, but not floating constants to
+ -- top level is very bad in some cases.
+ --
+ -- Notably: p_ident in spectral/rewrite
+ -- Changing from "gentle" to "constantsOnly"
+ -- improved rewrite's allocation by 19%, and
+ -- made 0.0% difference to any other nofib
+ -- benchmark
+ --
+ -- Not doing floatOutPartialApplications yet, we'll do
+ -- that later on when we've had a chance to get more
+ -- accurate arity information. In fact it makes no
+ -- difference at all to performance if we do it here,
+ -- but maybe we save some unnecessary to-and-fro in
+ -- the simplifier.
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ simpl_phases,
+
+ -- Phase 0: allow all Ids to be inlined now
+ -- This gets foldr inlined before strictness analysis
+
+ -- At least 3 iterations because otherwise we land up with
+ -- huge dead expressions because of an infelicity in the
+ -- simpifier.
+ -- let k = BIG in foldr k z xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs
+ -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
+ -- Don't stop now!
+ simpl_phase 0 ["main"] (max max_iter 3),
+
+ runWhen strictness (CoreDoPasses [
+ CoreDoStrictness,
+ CoreDoWorkerWrapper,
+ CoreDoGlomBinds,
+ simpl_phase 0 ["post-worker-wrapper"] max_iter
+ ]),
+
+ runWhen full_laziness $
+ CoreDoFloatOutwards FloatOutSwitches {
+ floatOutLambdas = floatLamArgs dflags,
+ floatOutConstants = True,
+ floatOutPartialApplications = True },
+ -- nofib/spectral/hartel/wang doubles in speed if you
+ -- do full laziness late in the day. It only happens
+ -- after fusion and other stuff, so the early pass doesn't
+ -- catch it. For the record, the redex is
+ -- f_el22 (f_el21 r_midblock)
+
+
+ runWhen cse CoreCSE,
+ -- We want CSE to follow the final full-laziness pass, because it may
+ -- succeed in commoning up things floated out by full laziness.
+ -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
+
+ runWhen do_float_in CoreDoFloatInwards,
+
+ maybe_rule_check (Phase 0),
+
+ -- Case-liberation for -O2. This should be after
+ -- strictness analysis and the simplification which follows it.
+ runWhen liberate_case (CoreDoPasses [
+ CoreLiberateCase,
+ simpl_phase 0 ["post-liberate-case"] max_iter
+ ]), -- Run the simplifier after LiberateCase to vastly
+ -- reduce the possiblility of shadowing
+ -- Reason: see Note [Shadowing] in SpecConstr.lhs
+
+ runWhen spec_constr CoreDoSpecConstr,
+
+ maybe_rule_check (Phase 0),
+
+ -- Final clean-up simplification:
+ simpl_phase 0 ["final"] max_iter
+ ]
+\end{code}
-type CorePass = CoreToDo
+Loading plugins
-doCorePasses :: [CorePass] -> ModGuts -> CoreM ModGuts
-doCorePasses passes guts
+\begin{code}
+addPluginPasses :: DynFlags -> [CoreToDo] -> CoreM [CoreToDo]
+#ifndef GHCI
+addPluginPasses _ builtin_passes = return builtin_passes
+#else
+addPluginPasses dflags builtin_passes
+ = do { hsc_env <- getHscEnv
+ ; named_plugins <- liftIO (loadPlugins hsc_env)
+ ; foldM query_plug builtin_passes named_plugins }
+ where
+ query_plug todos (mod_nm, plug)
+ = installCoreToDos plug options todos
+ where
+ options = [ option | (opt_mod_nm, option) <- pluginModNameOpts dflags
+ , opt_mod_nm == mod_nm ]
+
+loadPlugins :: HscEnv -> IO [(ModuleName, Plugin)]
+loadPlugins hsc_env
+ = do { let to_load = pluginModNames (hsc_dflags hsc_env)
+ ; plugins <- mapM (loadPlugin hsc_env) to_load
+ ; return $ to_load `zip` plugins }
+
+loadPlugin :: HscEnv -> ModuleName -> IO Plugin
+loadPlugin hsc_env mod_name
+ = do { let plugin_rdr_name = mkRdrQual mod_name (mkVarOcc "plugin")
+ ; mb_name <- lookupRdrNameInModule hsc_env mod_name plugin_rdr_name
+ ; case mb_name of {
+ Nothing -> throwGhcException (CmdLineError $ showSDoc $ hsep
+ [ ptext (sLit "The module"), ppr mod_name
+ , ptext (sLit "did not export the plugin name")
+ , ppr plugin_rdr_name ]) ;
+ Just name ->
+
+ do { plugin_tycon <- forceLoadTyCon hsc_env pluginTyConName
+ ; mb_plugin <- getValueSafely hsc_env name (mkTyConTy plugin_tycon)
+ ; case mb_plugin of
+ Nothing -> throwGhcException (CmdLineError $ showSDoc $ hsep
+ [ ptext (sLit "The value"), ppr name
+ , ptext (sLit "did not have the type")
+ , ppr pluginTyConName, ptext (sLit "as required")])
+ Just plugin -> return plugin } } }
+#endif
+\end{code}
+
+%************************************************************************
+%* *
+ The CoreToDo interpreter
+%* *
+%************************************************************************
+
+\begin{code}
+runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
+runCorePasses passes guts
= foldM do_pass guts passes
where
do_pass guts CoreDoNothing = return guts
- do_pass guts (CoreDoPasses ps) = doCorePasses ps guts
+ do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
do_pass guts pass
= do { dflags <- getDynFlags
; liftIO $ showPass dflags pass
@@ -92,7 +357,7 @@ doCorePasses passes guts
; liftIO $ endPass dflags pass (mg_binds guts') (mg_rules guts')
; return guts' }
-doCorePass :: CorePass -> ModGuts -> CoreM ModGuts
+doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-}
simplifyPgm pass
@@ -128,9 +393,14 @@ doCorePass CoreDoVectorisation = {-# SCC "Vectorise" #-}
doCorePass CoreDoGlomBinds = doPassDM glomBinds
doCorePass CoreDoPrintCore = observe printCore
-doCorePass (CoreDoRuleCheck phase pat) = ruleCheck phase pat
+doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat
doCorePass CoreDoNothing = return
-doCorePass (CoreDoPasses passes) = doCorePasses passes
+doCorePass (CoreDoPasses passes) = runCorePasses passes
+
+#ifdef GHCI
+doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
+#endif
+
doCorePass pass = pprPanic "doCorePass" (ppr pass)
\end{code}
@@ -144,8 +414,8 @@ doCorePass pass = pprPanic "doCorePass" (ppr pass)
printCore :: a -> [CoreBind] -> IO ()
printCore _ binds = Err.dumpIfSet True "Print Core" (pprCoreBindings binds)
-ruleCheck :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
-ruleCheck current_phase pat guts = do
+ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
+ruleCheckPass current_phase pat guts = do
rb <- getRuleBase
dflags <- getDynFlags
liftIO $ Err.showPass dflags "RuleCheck"
diff --git a/compiler/typecheck/TcCanonical.lhs b/compiler/typecheck/TcCanonical.lhs
index 66a37388f1..07ada2bd04 100644
--- a/compiler/typecheck/TcCanonical.lhs
+++ b/compiler/typecheck/TcCanonical.lhs
@@ -475,7 +475,9 @@ canEq fl cv (TyConApp tc1 tys1) (TyConApp tc2 tys2)
-- See Note [Equality between type applications]
-- Note [Care with type applications] in TcUnify
canEq fl cv ty1 ty2
- | Just (s1,t1) <- tcSplitAppTy_maybe ty1
+ | Nothing <- tcView ty1 -- Naked applications ONLY
+ , Nothing <- tcView ty2 -- See Note [Naked given applications]
+ , Just (s1,t1) <- tcSplitAppTy_maybe ty1
, Just (s2,t2) <- tcSplitAppTy_maybe ty2
= if isWanted fl
then do { cv1 <- newCoVar s1 s2
@@ -493,8 +495,12 @@ canEq fl cv ty1 ty2
; cc2 <- canEq fl cv2 t1 t2
; return (cc1 `andCCan` cc2) }
- else return emptyCCan -- We cannot decompose given applications
- -- because we no longer have 'left' and 'right'
+ else do { traceTcS "canEq/(app case)" $
+ text "Ommitting decomposition of given equality between: "
+ <+> ppr ty1 <+> text "and" <+> ppr ty2
+ ; return emptyCCan -- We cannot decompose given applications
+ -- because we no longer have 'left' and 'right'
+ }
canEq fl cv s1@(ForAllTy {}) s2@(ForAllTy {})
| tcIsForAllTy s1, tcIsForAllTy s2,
@@ -513,6 +519,25 @@ canEqFailure :: CtFlavor -> EvVar -> TcS CanonicalCts
canEqFailure fl cv = return (singleCCan (mkFrozenError fl cv))
\end{code}
+Note [Naked given applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider:
+ data A a
+ type T a = A a
+and the given equality:
+ [G] A a ~ T Int
+We will reach the case canEq where we do a tcSplitAppTy_maybe, but if
+we dont have the guards (Nothing <- tcView ty1) (Nothing <- tcView
+ty2) then the given equation is going to fall through and get
+completely forgotten!
+
+What we want instead is this clause to apply only when there is no
+immediate top-level synonym; if there is one it will be later on
+unfolded by the later stages of canEq.
+
+Test-case is in typecheck/should_compile/GivenTypeSynonym.hs
+
+
Note [Equality between type applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If we see an equality of the form s1 t1 ~ s2 t2 we can always split
diff --git a/compiler/vectorise/Vectorise/Builtins.hs b/compiler/vectorise/Vectorise/Builtins.hs
index 3647a7f875..125d26482e 100644
--- a/compiler/vectorise/Vectorise/Builtins.hs
+++ b/compiler/vectorise/Vectorise/Builtins.hs
@@ -8,32 +8,33 @@
-- civilized panic message if the specified thing cannot be found.
--
module Vectorise.Builtins (
- -- * Builtins
- Builtins(..),
- indexBuiltin,
-
- -- * Wrapped selectors
- selTy,
- selReplicate,
- selPick,
- selTags,
- selElements,
- sumTyCon,
- prodTyCon,
- prodDataCon,
- combinePDVar,
- scalarZip,
- closureCtrFun,
+ -- * Builtins
+ Builtins(..),
+ indexBuiltin,
+
+ -- * Wrapped selectors
+ selTy,
+ selReplicate,
+ selPick,
+ selTags,
+ selElements,
+ sumTyCon,
+ prodTyCon,
+ prodDataCon,
+ combinePDVar,
+ scalarZip,
+ closureCtrFun,
- -- * Initialisation
- initBuiltins, initBuiltinVars, initBuiltinTyCons, initBuiltinDataCons,
- initBuiltinPAs, initBuiltinPRs,
- initBuiltinBoxedTyCons, initBuiltinScalars,
-
- -- * Lookup
- primMethod,
- primPArray
+ -- * Initialisation
+ initBuiltins, initBuiltinVars, initBuiltinTyCons, initBuiltinDataCons,
+ initBuiltinPAs, initBuiltinPRs,
+ initBuiltinBoxedTyCons,
+
+ -- * Lookup
+ primMethod,
+ primPArray
) where
+
import Vectorise.Builtins.Base
import Vectorise.Builtins.Modules
import Vectorise.Builtins.Initialise
@@ -48,7 +49,8 @@ import Var
import Control.Monad
--- | Lookup a method function given its name and instance type.
+-- |Lookup a method function given its name and instance type.
+--
primMethod :: TyCon -> String -> Builtins -> DsM (Maybe Var)
primMethod tycon method (Builtins { dphModules = mods })
| Just suffix <- lookupNameEnv prim_ty_cons (tyConName tycon)
@@ -58,7 +60,8 @@ primMethod tycon method (Builtins { dphModules = mods })
| otherwise = return Nothing
--- | Lookup the representation type we use for PArrays that contain a given element type.
+-- |Lookup the representation type we use for PArrays that contain a given element type.
+--
primPArray :: TyCon -> Builtins -> DsM (Maybe TyCon)
primPArray tycon (Builtins { dphModules = mods })
| Just suffix <- lookupNameEnv prim_ty_cons (tyConName tycon)
diff --git a/compiler/vectorise/Vectorise/Builtins/Initialise.hs b/compiler/vectorise/Vectorise/Builtins/Initialise.hs
index 5a6cf88272..9fdf3ba8f5 100644
--- a/compiler/vectorise/Vectorise/Builtins/Initialise.hs
+++ b/compiler/vectorise/Vectorise/Builtins/Initialise.hs
@@ -1,14 +1,13 @@
-
module Vectorise.Builtins.Initialise (
- -- * Initialisation
- initBuiltins, initBuiltinVars, initBuiltinTyCons, initBuiltinDataCons,
- initBuiltinPAs, initBuiltinPRs,
- initBuiltinBoxedTyCons, initBuiltinScalars,
+ -- * Initialisation
+ initBuiltins, initBuiltinVars, initBuiltinTyCons, initBuiltinDataCons,
+ initBuiltinPAs, initBuiltinPRs,
+ initBuiltinBoxedTyCons
) where
+
import Vectorise.Builtins.Base
import Vectorise.Builtins.Modules
-import Vectorise.Builtins.Prelude
import BasicTypes
import PrelNames
@@ -30,20 +29,18 @@ import Outputable
import Control.Monad
import Data.Array
-import Data.List
-
--- | Create the initial map of builtin types and functions.
-initBuiltins
- :: PackageId -- ^ package id the builtins are in, eg dph-common
- -> DsM Builtins
+-- |Create the initial map of builtin types and functions.
+--
+initBuiltins :: PackageId -- ^ package id the builtins are in, eg dph-common
+ -> DsM Builtins
initBuiltins pkg
= do mapM_ load dph_Orphans
-- From dph-common:Data.Array.Parallel.PArray.PData
-- PData is a type family that maps an element type onto the type
-- we use to hold an array of those elements.
- pdataTyCon <- externalTyCon dph_PArray_PData (fsLit "PData")
+ pdataTyCon <- externalTyCon dph_PArray_PData (fsLit "PData")
-- PR is a type class that holds the primitive operators we can
-- apply to array data. Its functions take arrays in terms of PData types.
@@ -53,7 +50,7 @@ initBuiltins pkg
-- From dph-common:Data.Array.Parallel.PArray.PRepr
- preprTyCon <- externalTyCon dph_PArray_PRepr (fsLit "PRepr")
+ preprTyCon <- externalTyCon dph_PArray_PRepr (fsLit "PRepr")
paClass <- externalClass dph_PArray_PRepr (fsLit "PA")
let paTyCon = classTyCon paClass
[paDataCon] = tyConDataCons paTyCon
@@ -62,9 +59,9 @@ initBuiltins pkg
replicatePDVar <- externalVar dph_PArray_PRepr (fsLit "replicatePD")
emptyPDVar <- externalVar dph_PArray_PRepr (fsLit "emptyPD")
packByTagPDVar <- externalVar dph_PArray_PRepr (fsLit "packByTagPD")
- combines <- mapM (externalVar dph_PArray_PRepr)
- [mkFastString ("combine" ++ show i ++ "PD")
- | i <- [2..mAX_DPH_COMBINE]]
+ combines <- mapM (externalVar dph_PArray_PRepr)
+ [mkFastString ("combine" ++ show i ++ "PD")
+ | i <- [2..mAX_DPH_COMBINE]]
let combinePDVars = listArray (2, mAX_DPH_COMBINE) combines
@@ -73,45 +70,45 @@ initBuiltins pkg
-- Scalar is the class of scalar values.
-- The dictionary contains functions to coerce U.Arrays of scalars
-- to and from the PData representation.
- scalarClass <- externalClass dph_PArray_Scalar (fsLit "Scalar")
+ scalarClass <- externalClass dph_PArray_Scalar (fsLit "Scalar")
-- From dph-common:Data.Array.Parallel.Lifted.PArray
-- A PArray (Parallel Array) holds the array length and some array elements
-- represented by the PData type family.
- parrayTyCon <- externalTyCon dph_PArray_Base (fsLit "PArray")
+ parrayTyCon <- externalTyCon dph_PArray_Base (fsLit "PArray")
let [parrayDataCon] = tyConDataCons parrayTyCon
-- From dph-common:Data.Array.Parallel.PArray.Types
- voidTyCon <- externalTyCon dph_PArray_Types (fsLit "Void")
+ voidTyCon <- externalTyCon dph_PArray_Types (fsLit "Void")
voidVar <- externalVar dph_PArray_Types (fsLit "void")
fromVoidVar <- externalVar dph_PArray_Types (fsLit "fromVoid")
- wrapTyCon <- externalTyCon dph_PArray_Types (fsLit "Wrap")
- sum_tcs <- mapM (externalTyCon dph_PArray_Types) (numbered "Sum" 2 mAX_DPH_SUM)
+ wrapTyCon <- externalTyCon dph_PArray_Types (fsLit "Wrap")
+ sum_tcs <- mapM (externalTyCon dph_PArray_Types) (numbered "Sum" 2 mAX_DPH_SUM)
-- from dph-common:Data.Array.Parallel.PArray.PDataInstances
pvoidVar <- externalVar dph_PArray_PDataInstances (fsLit "pvoid")
punitVar <- externalVar dph_PArray_PDataInstances (fsLit "punit")
- closureTyCon <- externalTyCon dph_Closure (fsLit ":->")
+ closureTyCon <- externalTyCon dph_Closure (fsLit ":->")
-- From dph-common:Data.Array.Parallel.Lifted.Unboxed
- sel_tys <- mapM (externalType dph_Unboxed)
- (numbered "Sel" 2 mAX_DPH_SUM)
+ sel_tys <- mapM (externalType dph_Unboxed)
+ (numbered "Sel" 2 mAX_DPH_SUM)
- sel_replicates <- mapM (externalFun dph_Unboxed)
- (numbered_hash "replicateSel" 2 mAX_DPH_SUM)
+ sel_replicates <- mapM (externalFun dph_Unboxed)
+ (numbered_hash "replicateSel" 2 mAX_DPH_SUM)
- sel_picks <- mapM (externalFun dph_Unboxed)
- (numbered_hash "pickSel" 2 mAX_DPH_SUM)
+ sel_picks <- mapM (externalFun dph_Unboxed)
+ (numbered_hash "pickSel" 2 mAX_DPH_SUM)
- sel_tags <- mapM (externalFun dph_Unboxed)
- (numbered "tagsSel" 2 mAX_DPH_SUM)
+ sel_tags <- mapM (externalFun dph_Unboxed)
+ (numbered "tagsSel" 2 mAX_DPH_SUM)
- sel_els <- mapM mk_elements
- [(i,j) | i <- [2..mAX_DPH_SUM], j <- [0..i-1]]
+ sel_els <- mapM mk_elements
+ [(i,j) | i <- [2..mAX_DPH_SUM], j <- [0..i-1]]
let selTys = listArray (2, mAX_DPH_SUM) sel_tys
@@ -123,26 +120,26 @@ initBuiltins pkg
- closureVar <- externalVar dph_Closure (fsLit "closure")
- applyVar <- externalVar dph_Closure (fsLit "$:")
- liftedClosureVar <- externalVar dph_Closure (fsLit "liftedClosure")
- liftedApplyVar <- externalVar dph_Closure (fsLit "liftedApply")
+ closureVar <- externalVar dph_Closure (fsLit "closure")
+ applyVar <- externalVar dph_Closure (fsLit "$:")
+ liftedClosureVar <- externalVar dph_Closure (fsLit "liftedClosure")
+ liftedApplyVar <- externalVar dph_Closure (fsLit "liftedApply")
- scalar_map <- externalVar dph_Scalar (fsLit "scalar_map")
- scalar_zip2 <- externalVar dph_Scalar (fsLit "scalar_zipWith")
- scalar_zips <- mapM (externalVar dph_Scalar)
- (numbered "scalar_zipWith" 3 mAX_DPH_SCALAR_ARGS)
+ scalar_map <- externalVar dph_Scalar (fsLit "scalar_map")
+ scalar_zip2 <- externalVar dph_Scalar (fsLit "scalar_zipWith")
+ scalar_zips <- mapM (externalVar dph_Scalar)
+ (numbered "scalar_zipWith" 3 mAX_DPH_SCALAR_ARGS)
- let scalarZips = listArray (1, mAX_DPH_SCALAR_ARGS)
+ let scalarZips = listArray (1, mAX_DPH_SCALAR_ARGS)
(scalar_map : scalar_zip2 : scalar_zips)
- closures <- mapM (externalVar dph_Closure)
- (numbered "closure" 1 mAX_DPH_SCALAR_ARGS)
+ closures <- mapM (externalVar dph_Closure)
+ (numbered "closure" 1 mAX_DPH_SCALAR_ARGS)
let closureCtrFuns = listArray (1, mAX_DPH_COMBINE) closures
- liftingContext <- liftM (\u -> mkSysLocal (fsLit "lc") u intPrimTy)
- newUnique
+ liftingContext <- liftM (\u -> mkSysLocal (fsLit "lc") u intPrimTy)
+ newUnique
return $ Builtins
{ dphModules = mods
@@ -221,32 +218,26 @@ initBuiltins pkg
-- | Get the mapping of names in the Prelude to names in the DPH library.
--
-initBuiltinVars :: Bool -- FIXME
- -> Builtins -> DsM [(Var, Var)]
-initBuiltinVars compilingDPH (Builtins { dphModules = mods })
+initBuiltinVars :: Builtins -> DsM [(Var, Var)]
+initBuiltinVars (Builtins { dphModules = mods })
= do
- uvars <- zipWithM externalVar umods ufs
- vvars <- zipWithM externalVar vmods vfs
cvars <- zipWithM externalVar cmods cfs
return $ [(v,v) | v <- map dataConWorkId defaultDataConWorkers]
++ zip (map dataConWorkId cons) cvars
- ++ zip uvars vvars
where
- (umods, ufs, vmods, vfs) = if compilingDPH then ([], [], [], []) else unzip4 (preludeVars mods)
- (cons, cmods, cfs) = unzip3 (preludeDataCons mods)
+ (cons, cmods, cfs) = unzip3 (preludeDataCons mods)
defaultDataConWorkers :: [DataCon]
defaultDataConWorkers = [trueDataCon, falseDataCon, unitDataCon]
+ preludeDataCons :: Modules -> [(DataCon, Module, FastString)]
+ preludeDataCons (Modules { dph_Prelude_Tuple = dph_Prelude_Tuple })
+ = [mk_tup n dph_Prelude_Tuple (mkFastString $ "tup" ++ show n) | n <- [2..3]]
+ where
+ mk_tup n mod name = (tupleCon Boxed n, mod, name)
-preludeDataCons :: Modules -> [(DataCon, Module, FastString)]
-preludeDataCons (Modules { dph_Prelude_Tuple = dph_Prelude_Tuple })
- = [mk_tup n dph_Prelude_Tuple (mkFastString $ "tup" ++ show n) | n <- [2..3]]
- where
- mk_tup n mod name = (tupleCon Boxed n, mod, name)
-
-
--- | Get a list of names to `TyCon`s in the mock prelude.
+-- |Get a list of names to `TyCon`s in the mock prelude.
+--
initBuiltinTyCons :: Builtins -> DsM [(Name, TyCon)]
initBuiltinTyCons bi
= do
@@ -260,83 +251,82 @@ initBuiltinTyCons bi
: [(tyConName tc, tc) | tc <- dft_tcs]
- where defaultTyCons :: DsM [TyCon]
- defaultTyCons
- = do word8 <- dsLookupTyCon word8TyConName
- return [intTyCon, boolTyCon, doubleTyCon, word8]
-
+ where
+ defaultTyCons :: DsM [TyCon]
+ defaultTyCons
+ = do word8 <- dsLookupTyCon word8TyConName
+ return [intTyCon, boolTyCon, floatTyCon, doubleTyCon, word8]
--- | Get a list of names to `DataCon`s in the mock prelude.
+-- |Get a list of names to `DataCon`s in the mock prelude.
+--
initBuiltinDataCons :: Builtins -> [(Name, DataCon)]
initBuiltinDataCons _
= [(dataConName dc, dc)| dc <- defaultDataCons]
- where defaultDataCons :: [DataCon]
- defaultDataCons = [trueDataCon, falseDataCon, unitDataCon]
-
+ where
+ defaultDataCons :: [DataCon]
+ defaultDataCons = [trueDataCon, falseDataCon, unitDataCon]
--- | Get the names of all buildin instance functions for the PA class.
+-- |Get the names of all buildin instance functions for the PA class.
+--
initBuiltinPAs :: Builtins -> (InstEnv, InstEnv) -> DsM [(Name, Var)]
initBuiltinPAs (Builtins { dphModules = mods }) insts
= liftM (initBuiltinDicts insts) (externalClass (dph_PArray_PRepr mods) (fsLit "PA"))
-
--- | Get the names of all builtin instance functions for the PR class.
+-- |Get the names of all builtin instance functions for the PR class.
+--
initBuiltinPRs :: Builtins -> (InstEnv, InstEnv) -> DsM [(Name, Var)]
initBuiltinPRs (Builtins { dphModules = mods }) insts
= liftM (initBuiltinDicts insts) (externalClass (dph_PArray_PData mods) (fsLit "PR"))
-
--- | Get the names of all DPH instance functions for this class.
+-- |Get the names of all DPH instance functions for this class.
+--
initBuiltinDicts :: (InstEnv, InstEnv) -> Class -> [(Name, Var)]
initBuiltinDicts insts cls = map find $ classInstances insts cls
where
- find i | [Just tc] <- instanceRoughTcs i = (tc, instanceDFunId i)
- | otherwise = pprPanic "Invalid DPH instance" (ppr i)
-
+ find i | [Just tc] <- instanceRoughTcs i = (tc, instanceDFunId i)
+ | otherwise = pprPanic "Invalid DPH instance" (ppr i)
--- | Get a list of boxed `TyCons` in the mock prelude. This is Int only.
+-- |Get a list of boxed `TyCons` in the mock prelude. This is Int only.
+--
initBuiltinBoxedTyCons :: Builtins -> DsM [(Name, TyCon)]
initBuiltinBoxedTyCons
= return . builtinBoxedTyCons
- where builtinBoxedTyCons :: Builtins -> [(Name, TyCon)]
- builtinBoxedTyCons _
- = [(tyConName intPrimTyCon, intTyCon)]
+ where
+ builtinBoxedTyCons :: Builtins -> [(Name, TyCon)]
+ builtinBoxedTyCons _
+ = [(tyConName intPrimTyCon, intTyCon)]
--- | Get a list of all scalar functions in the mock prelude.
---
-initBuiltinScalars :: Bool
- -> Builtins -> DsM [Var]
-initBuiltinScalars True _bi = return []
-initBuiltinScalars False bi = mapM (uncurry externalVar) (preludeScalars $ dphModules bi)
--- | Lookup some variable given its name and the module that contains it.
+-- Auxilliary look up functions ----------------
+
+-- Lookup some variable given its name and the module that contains it.
+--
externalVar :: Module -> FastString -> DsM Var
externalVar mod fs
= dsLookupGlobalId =<< lookupOrig mod (mkVarOccFS fs)
-
--- | Like `externalVar` but wrap the `Var` in a `CoreExpr`
+-- Like `externalVar` but wrap the `Var` in a `CoreExpr`.
+--
externalFun :: Module -> FastString -> DsM CoreExpr
externalFun mod fs
= do var <- externalVar mod fs
return $ Var var
-
--- | Lookup some `TyCon` given its name and the module that contains it.
+-- Lookup some `TyCon` given its name and the module that contains it.
+--
externalTyCon :: Module -> FastString -> DsM TyCon
externalTyCon mod fs
= dsLookupTyCon =<< lookupOrig mod (mkTcOccFS fs)
-
--- | Lookup some `Type` given its name and the module that contains it.
+-- Lookup some `Type` given its name and the module that contains it.
+--
externalType :: Module -> FastString -> DsM Type
externalType mod fs
= do tycon <- externalTyCon mod fs
return $ mkTyConApp tycon []
-
--- | Lookup some `Class` given its name and the module that contains it.
+-- Lookup some `Class` given its name and the module that contains it.
+--
externalClass :: Module -> FastString -> DsM Class
externalClass mod fs
= dsLookupClass =<< lookupOrig mod (mkClsOccFS fs)
-
diff --git a/compiler/vectorise/Vectorise/Builtins/Modules.hs b/compiler/vectorise/Vectorise/Builtins/Modules.hs
index 6ea3595d53..c75057129b 100644
--- a/compiler/vectorise/Vectorise/Builtins/Modules.hs
+++ b/compiler/vectorise/Vectorise/Builtins/Modules.hs
@@ -22,13 +22,8 @@ data Modules
, dph_Closure :: Module
, dph_Unboxed :: Module
- , dph_Combinators :: Module
, dph_Scalar :: Module
- , dph_Prelude_Int :: Module
- , dph_Prelude_Word8 :: Module
- , dph_Prelude_Double :: Module
- , dph_Prelude_Bool :: Module
, dph_Prelude_Tuple :: Module
}
@@ -48,13 +43,8 @@ dph_Modules pkg
, dph_Closure = mk (fsLit "Data.Array.Parallel.Lifted.Closure")
, dph_Unboxed = mk (fsLit "Data.Array.Parallel.Lifted.Unboxed")
- , dph_Combinators = mk (fsLit "Data.Array.Parallel.Lifted.Combinators")
, dph_Scalar = mk (fsLit "Data.Array.Parallel.Lifted.Scalar")
- , dph_Prelude_Int = mk (fsLit "Data.Array.Parallel.Prelude.Base.Int")
- , dph_Prelude_Word8 = mk (fsLit "Data.Array.Parallel.Prelude.Base.Word8")
- , dph_Prelude_Double = mk (fsLit "Data.Array.Parallel.Prelude.Base.Double")
- , dph_Prelude_Bool = mk (fsLit "Data.Array.Parallel.Prelude.Base.Bool")
, dph_Prelude_Tuple = mk (fsLit "Data.Array.Parallel.Prelude.Base.Tuple")
}
where mk = mkModule pkg . mkModuleNameFS
diff --git a/compiler/vectorise/Vectorise/Builtins/Prelude.hs b/compiler/vectorise/Vectorise/Builtins/Prelude.hs
deleted file mode 100644
index a59f9369aa..0000000000
--- a/compiler/vectorise/Vectorise/Builtins/Prelude.hs
+++ /dev/null
@@ -1,209 +0,0 @@
-
--- WARNING: This module is a temporary kludge. It will soon go away entirely (once
--- VECTORISE SCALAR pragmas are fully implemented.)
-
--- | Mapping of prelude functions to vectorised versions.
--- Functions like filterP currently have a working but naive version in GHC.PArr
--- During vectorisation we replace these by calls to filterPA, which are
--- defined in dph-common Data.Array.Parallel.Lifted.Combinators
---
--- As renamer only sees the GHC.PArr functions, if you want to add a new function
--- to the vectoriser there has to be a definition for it in GHC.PArr, even though
--- it will never be used at runtime.
---
-module Vectorise.Builtins.Prelude
- ( preludeVars
- , preludeScalars)
-where
-import Vectorise.Builtins.Modules
-import PrelNames
-import Module
-import FastString
-
-
-preludeVars :: Modules
- -> [( Module, FastString -- Maps the original variable to the one in the DPH
- , Module, FastString)] -- packages that it should be rewritten to.
-preludeVars (Modules { dph_Combinators = _dph_Combinators
- , dph_Prelude_Int = dph_Prelude_Int
- , dph_Prelude_Word8 = dph_Prelude_Word8
- -- , dph_Prelude_Double = dph_Prelude_Double
- , dph_Prelude_Bool = dph_Prelude_Bool
- })
-
- = [
- -- Map scalar functions to versions using closures.
- mk' dph_Prelude_Int "div" "divV"
- , mk' dph_Prelude_Int "mod" "modV"
- , mk' dph_Prelude_Int "sqrt" "sqrtV"
- , mk' dph_Prelude_Int "enumFromToP" "enumFromToPA"
- ]
- ++ vars_Ord dph_Prelude_Int
- ++ vars_Num dph_Prelude_Int
-
- ++ vars_Ord dph_Prelude_Word8
- ++ vars_Num dph_Prelude_Word8
- ++
- [ mk' dph_Prelude_Word8 "div" "divV"
- , mk' dph_Prelude_Word8 "mod" "modV"
- , mk' dph_Prelude_Word8 "fromInt" "fromIntV"
- , mk' dph_Prelude_Word8 "toInt" "toIntV"
- ]
-
- -- ++ vars_Ord dph_Prelude_Double
- -- ++ vars_Num dph_Prelude_Double
- -- ++ vars_Fractional dph_Prelude_Double
- -- ++ vars_Floating dph_Prelude_Double
- -- ++ vars_RealFrac dph_Prelude_Double
- ++
- [ mk dph_Prelude_Bool (fsLit "andP") dph_Prelude_Bool (fsLit "andPA")
- , mk dph_Prelude_Bool (fsLit "orP") dph_Prelude_Bool (fsLit "orPA")
-
- , mk gHC_CLASSES (fsLit "not") dph_Prelude_Bool (fsLit "notV")
- , mk gHC_CLASSES (fsLit "&&") dph_Prelude_Bool (fsLit "andV")
- , mk gHC_CLASSES (fsLit "||") dph_Prelude_Bool (fsLit "orV")
- ]
- where
- mk = (,,,)
- mk' mod v v' = mk mod (fsLit v) mod (fsLit v')
-
- vars_Ord mod
- = [ mk' mod "==" "eqV"
- , mk' mod "/=" "neqV"
- , mk' mod "<=" "leV"
- , mk' mod "<" "ltV"
- , mk' mod ">=" "geV"
- , mk' mod ">" "gtV"
- , mk' mod "min" "minV"
- , mk' mod "max" "maxV"
- , mk' mod "minimumP" "minimumPA"
- , mk' mod "maximumP" "maximumPA"
- , mk' mod "minIndexP" "minIndexPA"
- , mk' mod "maxIndexP" "maxIndexPA"
- ]
-
- vars_Num mod
- = [ mk' mod "+" "plusV"
- , mk' mod "-" "minusV"
- , mk' mod "*" "multV"
- , mk' mod "negate" "negateV"
- , mk' mod "abs" "absV"
- , mk' mod "sumP" "sumPA"
- , mk' mod "productP" "productPA"
- ]
-
- -- vars_Fractional mod
- -- = [ mk' mod "/" "divideV"
- -- , mk' mod "recip" "recipV"
- -- ]
- --
- -- vars_Floating mod
- -- = [ mk' mod "pi" "pi"
- -- , mk' mod "exp" "expV"
- -- , mk' mod "sqrt" "sqrtV"
- -- , mk' mod "log" "logV"
- -- , mk' mod "sin" "sinV"
- -- , mk' mod "tan" "tanV"
- -- , mk' mod "cos" "cosV"
- -- , mk' mod "asin" "asinV"
- -- , mk' mod "atan" "atanV"
- -- , mk' mod "acos" "acosV"
- -- , mk' mod "sinh" "sinhV"
- -- , mk' mod "tanh" "tanhV"
- -- , mk' mod "cosh" "coshV"
- -- , mk' mod "asinh" "asinhV"
- -- , mk' mod "atanh" "atanhV"
- -- , mk' mod "acosh" "acoshV"
- -- , mk' mod "**" "powV"
- -- , mk' mod "logBase" "logBaseV"
- -- ]
- --
- -- vars_RealFrac mod
- -- = [ mk' mod "fromInt" "fromIntV"
- -- , mk' mod "truncate" "truncateV"
- -- , mk' mod "round" "roundV"
- -- , mk' mod "ceiling" "ceilingV"
- -- , mk' mod "floor" "floorV"
- -- ]
- --
-preludeScalars :: Modules -> [(Module, FastString)]
-preludeScalars (Modules { dph_Prelude_Int = dph_Prelude_Int
- , dph_Prelude_Word8 = dph_Prelude_Word8
- , dph_Prelude_Double = dph_Prelude_Double
- })
- = [ mk dph_Prelude_Int "div"
- , mk dph_Prelude_Int "mod"
- , mk dph_Prelude_Int "sqrt"
- ]
- ++ scalars_Ord dph_Prelude_Int
- ++ scalars_Num dph_Prelude_Int
-
- ++ scalars_Ord dph_Prelude_Word8
- ++ scalars_Num dph_Prelude_Word8
- ++
- [ mk dph_Prelude_Word8 "div"
- , mk dph_Prelude_Word8 "mod"
- , mk dph_Prelude_Word8 "fromInt"
- , mk dph_Prelude_Word8 "toInt"
- ]
-
- ++ scalars_Ord dph_Prelude_Double
- ++ scalars_Num dph_Prelude_Double
- ++ scalars_Fractional dph_Prelude_Double
- ++ scalars_Floating dph_Prelude_Double
- ++ scalars_RealFrac dph_Prelude_Double
- where
- mk mod s = (mod, fsLit s)
-
- scalars_Ord mod
- = [ mk mod "=="
- , mk mod "/="
- , mk mod "<="
- , mk mod "<"
- , mk mod ">="
- , mk mod ">"
- , mk mod "min"
- , mk mod "max"
- ]
-
- scalars_Num mod
- = [ mk mod "+"
- , mk mod "-"
- , mk mod "*"
- , mk mod "negate"
- , mk mod "abs"
- ]
-
- scalars_Fractional mod
- = [ mk mod "/"
- , mk mod "recip"
- ]
-
- scalars_Floating mod
- = [ mk mod "pi"
- , mk mod "exp"
- , mk mod "sqrt"
- , mk mod "log"
- , mk mod "sin"
- , mk mod "tan"
- , mk mod "cos"
- , mk mod "asin"
- , mk mod "atan"
- , mk mod "acos"
- , mk mod "sinh"
- , mk mod "tanh"
- , mk mod "cosh"
- , mk mod "asinh"
- , mk mod "atanh"
- , mk mod "acosh"
- , mk mod "**"
- , mk mod "logBase"
- ]
-
- scalars_RealFrac mod
- = [ mk mod "fromInt"
- , mk mod "truncate"
- , mk mod "round"
- , mk mod "ceiling"
- , mk mod "floor"
- ]
diff --git a/compiler/vectorise/Vectorise/Env.hs b/compiler/vectorise/Vectorise/Env.hs
index 97bb5aef69..d70f09affd 100644
--- a/compiler/vectorise/Vectorise/Env.hs
+++ b/compiler/vectorise/Vectorise/Env.hs
@@ -10,7 +10,6 @@ module Vectorise.Env (
GlobalEnv(..),
initGlobalEnv,
extendImportedVarsEnv,
- extendScalars,
setFamEnv,
extendFamEnv,
extendTyConsEnv,
@@ -46,18 +45,18 @@ data Scope a b
-- LocalEnv -------------------------------------------------------------------
-- | The local environment.
data LocalEnv
- = LocalEnv {
+ = LocalEnv {
-- Mapping from local variables to their vectorised and lifted versions.
- local_vars :: VarEnv (Var, Var)
+ local_vars :: VarEnv (Var, Var)
-- In-scope type variables.
- , local_tyvars :: [TyVar]
+ , local_tyvars :: [TyVar]
-- Mapping from tyvars to their PA dictionaries.
- , local_tyvar_pa :: VarEnv CoreExpr
+ , local_tyvar_pa :: VarEnv CoreExpr
-- Local binding name.
- , local_bind_name :: FastString
+ , local_bind_name :: FastString
}
@@ -163,12 +162,6 @@ extendImportedVarsEnv :: [(Var, Var)] -> GlobalEnv -> GlobalEnv
extendImportedVarsEnv ps genv
= genv { global_vars = extendVarEnvList (global_vars genv) ps }
--- |Extend the set of scalar variables in an environment.
---
-extendScalars :: [Var] -> GlobalEnv -> GlobalEnv
-extendScalars vs genv
- = genv { global_scalar_vars = extendVarSetList (global_scalar_vars genv) vs }
-
-- |Set the list of type family instances in an environment.
--
setFamEnv :: FamInstEnv -> GlobalEnv -> GlobalEnv
diff --git a/compiler/vectorise/Vectorise/Monad.hs b/compiler/vectorise/Vectorise/Monad.hs
index 73cba88a3b..e690077192 100644
--- a/compiler/vectorise/Vectorise/Monad.hs
+++ b/compiler/vectorise/Vectorise/Monad.hs
@@ -65,13 +65,11 @@ initV hsc_env guts info thing_inside
Just pkg -> do {
-- set up tables of builtin entities
- ; let compilingDPH = dphBackend dflags == DPHThis -- FIXME: temporary kludge support
; builtins <- initBuiltins pkg
- ; builtin_vars <- initBuiltinVars compilingDPH builtins
+ ; builtin_vars <- initBuiltinVars builtins
; builtin_tycons <- initBuiltinTyCons builtins
; let builtin_datacons = initBuiltinDataCons builtins
; builtin_boxed <- initBuiltinBoxedTyCons builtins
- ; builtin_scalars <- initBuiltinScalars compilingDPH builtins
-- set up class and type family envrionments
; eps <- liftIO $ hscEPS hsc_env
@@ -83,7 +81,6 @@ initV hsc_env guts info thing_inside
-- construct the initial global environment
; let thing_inside' = traceVt "VectDecls" (ppr (mg_vect_decls guts)) >> thing_inside
; let genv = extendImportedVarsEnv builtin_vars
- . extendScalars builtin_scalars
. extendTyConsEnv builtin_tycons
. extendDataConsEnv builtin_datacons
. extendPAFunsEnv builtin_pas
diff --git a/validate b/validate
index 3ca888fba1..0a0677e908 100755
--- a/validate
+++ b/validate
@@ -95,10 +95,13 @@ $make test_bindist TEST_PREP=YES
#
bindistdir="bindisttest/install dir"
cd libraries/mtl
-"$thisdir/$bindistdir/bin/runhaskell" Setup.hs configure --with-ghc="$thisdir/$bindistdir/bin/ghc" --global --builddir=dist-bindist --prefix="$thisdir/$bindistdir"
-"$thisdir/$bindistdir/bin/runhaskell" Setup.hs build --builddir=dist-bindist
-"$thisdir/$bindistdir/bin/runhaskell" Setup.hs install --builddir=dist-bindist
-"$thisdir/$bindistdir/bin/runhaskell" Setup.hs clean --builddir=dist-bindist
+"$thisdir/$bindistdir/bin/ghc" --make Setup
+./Setup configure --with-ghc="$thisdir/$bindistdir/bin/ghc" --with-haddock="$thisdir/$bindistdir/bin/haddock" --global --builddir=dist-bindist --prefix="$thisdir/$bindistdir"
+./Setup build --builddir=dist-bindist
+./Setup haddock --builddir=dist-bindist
+./Setup install --builddir=dist-bindist
+./Setup clean --builddir=dist-bindist
+rm -f Setup Setup.exe Setup.hi Setup.o
cd $thisdir
fi # testsuite-only