author     Patrick Palka <patrick@parcs.ath.cx>   2013-09-04 12:10:27 -0400
committer  Patrick Palka <patrick@parcs.ath.cx>   2013-09-04 12:10:27 -0400
commit     d127a697192851ea6bf308525a8a8895da71b639
tree       895f8976273df8e96b7c52382529ca0dfee61a5a
parent     a2e338f3ae5a101d333fb260ed58ec238106e88e
parent     32ade417f7e82b6fbcb6f1c93871ba3141a8f5c8
download   haskell-d127a697192851ea6bf308525a8a8895da71b639.tar.gz

Merge remote-tracking branch 'origin/master' into ghc-parmake-gsoc
-rw-r--r--  aclocal.m4 | 14
-rwxr-xr-x  boot | 2
-rw-r--r--  compiler/basicTypes/MkId.lhs | 33
-rw-r--r--  compiler/cmm/CmmNode.hs | 81
-rw-r--r--  compiler/cmm/CmmSink.hs | 120
-rw-r--r--  compiler/codeGen/StgCmm.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmBind.hs | 3
-rw-r--r--  compiler/codeGen/StgCmmCon.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs | 2
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs | 2
-rw-r--r--  compiler/coreSyn/CoreLint.lhs | 23
-rw-r--r--  compiler/coreSyn/CorePrep.lhs | 2
-rw-r--r--  compiler/coreSyn/CoreSyn.lhs | 31
-rw-r--r--  compiler/coreSyn/CoreUnfold.lhs | 2
-rw-r--r--  compiler/coreSyn/PprCore.lhs | 1
-rw-r--r--  compiler/deSugar/Desugar.lhs | 29
-rw-r--r--  compiler/iface/IfaceSyn.lhs | 15
-rw-r--r--  compiler/iface/MkIface.lhs | 1
-rw-r--r--  compiler/iface/TcIface.lhs | 46
-rw-r--r--  compiler/main/HscMain.hs | 48
-rw-r--r--  compiler/main/TidyPgm.lhs | 2
-rw-r--r--  compiler/prelude/primops.txt.pp | 23
-rw-r--r--  compiler/rename/RnExpr.lhs | 6
-rw-r--r--  compiler/rename/RnPat.lhs | 5
-rw-r--r--  compiler/rename/RnTypes.lhs | 4
-rw-r--r--  compiler/simplCore/CoreMonad.lhs | 84
-rw-r--r--  compiler/simplCore/OccurAnal.lhs | 70
-rw-r--r--  compiler/simplCore/SimplCore.lhs | 15
-rw-r--r--  compiler/specialise/Specialise.lhs | 6
-rw-r--r--  compiler/typecheck/TcBinds.lhs | 5
-rw-r--r--  compiler/typecheck/TcInstDcls.lhs | 4
-rw-r--r--  compiler/typecheck/TcMType.lhs | 6
-rw-r--r--  compiler/typecheck/TcSMonad.lhs | 1
-rw-r--r--  compiler/typecheck/TcTyClsDecls.lhs | 16
-rw-r--r--  compiler/typecheck/TcTyDecls.lhs | 22
-rw-r--r--  compiler/typecheck/TcType.lhs | 2
-rw-r--r--  compiler/typecheck/TcValidity.lhs | 23
-rw-r--r--  compiler/types/Coercion.lhs | 5
-rw-r--r--  compiler/types/Type.lhs | 10
-rw-r--r--  compiler/types/TypeRep.lhs | 18
-rw-r--r--  docs/users_guide/7.8.1-notes.xml | 19
-rw-r--r--  docs/users_guide/flags.xml | 233
-rw-r--r--  docs/users_guide/glasgow_exts.xml | 238
-rw-r--r--  driver/ghc-usage.txt | 3
-rw-r--r--  includes/rts/Linker.h | 2
-rw-r--r--  includes/stg/MiscClosures.h | 3
-rw-r--r--  includes/stg/SMP.h | 27
m---------  libraries/haskeline | 0
m---------  libraries/time | 0
-rw-r--r--  mk/config.mk.in | 1
-rw-r--r--  mk/install.mk.in | 2
-rw-r--r--  mk/validate-settings.mk | 9
-rw-r--r--  packages | 94
-rw-r--r--  rts/Capability.c | 83
-rw-r--r--  rts/Capability.h | 8
-rw-r--r--  rts/CheckUnload.c | 13
-rw-r--r--  rts/Linker.c | 5
-rw-r--r--  rts/PrimOps.cmm | 60
-rw-r--r--  rts/Profiling.c | 2
-rw-r--r--  rts/Proftimer.c | 2
-rw-r--r--  rts/RetainerProfile.c | 2
-rw-r--r--  rts/Schedule.c | 87
-rw-r--r--  rts/Stats.c | 34
-rw-r--r--  rts/Task.c | 28
-rw-r--r--  rts/Task.h | 5
-rw-r--r--  rts/Threads.c | 2
-rw-r--r--  rts/Timer.c | 2
-rw-r--r--  rts/sm/Compact.c | 2
-rw-r--r--  rts/sm/GC.c | 52
-rw-r--r--  rts/sm/Sanity.c | 14
-rw-r--r--  rts/sm/Storage.c | 10
-rw-r--r--  rules/shell-wrapper.mk | 2
-rwxr-xr-x  sync-all | 3
-rw-r--r--  utils/hp2ps/Axes.c | 12
-rwxr-xr-x  validate | 50
78 files changed, 1120 insertions, 781 deletions
diff --git a/aclocal.m4 b/aclocal.m4
index 7e555446a1..151cc2ea2c 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1184,6 +1184,7 @@ AC_SUBST(GccLT46)
dnl Check to see if the C compiler is clang or llvm-gcc
dnl
+GccIsClang=NO
AC_DEFUN([FP_CC_LLVM_BACKEND],
[AC_REQUIRE([AC_PROG_CC])
AC_MSG_CHECKING([whether C compiler is clang])
@@ -1191,6 +1192,7 @@ $CC -x c /dev/null -dM -E > conftest.txt 2>&1
if grep "__clang__" conftest.txt >/dev/null 2>&1; then
AC_SUBST([CC_CLANG_BACKEND], [1])
AC_SUBST([CC_LLVM_BACKEND], [1])
+ GccIsClang=YES
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
@@ -1205,6 +1207,7 @@ else
AC_MSG_RESULT([no])
fi
fi
+AC_SUBST(GccIsClang)
rm -f conftest.txt
])
@@ -2049,7 +2052,16 @@ AC_DEFUN([FIND_GCC],[
then
$1="$CC"
else
- FP_ARG_WITH_PATH_GNU_PROG([$1], [$2], [$3])
+ FP_ARG_WITH_PATH_GNU_PROG_OPTIONAL([$1], [$2], [$3])
+ # From Xcode 5 on, OS X command line tools do not include gcc anymore. Use clang.
+ if test -z "$$1"
+ then
+ FP_ARG_WITH_PATH_GNU_PROG_OPTIONAL([$1], [clang], [clang])
+ fi
+ if test -z "$$1"
+ then
+ AC_MSG_ERROR([cannot find $3 nor clang in your PATH])
+ fi
fi
AC_SUBST($1)
])
diff --git a/boot b/boot
index 3c17e4c94f..13747421ed 100755
--- a/boot
+++ b/boot
@@ -58,7 +58,7 @@ sub sanity_check_tree {
if (/^#/) {
# Comment; do nothing
}
- elsif (/^([a-zA-Z0-9\/.-]+) +([^ ]+) +[^ ]+$/) {
+ elsif (/^([a-zA-Z0-9\/.-]+) +([^ ]+) +[^ ]+ +[^ ]+$/) {
$dir = $1;
$tag = $2;
diff --git a/compiler/basicTypes/MkId.lhs b/compiler/basicTypes/MkId.lhs
index 14e29c1d99..21553ab4f9 100644
--- a/compiler/basicTypes/MkId.lhs
+++ b/compiler/basicTypes/MkId.lhs
@@ -695,8 +695,7 @@ dataConArgUnpack arg_ty
-- An interface file specified Unpacked, but we couldn't unpack it
isUnpackableType :: FamInstEnvs -> Type -> Bool
--- True if we can unpack the UNPACK fields of the constructor
--- without involving the NameSet tycons
+-- True if we can unpack the UNPACK'd argument type
-- See Note [Recursive unboxing]
-- We look "deeply" inside rather than relying on the DataCons
-- we encounter on the way, because otherwise we might well
@@ -730,9 +729,11 @@ isUnpackableType fam_envs ty
-- NB: dataConStrictMarks gives the *user* request;
-- We'd get a black hole if we used dataConRepBangs
- attempt_unpack (HsUnpack {}) = True
- attempt_unpack (HsUserBang (Just unpk) _) = unpk
- attempt_unpack _ = False
+ attempt_unpack (HsUnpack {}) = True
+ attempt_unpack (HsUserBang (Just unpk) bang) = bang && unpk
+ attempt_unpack (HsUserBang Nothing bang) = bang -- Be conservative
+ attempt_unpack HsStrict = False
+ attempt_unpack HsNoBang = False
\end{code}
Note [Unpack one-wide fields]
@@ -761,14 +762,26 @@ Here we can represent T with an Int#.
Note [Recursive unboxing]
~~~~~~~~~~~~~~~~~~~~~~~~~
-Be careful not to try to unbox this!
- data T = MkT {-# UNPACK #-} !T Int
-Reason: consider
+Consider
data R = MkR {-# UNPACK #-} !S Int
data S = MkS {-# UNPACK #-} !Int
The representation arguments of MkR are the *representation* arguments
-of S (plus Int); the rep args of MkS are Int#. This is obviously no
-good for T, because then we'd get an infinite number of arguments.
+of S (plus Int); the rep args of MkS are Int#. This is all fine.
+
+But be careful not to try to unbox this!
+ data T = MkT {-# UNPACK #-} !T Int
+Because then we'd get an infinite number of arguments.
+
+Here is a more complicated case:
+ data S = MkS {-# UNPACK #-} !T Int
+ data T = MkT {-# UNPACK #-} !S Int
+Each of S and T must decide independently whether to unpack
+and they had better not both say yes. So they must both say no.
+
+Also behave conservatively when there is no UNPACK pragma, e.g.
+ data T = MkS !T Int
+With -funbox-strict-fields or -funbox-small-strict-fields
+we need to behave as if there were an UNPACK pragma there.
But it's the *argument* type that matters. This is fine:
data S = MkS S !Int
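
As a hedged illustration (not part of this patch) of the rule the revised attempt_unpack equations implement, the examples from the Note can be restated as Haskell declarations:

    -- Fine to unpack: the UNPACK'd field is a plain Int, so MkS's
    -- representation arguments become Int# and Int.
    data S = MkS {-# UNPACK #-} !Int Int

    -- Must not be unpacked: the UNPACK'd field mentions T itself, so
    -- flattening the representation would never terminate.  With the new
    -- conservative case, a bare strict field under -funbox-strict-fields
    -- is treated the same way.
    data T = MkT {-# UNPACK #-} !T Int
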
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 47811bcd7f..7a4fb98a79 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -52,7 +52,7 @@ data CmmNode e x where
[CmmActual] -> -- zero or more arguments
CmmNode O O
-- Semantics: clobbers any GlobalRegs for which callerSaves r == True
- -- See Note [foreign calls clobber GlobalRegs]
+ -- See Note [Unsafe foreign calls clobber caller-save registers]
--
-- Invariant: the arguments and the ForeignTarget must not
-- mention any registers for which CodeGen.Platform.callerSaves
@@ -158,8 +158,8 @@ made manifest in CmmLayoutStack, where they are lowered into the above
sequence.
-}
-{- Note [foreign calls clobber GlobalRegs]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{- Note [Unsafe foreign calls clobber caller-save registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A foreign call is defined to clobber any GlobalRegs that are mapped to
caller-saves machine registers (according to the prevailing C ABI).
@@ -329,8 +329,9 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
foldRegsDefd dflags f z n = case n of
CmmAssign lhs _ -> fold f z lhs
CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
- CmmCall {} -> fold f z activeRegs
- CmmForeignCall {tgt=tgt} -> fold f z (foreignTargetRegs tgt)
+ CmmCall {} -> fold f z activeRegs
+ CmmForeignCall {} -> fold f z activeRegs
+ -- See Note [Safe foreign calls clobber STG registers]
_ -> z
where fold :: forall a b.
DefinerOfRegs GlobalReg a =>
@@ -344,6 +345,74 @@ instance DefinerOfRegs GlobalReg (CmmNode e x) where
foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
foreignTargetRegs _ = activeCallerSavesRegs
+-- Note [Safe foreign calls clobber STG registers]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- During the stack layout phase every safe foreign call is expanded into a block
+-- that contains an unsafe foreign call (instead of the safe foreign call) and ends
+-- with a normal call (see Note [Foreign calls]). This means that we must
+-- treat a safe foreign call as if it were a normal call (because eventually it
+-- will be). This is important if we try to run the sinking pass before the stack
+-- layout phase. Consider this example of what might go wrong (this is Cmm
+-- code from the stablename001 test). Here is the code after common block
+-- elimination (before stack layout):
+--
+-- c1q6:
+-- _s1pf::P64 = R1;
+-- _c1q8::I64 = performMajorGC;
+-- I64[(young<c1q9> + 8)] = c1q9;
+-- foreign call "ccall" arg hints: [] result hints: [] (_c1q8::I64)(...)
+-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- If we run sinking pass now (still before stack layout) we will get this:
+--
+-- c1q6:
+-- I64[(young<c1q9> + 8)] = c1q9;
+-- foreign call "ccall" arg hints: [] result hints: [] performMajorGC(...)
+-- returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- _s1pf::P64 = R1; <------ _s1pf sunk past safe foreign call
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that _s1pf was sunk past a foreign call. When we run stack layout,
+-- the safe call to performMajorGC will be turned into:
+--
+-- c1q6:
+-- _s1pc::P64 = P64[Sp + 8];
+-- I64[Sp - 8] = c1q9;
+-- Sp = Sp - 8;
+-- I64[I64[CurrentTSO + 24] + 16] = Sp;
+-- P64[CurrentNursery + 8] = Hp + 8;
+-- (_u1qI::I64) = call "ccall" arg hints: [PtrHint,]
+-- result hints: [PtrHint] suspendThread(BaseReg, 0);
+-- call "ccall" arg hints: [] result hints: [] performMajorGC();
+-- (_u1qJ::I64) = call "ccall" arg hints: [PtrHint]
+-- result hints: [PtrHint] resumeThread(_u1qI::I64);
+-- BaseReg = _u1qJ::I64;
+-- _u1qK::P64 = CurrentTSO;
+-- _u1qL::P64 = I64[_u1qK::P64 + 24];
+-- Sp = I64[_u1qL::P64 + 16];
+-- SpLim = _u1qL::P64 + 192;
+-- HpAlloc = 0;
+-- Hp = I64[CurrentNursery + 8] - 8;
+-- HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
+-- call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
+-- c1q9:
+-- I64[(young<c1qb> + 8)] = c1qb;
+-- _s1pf::P64 = R1; <------ INCORRECT!
+-- R1 = _s1pc::P64;
+-- call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
+-- call is clearly incorrect. This is what would happen if we assumed that a
+-- safe foreign call has the same semantics as an unsafe foreign call. To prevent
+-- this we need to treat a safe foreign call as if it were a normal call.
-----------------------------------
-- mapping Expr in CmmNode
@@ -429,6 +498,8 @@ foldExpForeignTarget exp (ForeignTarget e _) z = exp e z
foldExpForeignTarget _ (PrimTarget _) z = z
-- Take a folder on expressions and apply it recursively.
+-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad
+-- itself, delegating all the other CmmExpr forms to 'f'.
wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z
wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z)
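
To make the new comment on wrapRecExpf concrete, here is a hedged, self-contained analogue over a toy expression type (Expr, Lit, Reg, MachOp and Load are illustrative stand-ins, not GHC's CmmExpr):

    data Expr = Lit Int | Reg String | MachOp String [Expr] | Load Expr

    -- Handles the structural cases (MachOp, Load) itself and delegates
    -- every node it visits, including those two, to the folder 'f'.
    wrapRecExpf :: (Expr -> z -> z) -> Expr -> z -> z
    wrapRecExpf f e@(MachOp _ es) z = foldr (wrapRecExpf f) (f e z) es
    wrapRecExpf f e@(Load addr)   z = wrapRecExpf f addr (f e z)
    wrapRecExpf f e               z = f e z

    -- Example: count the registers mentioned anywhere in an expression.
    countRegs :: Expr -> Int
    countRegs e = wrapRecExpf (\e' n -> case e' of { Reg _ -> n + 1; _ -> n }) e 0
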
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index 9f8a3975e7..41323ecad3 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -43,38 +43,52 @@ import qualified Data.Set as Set
--
-- * Start by doing liveness analysis.
--
--- * Keep a list of assignments A; earlier ones may refer to later ones
+-- * Keep a list of assignments A; earlier ones may refer to later ones.
+-- Currently we only sink assignments to local registers, because we don't
+-- have liveness information about global registers.
--
-- * Walk forwards through the graph, look at each node N:
--- * If any assignments in A (1) occur only once in N, and (2) are
--- not live after N, inline the assignment and remove it
--- from A.
--- * If N is an assignment:
--- * If the register is not live after N, discard it
--- * otherwise pick up the assignment and add it to A
--- * If N is a non-assignment node:
+--
+-- * If it is a dead assignment, i.e. assignment to a register that is
+-- not used after N, discard it.
+--
+-- * Try to inline based on current list of assignments
+-- * If any assignments in A (1) occur only once in N, and (2) are
+-- not live after N, inline the assignment and remove it
+-- from A.
+--
+-- * If an assignment in A is cheap (RHS is local register), then
+-- inline the assignment and keep it in A in case it is used afterwards.
+--
+-- * Otherwise don't inline.
+--
+-- * If N is assignment to a local register pick up the assignment
+-- and add it to A.
+--
+-- * If N is not an assignment to a local register:
-- * remove any assignments from A that conflict with N, and
--- place them before N in the current block. (we call this
--- "dropping" the assignments).
+-- place them before N in the current block. We call this
+-- "dropping" the assignments.
+--
-- * An assignment conflicts with N if it:
-- - assigns to a register mentioned in N
-- - mentions a register assigned by N
-- - reads from memory written by N
-- * do this recursively, dropping dependent assignments
--- * At a multi-way branch:
--- * drop any assignments that are live on more than one branch
--- * if any successor has more than one predecessor (a
--- join-point), drop everything live in that successor
---
--- As a side-effect we'll delete some dead assignments (transitively,
--- even). This isn't as good as removeDeadAssignments, but it's much
--- cheaper.
-
--- If we do this *before* stack layout, we might be able to avoid
--- saving some things across calls/procpoints.
--
--- *but*, that will invalidate the liveness analysis, and we'll have
--- to re-do it.
+-- * At an exit node:
+-- * drop any assignments that are live on more than one successor
+-- and are not trivial
+-- * if any successor has more than one predecessor (a join-point),
+-- drop everything live in that successor. Since we only propagate
+-- assignments that are not dead at the successor, we will therefore
+-- eliminate all assignments dead at this point. Thus analysis of a
+-- join-point will always begin with an empty list of assignments.
+--
+--
+-- As a result of the above algorithm, sinking deletes some dead assignments
+-- (transitively, even). This isn't as good as removeDeadAssignments,
+-- but it's much cheaper.
-- -----------------------------------------------------------------------------
-- things that we aren't optimising very well yet.
@@ -122,6 +136,12 @@ type Assignment = (LocalReg, CmmExpr, AbsMem)
-- Assignment caches AbsMem, an abstraction of the memory read by
-- the RHS of the assignment.
+type Assignments = [Assignment]
+ -- A sequence of assignments; kept in *reverse* order
+ -- So the list [ x=e1, y=e2 ] means the sequence of assignments
+ -- y = e2
+ -- x = e1
+
cmmSink :: DynFlags -> CmmGraph -> CmmGraph
cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
where
@@ -132,7 +152,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
join_pts = findJoinPoints blocks
- sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
+ sink :: BlockEnv Assignments -> [CmmBlock] -> [CmmBlock]
sink _ [] = []
sink sunk (b:bs) =
-- pprTrace "sink" (ppr lbl) $
@@ -209,7 +229,8 @@ isSmall _ = False
isTrivial :: CmmExpr -> Bool
isTrivial (CmmReg (CmmLocal _)) = True
--- isTrivial (CmmLit _) = True
+-- isTrivial (CmmLit _) = True -- Disabled because it used to make things worse.
+ -- Needs further investigation
isTrivial _ = False
--
@@ -234,7 +255,7 @@ findJoinPoints blocks = mapFilter (>1) succ_counts
-- filter the list of assignments to remove any assignments that
-- are not live in a continuation.
--
-filterAssignments :: DynFlags -> LocalRegSet -> [Assignment] -> [Assignment]
+filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments
filterAssignments dflags live assigs = reverse (go assigs [])
where go [] kept = kept
go (a@(r,_,_):as) kept | needed = go as (a:kept)
@@ -249,26 +270,36 @@ filterAssignments dflags live assigs = reverse (go assigs [])
-- -----------------------------------------------------------------------------
-- Walk through the nodes of a block, sinking and inlining assignments
-- as we go.
+--
+-- On input we pass in a:
+-- * list of nodes in the block
+-- * a list of assignments that appeared *before* this block and
+-- that are being sunk.
+--
+-- On output we get:
+-- * a new block
+-- * a list of assignments that will be placed *after* that block.
+--
walk :: DynFlags
-> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with
-- the set of registers live *after*
-- this node.
- -> [Assignment] -- The current list of
+ -> Assignments -- The current list of
-- assignments we are sinking.
-- Later assignments may refer
-- to earlier ones.
-> ( Block CmmNode O O -- The new block
- , [Assignment] -- Assignments to sink further
+ , Assignments -- Assignments to sink further
)
walk dflags nodes assigs = go nodes emptyBlock assigs
where
go [] block as = (block, as)
go ((live,node):ns) block as
- | shouldDiscard node live = go ns block as
+ | shouldDiscard node live = go ns block as -- discard dead assignment
| Just a <- shouldSink dflags node2 = go ns block (a : as1)
| otherwise = go ns block' as'
where
@@ -316,17 +347,17 @@ shouldDiscard node live
CmmAssign r (CmmReg r') | r == r' -> True
CmmAssign (CmmLocal r) _ -> not (r `Set.member` live)
_otherwise -> False
-
+
toNode :: Assignment -> CmmNode O O
toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs
-dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> [Assignment]
- -> ([CmmNode O O], [Assignment])
+dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments
+ -> ([CmmNode O O], Assignments)
dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) ()
-dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> [Assignment]
- -> ([CmmNode O O], [Assignment])
+dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments
+ -> ([CmmNode O O], Assignments)
dropAssignments dflags should_drop state assigs
= (dropped, reverse kept)
where
@@ -351,16 +382,16 @@ tryToInline
-- that is live after the node, unless
-- it is small enough to duplicate.
-> CmmNode O x -- The node to inline into
- -> [Assignment] -- Assignments to inline
+ -> Assignments -- Assignments to inline
-> (
CmmNode O x -- New node
- , [Assignment] -- Remaining assignments
+ , Assignments -- Remaining assignments
)
tryToInline dflags live node assigs = go usages node [] assigs
where
- usages :: UniqFM Int
- usages = foldRegsUsed dflags addUsage emptyUFM node
+ usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
+ usages = foldLocalRegsUsed dflags addUsage emptyUFM node
go _usages node _skipped [] = (node, [])
@@ -371,10 +402,10 @@ tryToInline dflags live node assigs = go usages node [] assigs
| otherwise = dont_inline
where
inline_and_discard = go usages' inl_node skipped rest
- where usages' = foldRegsUsed dflags addUsage usages rhs
+ where usages' = foldLocalRegsUsed dflags addUsage usages rhs
- dont_inline = keep node -- don't inline the assignment, keep it
- inline_and_keep = keep inl_node -- inline the assignment, keep it
+ dont_inline = keep node -- don't inline the assignment, keep it
+ inline_and_keep = keep inl_node -- inline the assignment, keep it
keep node' = (final_node, a : rest')
where (final_node, rest') = go usages' node' (l:skipped) rest
@@ -470,10 +501,10 @@ conflicts dflags (r, rhs, addr) node
| SpMem{} <- addr, CmmAssign (CmmGlobal Sp) _ <- node = True
-- (4) assignments that read caller-saves GlobalRegs conflict with a
- -- foreign call. See Note [foreign calls clobber GlobalRegs].
+ -- foreign call. See Note [Unsafe foreign calls clobber caller-save registers]
| CmmUnsafeForeignCall{} <- node, anyCallerSavesRegs dflags rhs = True
- -- (5) foreign calls clobber heap: see Note [foreign calls clobber heap]
+ -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
| CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True
-- (6) native calls clobber any memory
@@ -532,7 +563,8 @@ data AbsMem
-- that was written in the same basic block. To take advantage of
-- non-aliasing of heap memory we will have to be more clever.
--- Note [foreign calls clobber]
+-- Note [Foreign calls clobber heap]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- It is tempting to say that foreign calls clobber only
-- non-heap/stack memory, but unfortunately we break this invariant in
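
A hedged, self-contained sketch (not part of this patch) of the Assignments convention documented above: the list is kept in reverse program order, so putting pending assignments back into the block means reversing the list, just as dropAssignments does with `reverse kept`. LocalReg and CmmExpr are replaced by String stand-ins:

    type Reg  = String   -- stand-in for LocalReg
    type Expr = String   -- stand-in for CmmExpr

    -- [("y","e2"), ("x","e1")] represents the sequence  x = e1; y = e2
    type Assignments = [(Reg, Expr)]

    -- Picking up a new assignment conses it onto the front ...
    record :: (Reg, Expr) -> Assignments -> Assignments
    record = (:)

    -- ... and dropping the assignments restores program order by reversing.
    emitInProgramOrder :: Assignments -> [(Reg, Expr)]
    emitInProgramOrder = reverse
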
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 9b1bce4b57..bf950c4158 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -11,7 +11,7 @@ module StgCmm ( codeGen ) where
#define FAST_STRING_NOT_NEEDED
#include "HsVersions.h"
-import StgCmmProf
+import StgCmmProf (initCostCentres, ldvEnter)
import StgCmmMonad
import StgCmmEnv
import StgCmmBind
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index dccefd0fb0..4762c5a4e0 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -20,7 +20,8 @@ import StgCmmMonad
import StgCmmEnv
import StgCmmCon
import StgCmmHeap
-import StgCmmProf
+import StgCmmProf (curCCS, ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
+ initUpdFrameProf, costCentreFrom)
import StgCmmTicky
import StgCmmLayout
import StgCmmUtils
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
index 57d4759346..eb00bbf0c0 100644
--- a/compiler/codeGen/StgCmmCon.hs
+++ b/compiler/codeGen/StgCmmCon.hs
@@ -23,7 +23,7 @@ import StgCmmEnv
import StgCmmHeap
import StgCmmUtils
import StgCmmClosure
-import StgCmmProf
+import StgCmmProf ( curCCS )
import CmmExpr
import CLabel
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 331e65819f..c7fddd49dc 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -17,7 +17,7 @@ import StgCmmMonad
import StgCmmHeap
import StgCmmEnv
import StgCmmCon
-import StgCmmProf
+import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
import StgCmmLayout
import StgCmmPrim
import StgCmmHpc
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 0b782fffcc..a688074b9e 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -18,7 +18,7 @@ module StgCmmForeign (
#include "HsVersions.h"
import StgSyn
-import StgCmmProf
+import StgCmmProf (storeCurCCS, ccsType, curCCS)
import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 97233aa500..f4c58e95e1 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -28,7 +28,7 @@ import CLabel
import StgCmmLayout
import StgCmmUtils
import StgCmmMonad
-import StgCmmProf
+import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr)
import StgCmmTicky
import StgCmmClosure
import StgCmmEnv
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 6c6e49dafa..b52d4e57df 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -29,7 +29,7 @@ import StgCmmArgRep -- notably: ( slowCallPattern )
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
-import StgCmmProf
+import StgCmmProf (curCCS)
import MkGraph
import SMRep
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c11df7009c..bb58024a4e 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -21,7 +21,7 @@ import StgCmmMonad
import StgCmmUtils
import StgCmmTicky
import StgCmmHeap
-import StgCmmProf
+import StgCmmProf ( costCentreFrom, curCCS )
import DynFlags
import Platform
diff --git a/compiler/coreSyn/CoreLint.lhs b/compiler/coreSyn/CoreLint.lhs
index 68aaea5b5c..1913e3ab93 100644
--- a/compiler/coreSyn/CoreLint.lhs
+++ b/compiler/coreSyn/CoreLint.lhs
@@ -16,7 +16,7 @@ A ``lint'' pass to check for Core correctness
{-# OPTIONS_GHC -fprof-auto #-}
-module CoreLint ( lintCoreBindings, lintUnfolding ) where
+module CoreLint ( lintCoreBindings, lintUnfolding, lintExpr ) where
#include "HsVersions.h"
@@ -120,14 +120,15 @@ find an occurence of an Id, we fetch it from the in-scope set.
\begin{code}
-lintCoreBindings :: CoreProgram -> (Bag MsgDoc, Bag MsgDoc)
+lintCoreBindings :: [Var] -> CoreProgram -> (Bag MsgDoc, Bag MsgDoc)
-- Returns (warnings, errors)
-- If you edit this function, you may need to update the GHC formalism
-- See Note [GHC Formalism]
-lintCoreBindings binds
+lintCoreBindings local_in_scope binds
= initL $
- addLoc TopLevelBindings $
- addInScopeVars binders $
+ addLoc TopLevelBindings $
+ addInScopeVars local_in_scope $
+ addInScopeVars binders $
-- Put all the top-level binders in scope at the start
-- This is because transformation rules can bring something
-- into use 'unexpectedly'
@@ -178,6 +179,18 @@ lintUnfolding locn vars expr
(_warns, errs) = initL (addLoc (ImportedUnfolding locn) $
addInScopeVars vars $
lintCoreExpr expr)
+
+lintExpr :: [Var] -- Treat these as in scope
+ -> CoreExpr
+ -> Maybe MsgDoc -- Nothing => OK
+
+lintExpr vars expr
+ | isEmptyBag errs = Nothing
+ | otherwise = Just (pprMessageBag errs)
+ where
+ (_warns, errs) = initL (addLoc TopLevelBindings $
+ addInScopeVars vars $
+ lintCoreExpr expr)
\end{code}
%************************************************************************
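
A hedged usage sketch of the new lintExpr entry point (the module names are the GHC-internal ones of this era; this helper is illustrative and not part of the patch):

    import CoreLint   (lintExpr)
    import CoreSyn    (CoreExpr)
    import Var        (Var)
    import Data.Maybe (isJust)

    -- True if the expression fails Core Lint, treating 'in_scope' as bound.
    -- Real callers (e.g. lintInteractiveExpr) render the returned MsgDoc.
    failsLint :: [Var] -> CoreExpr -> Bool
    failsLint in_scope expr = isJust (lintExpr in_scope expr)
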
diff --git a/compiler/coreSyn/CorePrep.lhs b/compiler/coreSyn/CorePrep.lhs
index d87fdfc197..1a21253f39 100644
--- a/compiler/coreSyn/CorePrep.lhs
+++ b/compiler/coreSyn/CorePrep.lhs
@@ -172,7 +172,7 @@ corePrepPgm dflags hsc_env binds data_tycons = do
floats2 <- corePrepTopBinds initialCorePrepEnv implicit_binds
return (deFloatTop (floats1 `appendFloats` floats2))
- endPass dflags CorePrep binds_out []
+ endPass hsc_env CorePrep binds_out []
return binds_out
corePrepExpr :: DynFlags -> HscEnv -> CoreExpr -> IO CoreExpr
diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs
index dd7307d190..baa28bc0cc 100644
--- a/compiler/coreSyn/CoreSyn.lhs
+++ b/compiler/coreSyn/CoreSyn.lhs
@@ -714,7 +714,9 @@ data Unfolding
------------------------------------------------
data UnfoldingSource
- = InlineRhs -- The current rhs of the function
+ = -- See also Note [Historical note: unfoldings for wrappers]
+
+ InlineRhs -- The current rhs of the function
-- Replace uf_tmpl each time around
| InlineStable -- From an INLINE or INLINABLE pragma
@@ -739,13 +741,6 @@ data UnfoldingSource
-- (see MkId.lhs, calls to mkCompulsoryUnfolding).
-- Inline absolutely always, however boring the context.
- | InlineWrapper -- This unfolding is the wrapper in a
- -- worker/wrapper split from the strictness
- -- analyser
- --
- -- cf some history in TcIface's Note [wrappers
- -- in interface files]
-
-- | 'UnfoldingGuidance' says when unfolding should take place
@@ -775,6 +770,25 @@ data UnfoldingGuidance
| UnfNever -- The RHS is big, so don't inline it
\end{code}
+Note [Historical note: unfoldings for wrappers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used to have a nice clever scheme in interface files for
+wrappers. A wrapper's unfolding can be reconstructed from its worker's
+id and its strictness. This decreased .hi file size (sometimes
+significantly, for modules like GHC.Classes with many high-arity w/w
+splits) and had a slight corresponding effect on compile times.
+
+However, when we added the second demand analysis, this scheme led to
+some Core lint errors. The second analysis could change the strictness
+signatures, which sometimes resulted in a wrapper's regenerated
+unfolding applying the wrapper to too many arguments.
+
+Instead of repairing the clever .hi scheme, we abandoned it in favor
+of simplicity. The .hi sizes are usually insignificant (excluding the
++1M for base libraries), and compile time barely increases (~+1% for
+nofib). The nicer upshot is that the UnfoldingSource no longer mentions
+an Id, so, eg, substitutions need not traverse them.
+
Note [DFun unfoldings]
~~~~~~~~~~~~~~~~~~~~~~
@@ -844,7 +858,6 @@ isStableSource :: UnfoldingSource -> Bool
-- Keep the unfolding template
isStableSource InlineCompulsory = True
isStableSource InlineStable = True
-isStableSource InlineWrapper = True
isStableSource InlineRhs = False
-- | Retrieves the template of an unfolding: panics if none is known
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index bbf9e0eb40..896f3723d2 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -103,7 +103,7 @@ mkDFunUnfolding bndrs con ops
mkWwInlineRule :: CoreExpr -> Arity -> Unfolding
mkWwInlineRule expr arity
- = mkCoreUnfolding InlineWrapper True
+ = mkCoreUnfolding InlineStable True
(simpleOptExpr expr) arity
(UnfWhen unSaturatedOk boringCxtNotOk)
diff --git a/compiler/coreSyn/PprCore.lhs b/compiler/coreSyn/PprCore.lhs
index 64e7d63590..00f9a9346f 100644
--- a/compiler/coreSyn/PprCore.lhs
+++ b/compiler/coreSyn/PprCore.lhs
@@ -422,7 +422,6 @@ instance Outputable UnfoldingGuidance where
instance Outputable UnfoldingSource where
ppr InlineCompulsory = ptext (sLit "Compulsory")
- ppr InlineWrapper = ptext (sLit "Wrapper")
ppr InlineStable = ptext (sLit "InlineStable")
ppr InlineRhs = ptext (sLit "<vanilla>")
diff --git a/compiler/deSugar/Desugar.lhs b/compiler/deSugar/Desugar.lhs
index b65304a118..20a8a57299 100644
--- a/compiler/deSugar/Desugar.lhs
+++ b/compiler/deSugar/Desugar.lhs
@@ -143,14 +143,14 @@ deSugar hsc_env
#ifdef DEBUG
-- Debug only as pre-simple-optimisation program may be really big
- ; endPass dflags CoreDesugar final_pgm rules_for_imps
+ ; endPass hsc_env CoreDesugar final_pgm rules_for_imps
#endif
; (ds_binds, ds_rules_for_imps, ds_vects)
<- simpleOptPgm dflags mod final_pgm rules_for_imps vects0
-- The simpleOptPgm gets rid of type
-- bindings plus any stupid dead code
- ; endPass dflags CoreDesugarOpt ds_binds ds_rules_for_imps
+ ; endPass hsc_env CoreDesugarOpt ds_binds ds_rules_for_imps
; let used_names = mkUsedNames tcg_env
; deps <- mkDependencies tcg_env
@@ -226,22 +226,23 @@ deSugarExpr :: HscEnv
-> IO (Messages, Maybe CoreExpr)
-- Prints its own errors; returns Nothing if error occurred
-deSugarExpr hsc_env this_mod rdr_env type_env tc_expr = do
- let dflags = hsc_dflags hsc_env
- showPass dflags "Desugar"
+deSugarExpr hsc_env this_mod rdr_env type_env tc_expr
+ = do { let dflags = hsc_dflags hsc_env
+ ; showPass dflags "Desugar"
- -- Do desugaring
- (msgs, mb_core_expr) <- initDs hsc_env this_mod rdr_env type_env $
- dsLExpr tc_expr
+ -- Do desugaring
+ ; (msgs, mb_core_expr) <- initDs hsc_env this_mod rdr_env type_env $
+ dsLExpr tc_expr
- case mb_core_expr of
- Nothing -> return (msgs, Nothing)
- Just expr -> do
+ ; case mb_core_expr of {
+ Nothing -> return (msgs, Nothing) ;
+ Just expr ->
- -- Dump output
- dumpIfSet_dyn dflags Opt_D_dump_ds "Desugared" (pprCoreExpr expr)
+
+ -- Dump output
+ do { dumpIfSet_dyn dflags Opt_D_dump_ds "Desugared" (pprCoreExpr expr)
- return (msgs, Just expr)
+ ; return (msgs, Just expr) } } }
\end{code}
%************************************************************************
diff --git a/compiler/iface/IfaceSyn.lhs b/compiler/iface/IfaceSyn.lhs
index 8dc4188bb9..f6e68e2836 100644
--- a/compiler/iface/IfaceSyn.lhs
+++ b/compiler/iface/IfaceSyn.lhs
@@ -583,8 +583,6 @@ data IfaceUnfolding
Bool -- OK to inline even if context is boring
IfaceExpr
- | IfWrapper IfaceExpr -- cf TcIface's Note [wrappers in interface files]
-
| IfDFunUnfold [IfaceBndr] [IfaceExpr]
instance Binary IfaceUnfolding where
@@ -598,15 +596,12 @@ instance Binary IfaceUnfolding where
put_ bh b
put_ bh c
put_ bh d
- put_ bh (IfWrapper e) = do
- putByte bh 2
- put_ bh e
put_ bh (IfDFunUnfold as bs) = do
- putByte bh 3
+ putByte bh 2
put_ bh as
put_ bh bs
put_ bh (IfCompulsory e) = do
- putByte bh 4
+ putByte bh 3
put_ bh e
get bh = do
h <- getByte bh
@@ -619,9 +614,7 @@ instance Binary IfaceUnfolding where
c <- get bh
d <- get bh
return (IfInlineRule a b c d)
- 2 -> do e <- get bh
- return (IfWrapper e)
- 3 -> do as <- get bh
+ 2 -> do as <- get bh
bs <- get bh
return (IfDFunUnfold as bs)
_ -> do e <- get bh
@@ -1288,7 +1281,6 @@ instance Outputable IfaceUnfolding where
ppr (IfInlineRule a uok bok e) = sep [ptext (sLit "InlineRule")
<+> ppr (a,uok,bok),
pprParendIfaceExpr e]
- ppr (IfWrapper e) = ptext (sLit "Wrapper:") <+> parens (ppr e)
ppr (IfDFunUnfold bs es) = hang (ptext (sLit "DFun:") <+> sep (map ppr bs) <> dot)
2 (sep (map pprParendIfaceExpr es))
@@ -1446,7 +1438,6 @@ freeNamesIfUnfold :: IfaceUnfolding -> NameSet
freeNamesIfUnfold (IfCoreUnfold _ e) = freeNamesIfExpr e
freeNamesIfUnfold (IfCompulsory e) = freeNamesIfExpr e
freeNamesIfUnfold (IfInlineRule _ _ _ e) = freeNamesIfExpr e
-freeNamesIfUnfold (IfWrapper e) = freeNamesIfExpr e
freeNamesIfUnfold (IfDFunUnfold bs es) = fnList freeNamesIfBndr bs &&& fnList freeNamesIfExpr es
freeNamesIfExpr :: IfaceExpr -> NameSet
diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs
index d3b56d1f7b..44f99d520e 100644
--- a/compiler/iface/MkIface.lhs
+++ b/compiler/iface/MkIface.lhs
@@ -1762,7 +1762,6 @@ toIfUnfolding lb (CoreUnfolding { uf_tmpl = rhs, uf_arity = arity
-> case guidance of
UnfWhen unsat_ok boring_ok -> IfInlineRule arity unsat_ok boring_ok if_rhs
_other -> IfCoreUnfold True if_rhs
- InlineWrapper -> IfWrapper if_rhs
InlineCompulsory -> IfCompulsory if_rhs
InlineRhs -> IfCoreUnfold False if_rhs
-- Yes, even if guidance is UnfNever, expose the unfolding
diff --git a/compiler/iface/TcIface.lhs b/compiler/iface/TcIface.lhs
index dffd69b9ed..2d2e867390 100644
--- a/compiler/iface/TcIface.lhs
+++ b/compiler/iface/TcIface.lhs
@@ -605,33 +605,36 @@ tcIfaceDataCons tycon_name tycon _ if_cons
ifConStricts = if_stricts})
= bindIfaceTyVars univ_tvs $ \ univ_tyvars -> do
bindIfaceTyVars ex_tvs $ \ ex_tyvars -> do
- { name <- lookupIfaceTop occ
+ { traceIf (text "Start interface-file tc_con_decl" <+> ppr occ)
+ ; name <- lookupIfaceTop occ
-- Read the context and argument types, but lazily for two reasons
-- (a) to avoid looking tugging on a recursive use of
-- the type itself, which is knot-tied
-- (b) to avoid faulting in the component types unless
-- they are really needed
- ; ~(eq_spec, theta, arg_tys) <- forkM (mk_doc name) $
+ ; ~(eq_spec, theta, arg_tys, stricts) <- forkM (mk_doc name) $
do { eq_spec <- tcIfaceEqSpec spec
; theta <- tcIfaceCtxt ctxt
; arg_tys <- mapM tcIfaceType args
- ; return (eq_spec, theta, arg_tys) }
+ ; stricts <- mapM tc_strict if_stricts
+ -- The IfBang field can mention
+ -- the type itself; hence inside forkM
+ ; return (eq_spec, theta, arg_tys, stricts) }
; lbl_names <- mapM lookupIfaceTop field_lbls
- ; stricts <- mapM tc_strict if_stricts
-
-- Remember, tycon is the representation tycon
; let orig_res_ty = mkFamilyTyConApp tycon
(substTyVars (mkTopTvSubst eq_spec) univ_tyvars)
- ; buildDataCon (pprPanic "tcIfaceDataCons: FamInstEnvs" (ppr name))
+ ; con <- buildDataCon (pprPanic "tcIfaceDataCons: FamInstEnvs" (ppr name))
name is_infix
stricts lbl_names
univ_tyvars ex_tyvars
eq_spec theta
arg_tys orig_res_ty tycon
- }
+ ; traceIf (text "Done interface-file tc_con_decl" <+> ppr name)
+ ; return con }
mk_doc con_name = ptext (sLit "Constructor") <+> ppr con_name
tc_strict IfNoBang = return HsNoBang
@@ -1204,25 +1207,6 @@ do_one (IfaceRec pairs) thing_inside
%* *
%************************************************************************
-Note [wrappers in interface files]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We used to have a nice clever scheme in interface files for
-wrappers. A wrapper's unfolding can be reconstructed from its worker's
-id and its strictness. This decreased .hi file size (sometimes
-significantly, for modules like GHC.Classes with many high-arity w/w
-splits) and had a slight corresponding effect on compile times.
-
-However, when we added the second demand analysis, this scheme lead to
-some Core lint errors. The second analysis could change the strictness
-signatures, which sometimes resulted in a wrapper's regenerated
-unfolding applying the wrapper to too many arguments.
-
-Instead of repairing the clever .hi scheme, we abandoned it in favor
-of simplicity. The .hi sizes are usually insignificant (excluding the
-+1M for base libraries), and compile time barely increases (~+1% for
-nofib). The nicer upshot is that unfolding sources no longer include
-an Id, so, eg, substitutions need not traverse them any longer.
-
\begin{code}
tcIdDetails :: Type -> IfaceIdDetails -> IfL IdDetails
tcIdDetails _ IfVanillaId = return VanillaId
@@ -1300,16 +1284,6 @@ tcUnfolding name dfun_ty _ (IfDFunUnfold bs ops)
where
doc = text "Class ops for dfun" <+> ppr name
(_, _, cls, _) = tcSplitDFunTy dfun_ty
-
-tcUnfolding name _ info (IfWrapper if_expr)
- = do { mb_expr <- tcPragExpr name if_expr
- ; return $ case mb_expr of
- Nothing -> NoUnfolding
- Just expr -> mkWwInlineRule expr arity -- see Note [wrappers in interface files]
- }
- where
- -- Arity should occur before unfolding!
- arity = arityInfo info
\end{code}
For unfoldings we try to do the job lazily, so that we never type check
diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs
index e884fe5bcf..ad1b7c503a 100644
--- a/compiler/main/HscMain.hs
+++ b/compiler/main/HscMain.hs
@@ -81,9 +81,8 @@ import CoreTidy ( tidyExpr )
import Type ( Type )
import PrelNames
import {- Kind parts of -} Type ( Kind )
-import CoreLint ( lintUnfolding )
+import CoreMonad ( lintInteractiveExpr )
import DsMeta ( templateHaskellNames )
-import VarSet
import VarEnv ( emptyTidyEnv )
import Panic
@@ -1385,12 +1384,12 @@ hscStmtWithLocation hsc_env0 stmt source linenumber =
-- Desugar it
ds_expr <- ioMsgMaybe $
- deSugarExpr hsc_env iNTERACTIVE rdr_env type_env tc_expr
+ deSugarExpr hsc_env iNTERACTIVE rdr_env type_env tc_expr
+ liftIO (lintInteractiveExpr "desugar expression" hsc_env ds_expr)
handleWarnings
-- Then code-gen, and link it
- hsc_env <- getHscEnv
- hval <- liftIO $ hscCompileCoreExpr hsc_env src_span ds_expr
+ hval <- liftIO $ hscCompileCoreExpr hsc_env src_span ds_expr
let hval_io = unsafeCoerce# hval :: IO [HValue]
return $ Just (ids, hval_io, fix_env)
@@ -1618,37 +1617,28 @@ hscCompileCoreExpr hsc_env srcspan ds_expr
= throwIO (InstallationError "You can't call hscCompileCoreExpr in a profiled compiler")
-- Otherwise you get a seg-fault when you run it
- | otherwise = do
- let dflags = hsc_dflags hsc_env
- let lint_on = gopt Opt_DoCoreLinting dflags
+ | otherwise
+ = do { let dflags = hsc_dflags hsc_env
- {- Simplify it -}
- simpl_expr <- simplifyExpr dflags ds_expr
+ {- Simplify it -}
+ ; simpl_expr <- simplifyExpr dflags ds_expr
- {- Tidy it (temporary, until coreSat does cloning) -}
- let tidy_expr = tidyExpr emptyTidyEnv simpl_expr
+ {- Tidy it (temporary, until coreSat does cloning) -}
+ ; let tidy_expr = tidyExpr emptyTidyEnv simpl_expr
- {- Prepare for codegen -}
- prepd_expr <- corePrepExpr dflags hsc_env tidy_expr
+ {- Prepare for codegen -}
+ ; prepd_expr <- corePrepExpr dflags hsc_env tidy_expr
- {- Lint if necessary -}
- -- ToDo: improve SrcLoc
- when lint_on $
- let ictxt = hsc_IC hsc_env
- te = mkTypeEnvWithImplicits (ic_tythings ictxt ++ map AnId (ic_sys_vars ictxt))
- tyvars = varSetElems $ tyThingsTyVars $ typeEnvElts $ te
- vars = typeEnvIds te
- in case lintUnfolding noSrcLoc (tyvars ++ vars) prepd_expr of
- Just err -> pprPanic "hscCompileCoreExpr" err
- Nothing -> return ()
+ {- Lint if necessary -}
+ ; lintInteractiveExpr "hscCompileExpr" hsc_env prepd_expr
- {- Convert to BCOs -}
- bcos <- coreExprToBCOs dflags iNTERACTIVE prepd_expr
+ {- Convert to BCOs -}
+ ; bcos <- coreExprToBCOs dflags iNTERACTIVE prepd_expr
- {- link it -}
- hval <- linkExpr hsc_env srcspan bcos
+ {- link it -}
+ ; hval <- linkExpr hsc_env srcspan bcos
- return hval
+ ; return hval }
#endif
diff --git a/compiler/main/TidyPgm.lhs b/compiler/main/TidyPgm.lhs
index 214e7f3315..d6a3da13e6 100644
--- a/compiler/main/TidyPgm.lhs
+++ b/compiler/main/TidyPgm.lhs
@@ -363,7 +363,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod
; alg_tycons = filter isAlgTyCon (typeEnvTyCons type_env)
}
- ; endPass dflags CoreTidy all_tidy_binds tidy_rules
+ ; endPass hsc_env CoreTidy all_tidy_binds tidy_rules
-- If the endPass didn't print the rules, but ddump-rules is
-- on, print now
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index e275b23778..094c2f55e6 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -794,6 +794,14 @@ primop ThawArrayOp "thawArray#" GenPrimOp
has_side_effects = True
code_size = { primOpCodeSizeForeignCall + 4 }
+primop CasArrayOp "casArray#" GenPrimOp
+ MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, a #)
+ {Unsafe, machine-level atomic compare and swap on an element within an Array.}
+ with
+ out_of_line = True
+ has_side_effects = True
+
+
------------------------------------------------------------------------
section "Byte Arrays"
{Operations on {\tt ByteArray\#}. A {\tt ByteArray\#} is a just a region of
@@ -1110,6 +1118,21 @@ primop SetByteArrayOp "setByteArray#" GenPrimOp
code_size = { primOpCodeSizeForeignCall + 4 }
can_fail = True
+primop CasByteArrayOp_Int "casIntArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #)
+ {Machine-level atomic compare and swap on a word within a ByteArray.}
+ with
+ out_of_line = True
+ has_side_effects = True
+
+primop FetchAddByteArrayOp_Int "fetchAddIntArray#" GenPrimOp
+ MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #)
+ {Machine-level word-sized fetch-and-add within a ByteArray.}
+ with
+ out_of_line = True
+ has_side_effects = True
+
+
------------------------------------------------------------------------
section "Arrays of arrays"
{Operations on {\tt ArrayArray\#}. An {\tt ArrayArray\#} contains references to {\em unpointed}
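
A hedged sketch of how the new byte-array primops might be wrapped in IO (not part of this patch; the primops are reached via GHC.Exts, and the Int# result is passed through as-is without assuming which value the operation reports):

    {-# LANGUAGE MagicHash, UnboxedTuples #-}
    import GHC.Exts
    import GHC.IO (IO(..))

    -- Atomically add 'delta' to the word at index 'ix'.
    fetchAddInt :: MutableByteArray# RealWorld -> Int -> Int -> IO Int
    fetchAddInt mba (I# ix) (I# delta) = IO $ \s ->
      case fetchAddIntArray# mba ix delta s of
        (# s', r #) -> (# s', I# r #)

    -- Compare-and-swap on the word at index 'ix'.
    casInt :: MutableByteArray# RealWorld -> Int -> Int -> Int -> IO Int
    casInt mba (I# ix) (I# old) (I# new) = IO $ \s ->
      case casIntArray# mba ix old new s of
        (# s', r #) -> (# s', I# r #)
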
diff --git a/compiler/rename/RnExpr.lhs b/compiler/rename/RnExpr.lhs
index 0ef169085b..23501e3e1a 100644
--- a/compiler/rename/RnExpr.lhs
+++ b/compiler/rename/RnExpr.lhs
@@ -189,8 +189,10 @@ rnExpr (HsSpliceE splice)
rnExpr e@(HsQuasiQuoteE _) = pprPanic "Cant do quasiquotation without GHCi" (ppr e)
#else
rnExpr (HsQuasiQuoteE qq)
- = runQuasiQuoteExpr qq `thenM` \ (L _ expr') ->
- rnExpr expr'
+ = runQuasiQuoteExpr qq `thenM` \ lexpr' ->
+ -- Wrap the result of the quasi-quoter in parens so that we don't
+ -- lose the outermost location set by runQuasiQuote (#7918)
+ rnExpr (HsPar lexpr')
#endif /* GHCI */
---------------------------------------------
diff --git a/compiler/rename/RnPat.lhs b/compiler/rename/RnPat.lhs
index 90a83d6a8e..e7cecf8f3f 100644
--- a/compiler/rename/RnPat.lhs
+++ b/compiler/rename/RnPat.lhs
@@ -416,8 +416,9 @@ rnPatAndThen _ p@(QuasiQuotePat {})
#else
rnPatAndThen mk (QuasiQuotePat qq)
= do { pat <- liftCps $ runQuasiQuotePat qq
- ; L _ pat' <- rnLPatAndThen mk pat
- ; return pat' }
+ -- Wrap the result of the quasi-quoter in parens so that we don't
+ -- lose the outermost location set by runQuasiQuote (#7918)
+ ; rnPatAndThen mk (ParPat pat) }
#endif /* GHCI */
rnPatAndThen _ pat = pprPanic "rnLPatAndThen" (ppr pat)
diff --git a/compiler/rename/RnTypes.lhs b/compiler/rename/RnTypes.lhs
index a1c4bac25c..c13ea336e4 100644
--- a/compiler/rename/RnTypes.lhs
+++ b/compiler/rename/RnTypes.lhs
@@ -269,7 +269,9 @@ rnHsTyKi _ _ ty@(HsQuasiQuoteTy _) = pprPanic "Can't do quasiquotation without G
rnHsTyKi isType doc (HsQuasiQuoteTy qq)
= ASSERT( isType )
do { ty <- runQuasiQuoteType qq
- ; rnHsType doc (unLoc ty) }
+ -- Wrap the result of the quasi-quoter in parens so that we don't
+ -- lose the outermost location set by runQuasiQuote (#7918)
+ ; rnHsType doc (HsParTy ty) }
#endif
rnHsTyKi isType _ (HsCoreTy ty)
diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs
index 31547e14a2..0af8201170 100644
--- a/compiler/simplCore/CoreMonad.lhs
+++ b/compiler/simplCore/CoreMonad.lhs
@@ -50,7 +50,8 @@ module CoreMonad (
getAnnotations, getFirstAnnotations,
-- ** Debug output
- showPass, endPass, dumpPassResult, lintPassResult, dumpIfSet,
+ showPass, endPass, dumpPassResult, lintPassResult,
+ lintInteractiveExpr, dumpIfSet,
-- ** Screen output
putMsg, putMsgS, errorMsg, errorMsgS,
@@ -70,7 +71,7 @@ import Name( Name )
import CoreSyn
import PprCore
import CoreUtils
-import CoreLint ( lintCoreBindings )
+import CoreLint ( lintCoreBindings, lintExpr )
import HscTypes
import Module
import DynFlags
@@ -78,12 +79,13 @@ import StaticFlags
import Rules ( RuleBase )
import BasicTypes ( CompilerPhase(..) )
import Annotations
-import Id ( Id )
import IOEnv hiding ( liftIO, failM, failWithM )
import qualified IOEnv ( liftIO )
import TcEnv ( tcLookupGlobal )
import TcRnMonad ( initTcForLookup )
+import Var
+import VarSet
import Outputable
import FastString
@@ -136,11 +138,12 @@ stuff before and after core passes, and do Core Lint when necessary.
showPass :: DynFlags -> CoreToDo -> IO ()
showPass dflags pass = Err.showPass dflags (showPpr dflags pass)
-endPass :: DynFlags -> CoreToDo -> CoreProgram -> [CoreRule] -> IO ()
-endPass dflags pass binds rules
+endPass :: HscEnv -> CoreToDo -> CoreProgram -> [CoreRule] -> IO ()
+endPass hsc_env pass binds rules
= do { dumpPassResult dflags mb_flag (ppr pass) (pprPassDetails pass) binds rules
- ; lintPassResult dflags pass binds }
+ ; lintPassResult hsc_env pass binds }
where
+ dflags = hsc_dflags hsc_env
mb_flag = case coreDumpFlag pass of
Just flag | dopt flag dflags -> Just flag
| dopt Opt_D_verbose_core2core dflags -> Just flag
@@ -178,12 +181,16 @@ dumpPassResult dflags mb_flag hdr extra_info binds rules
, ptext (sLit "------ Local rules for imported ids --------")
, pprRules rules ]
-lintPassResult :: DynFlags -> CoreToDo -> CoreProgram -> IO ()
-lintPassResult dflags pass binds
- = when (gopt Opt_DoCoreLinting dflags) $
- do { let (warns, errs) = lintCoreBindings binds
+lintPassResult :: HscEnv -> CoreToDo -> CoreProgram -> IO ()
+lintPassResult hsc_env pass binds
+ | not (gopt Opt_DoCoreLinting dflags)
+ = return ()
+ | otherwise
+ = do { let (warns, errs) = lintCoreBindings (interactiveInScope hsc_env) binds
; Err.showPass dflags ("Core Linted result of " ++ showPpr dflags pass)
; displayLintResults dflags pass warns errs binds }
+ where
+ dflags = hsc_dflags hsc_env
displayLintResults :: DynFlags -> CoreToDo
-> Bag Err.MsgDoc -> Bag Err.MsgDoc -> CoreProgram
@@ -191,7 +198,7 @@ displayLintResults :: DynFlags -> CoreToDo
displayLintResults dflags pass warns errs binds
| not (isEmptyBag errs)
= do { log_action dflags dflags Err.SevDump noSrcSpan defaultDumpStyle
- (vcat [ banner "errors", Err.pprMessageBag errs
+ (vcat [ lint_banner "errors" (ppr pass), Err.pprMessageBag errs
, ptext (sLit "*** Offending Program ***")
, pprCoreBindings binds
, ptext (sLit "*** End of Offense ***") ])
@@ -206,19 +213,66 @@ displayLintResults dflags pass warns errs binds
, not opt_NoDebugOutput
, showLintWarnings pass
= log_action dflags dflags Err.SevDump noSrcSpan defaultDumpStyle
- (banner "warnings" $$ Err.pprMessageBag warns)
+ (lint_banner "warnings" (ppr pass) $$ Err.pprMessageBag warns)
| otherwise = return ()
where
- banner string = ptext (sLit "*** Core Lint") <+> text string
- <+> ptext (sLit ": in result of") <+> ppr pass
- <+> ptext (sLit "***")
+
+lint_banner :: String -> SDoc -> SDoc
+lint_banner string pass = ptext (sLit "*** Core Lint") <+> text string
+ <+> ptext (sLit ": in result of") <+> pass
+ <+> ptext (sLit "***")
showLintWarnings :: CoreToDo -> Bool
-- Disable Lint warnings on the first simplifier pass, because
-- there may be some INLINE knots still tied, which is tiresomely noisy
showLintWarnings (CoreDoSimplify _ (SimplMode { sm_phase = InitialPhase })) = False
showLintWarnings _ = True
+
+lintInteractiveExpr :: String -> HscEnv -> CoreExpr -> IO ()
+lintInteractiveExpr what hsc_env expr
+ | not (gopt Opt_DoCoreLinting dflags)
+ = return ()
+ | Just err <- lintExpr (interactiveInScope hsc_env) expr
+ = do { display_lint_err err
+ ; Err.ghcExit dflags 1 }
+ | otherwise
+ = return ()
+ where
+ dflags = hsc_dflags hsc_env
+
+ display_lint_err err
+ = do { log_action dflags dflags Err.SevDump noSrcSpan defaultDumpStyle
+ (vcat [ lint_banner "errors" (text what)
+ , err
+ , ptext (sLit "*** Offending Program ***")
+ , pprCoreExpr expr
+ , ptext (sLit "*** End of Offense ***") ])
+ ; Err.ghcExit dflags 1 }
+
+interactiveInScope :: HscEnv -> [Var]
+-- In GHCi we may lint expressions, or bindings arising from 'deriving'
+-- clauses, that mention variables bound in the interactive context.
+-- These are Local things (see Note [Interactively-bound Ids in GHCi] in TcRnDriver).
+-- So we have to tell Lint about them, lest it reports them as out of scope.
+--
+-- We do this by find local-named things that may appear free in interactive
+-- context. This function is pretty revolting and quite possibly not quite right.
+-- When we are not in GHCi, the interactive context (hsc_IC hsc_env) is empty
+-- so this is a (cheap) no-op.
+--
+-- See Trac #8215 for an example
+interactiveInScope hsc_env
+ = tyvars ++ vars
+ where
+ ictxt = hsc_IC hsc_env
+ te = mkTypeEnvWithImplicits (ic_tythings ictxt ++ map AnId (ic_sys_vars ictxt))
+ vars = typeEnvIds te
+ tyvars = varSetElems $ tyThingsTyVars $ typeEnvElts $ te
+ -- Why the type variables? How can the top level envt have free tyvars?
+ -- I think it's because of the GHCi debugger, which can bind variables
+ -- f :: [t] -> [t]
+ -- where t is a RuntimeUnk (see TcType)
\end{code}
diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs
index 75d5364f63..d17b0561f5 100644
--- a/compiler/simplCore/OccurAnal.lhs
+++ b/compiler/simplCore/OccurAnal.lhs
@@ -878,14 +878,13 @@ reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
| isDFunId bndr = 9 -- Never choose a DFun as a loop breaker
-- Note [DFuns should not be loop breakers]
- | Just inl_source <- isStableCoreUnfolding_maybe (idUnfolding bndr)
- = case inl_source of
- InlineWrapper -> 10 -- Note [INLINE pragmas]
- _other -> 3 -- Data structures are more important than this
- -- so that dictionary/method recursion unravels
- -- Note that this case hits all InlineRule things, so we
- -- never look at 'rhs' for InlineRule stuff. That's right, because
- -- 'rhs' is irrelevant for inlining things with an InlineRule
+ | Just _ <- isStableCoreUnfolding_maybe (idUnfolding bndr)
+ = 3 -- Note [INLINE pragmas]
+ -- Data structures are more important than INLINE pragmas
+ -- so that dictionary/method recursion unravels
+ -- Note that this case hits all InlineRule things, so we
+ -- never look at 'rhs' for InlineRule stuff. That's right, because
+ -- 'rhs' is irrelevant for inlining things with an InlineRule
| is_con_app rhs = 5 -- Data types help with cases: Note [Constructor applications]
@@ -968,32 +967,37 @@ Avoid choosing a function with an INLINE pramga as the loop breaker!
If such a function is mutually-recursive with a non-INLINE thing,
then the latter should be the loop-breaker.
-Usually this is just a question of optimisation. But a particularly
-bad case is wrappers generated by the demand analyser: if you make
-then into a loop breaker you may get an infinite inlining loop. For
-example:
- rec {
- $wfoo x = ....foo x....
+ ----> Historical note, dating from when strictness wrappers
+ were generated from the strictness signatures:
- {-loop brk-} foo x = ...$wfoo x...
- }
-The interface file sees the unfolding for $wfoo, and sees that foo is
-strict (and hence it gets an auto-generated wrapper). Result: an
-infinite inlining in the importing scope. So be a bit careful if you
-change this. A good example is Tree.repTree in
-nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
-breaker then compiling Game.hs goes into an infinite loop. This
-happened when we gave is_con_app a lower score than inline candidates:
-
- Tree.repTree
- = __inline_me (/\a. \w w1 w2 ->
- case Tree.$wrepTree @ a w w1 w2 of
- { (# ww1, ww2 #) -> Branch @ a ww1 ww2 })
- Tree.$wrepTree
- = /\a w w1 w2 ->
- (# w2_smP, map a (Tree a) (Tree.repTree a w1 w) (w w2) #)
-
-Here we do *not* want to choose 'repTree' as the loop breaker.
+ Usually this is just a question of optimisation. But a particularly
+ bad case is wrappers generated by the demand analyser: if you make
+ them into a loop breaker you may get an infinite inlining loop. For
+ example:
+ rec {
+ $wfoo x = ....foo x....
+
+ {-loop brk-} foo x = ...$wfoo x...
+ }
+ The interface file sees the unfolding for $wfoo, and sees that foo is
+ strict (and hence it gets an auto-generated wrapper). Result: an
+ infinite inlining in the importing scope. So be a bit careful if you
+ change this. A good example is Tree.repTree in
+ nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
+ breaker then compiling Game.hs goes into an infinite loop. This
+ happened when we gave is_con_app a lower score than inline candidates:
+
+ Tree.repTree
+ = __inline_me (/\a. \w w1 w2 ->
+ case Tree.$wrepTree @ a w w1 w2 of
+ { (# ww1, ww2 #) -> Branch @ a ww1 ww2 })
+ Tree.$wrepTree
+ = /\a w w1 w2 ->
+ (# w2_smP, map a (Tree a) (Tree.repTree a w1 w) (w w2) #)
+
+ Here we do *not* want to choose 'repTree' as the loop breaker.
+
+ -----> End of historical note
Note [DFuns should not be loop breakers]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs
index a3101f715e..4b07d3bb1c 100644
--- a/compiler/simplCore/SimplCore.lhs
+++ b/compiler/simplCore/SimplCore.lhs
@@ -370,10 +370,11 @@ runCorePasses passes guts
do_pass guts CoreDoNothing = return guts
do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
do_pass guts pass
- = do { dflags <- getDynFlags
+ = do { hsc_env <- getHscEnv
+ ; let dflags = hsc_dflags hsc_env
; liftIO $ showPass dflags pass
; guts' <- doCorePass dflags pass guts
- ; liftIO $ endPass dflags pass (mg_binds guts') (mg_rules guts')
+ ; liftIO $ endPass hsc_env pass (mg_binds guts') (mg_rules guts')
; return guts' }
doCorePass :: DynFlags -> CoreToDo -> ModGuts -> CoreM ModGuts
@@ -676,7 +677,8 @@ simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
-- Dump the result of this iteration
- end_iteration dflags pass iteration_no counts1 binds2 rules1 ;
+ dump_end_iteration dflags iteration_no counts1 binds2 rules1 ;
+ lintPassResult hsc_env pass binds2 ;
-- Loop
do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
@@ -693,11 +695,10 @@ simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
-------------------
-end_iteration :: DynFlags -> CoreToDo -> Int
+dump_end_iteration :: DynFlags -> Int
-> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
-end_iteration dflags pass iteration_no counts binds rules
- = do { dumpPassResult dflags mb_flag hdr pp_counts binds rules
- ; lintPassResult dflags pass binds }
+dump_end_iteration dflags iteration_no counts binds rules
+ = dumpPassResult dflags mb_flag hdr pp_counts binds rules
where
mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_phases
| otherwise = Nothing
diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs
index bf73bec240..a175e5ed4b 100644
--- a/compiler/specialise/Specialise.lhs
+++ b/compiler/specialise/Specialise.lhs
@@ -10,7 +10,7 @@ module Specialise ( specProgram ) where
import Id
import TcType hiding( substTy, extendTvSubstList )
-import Type( TyVar, isDictTy, mkPiTypes, classifyPredType, PredTree(..), isIPClass )
+import Type hiding( substTy, extendTvSubstList )
import Coercion( Coercion )
import CoreMonad
import qualified CoreSubst
@@ -1614,10 +1614,10 @@ mkCallUDs env f args
_trace_doc = vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts
, ppr (map (interestingDict env) dicts)]
(tyvars, theta, _) = tcSplitSigmaTy (idType f)
- constrained_tyvars = tyVarsOfTypes theta
+ constrained_tyvars = closeOverKinds (tyVarsOfTypes theta)
n_tyvars = length tyvars
n_dicts = length theta
-
+
spec_tys = [mk_spec_ty tv ty | (tv, Type ty) <- tyvars `zip` args]
dicts = [dict_expr | (_, dict_expr) <- theta `zip` (drop n_tyvars args)]
diff --git a/compiler/typecheck/TcBinds.lhs b/compiler/typecheck/TcBinds.lhs
index b8bef9e0b2..2a33955148 100644
--- a/compiler/typecheck/TcBinds.lhs
+++ b/compiler/typecheck/TcBinds.lhs
@@ -559,9 +559,8 @@ mkExport prag_fn qtvs theta (poly_name, mb_sig, mono_id)
-- In the inference case (no signature) this stuff figures out
-- the right type variables and theta to quantify over
-- See Note [Impedence matching]
- my_tvs1 = growThetaTyVars theta (tyVarsOfType mono_ty)
- my_tvs2 = foldVarSet (\tv tvs -> tyVarsOfType (tyVarKind tv) `unionVarSet` tvs)
- my_tvs1 my_tvs1 -- Add kind variables! Trac #7916
+ my_tvs2 = closeOverKinds (growThetaTyVars theta (tyVarsOfType mono_ty))
+ -- Include kind variables! Trac #7916
my_tvs = filter (`elemVarSet` my_tvs2) qtvs -- Maintain original order
my_theta = filter (quantifyPred my_tvs2) theta
inferred_poly_ty = mkSigmaTy my_tvs my_theta mono_ty
diff --git a/compiler/typecheck/TcInstDcls.lhs b/compiler/typecheck/TcInstDcls.lhs
index 1481b2552d..9f89afe0af 100644
--- a/compiler/typecheck/TcInstDcls.lhs
+++ b/compiler/typecheck/TcInstDcls.lhs
@@ -409,7 +409,7 @@ tcInstDecls1 tycl_decls inst_decls deriv_decls
-- Remove any handwritten instance of poly-kinded Typeable and warn
; dflags <- getDynFlags
; when (wopt Opt_WarnTypeableInstances dflags) $
- mapM_ (addWarnTc . instMsg) typeable_instances
+ mapM_ (failWithTc . instMsg) typeable_instances
-- Check that if the module is compiled with -XSafe, there are no
-- hand written instances of old Typeable as then unsafe casts could be
@@ -444,7 +444,7 @@ tcInstDecls1 tycl_decls inst_decls deriv_decls
typInstErr = ptext $ sLit $ "Can't create hand written instances of Typeable in Safe"
++ " Haskell! Can only derive them"
- instMsg i = hang (ptext (sLit $ "Typeable instances can only be derived; ignoring "
+ instMsg i = hang (ptext (sLit $ "Typeable instances can only be derived; replace "
++ "the following instance:"))
2 (pprInstance (iSpec i))
diff --git a/compiler/typecheck/TcMType.lhs b/compiler/typecheck/TcMType.lhs
index 481cb89ab0..6049d5be9f 100644
--- a/compiler/typecheck/TcMType.lhs
+++ b/compiler/typecheck/TcMType.lhs
@@ -511,11 +511,9 @@ quantifyTyVars :: TcTyVarSet -> TcTyVarSet -> TcM [TcTyVar]
quantifyTyVars gbl_tvs tkvs
= do { tkvs <- zonkTyVarsAndFV tkvs
; gbl_tvs <- zonkTyVarsAndFV gbl_tvs
- ; let (kvs1, tvs) = partitionVarSet isKindVar (tkvs `minusVarSet` gbl_tvs)
- kvs2 = varSetElems (foldVarSet add_kvs kvs1 tvs
- `minusVarSet` gbl_tvs )
- add_kvs tv kvs = tyVarsOfType (tyVarKind tv) `unionVarSet` kvs
+ ; let (kvs, tvs) = partitionVarSet isKindVar (closeOverKinds tkvs `minusVarSet` gbl_tvs)
-- NB kinds of tvs are zonked by zonkTyVarsAndFV
+ kvs2 = varSetElems kvs
qtvs = varSetElems tvs
-- In the non-PolyKinds case, default the kind variables
diff --git a/compiler/typecheck/TcSMonad.lhs b/compiler/typecheck/TcSMonad.lhs
index 0b2e484f7a..5674b47ee2 100644
--- a/compiler/typecheck/TcSMonad.lhs
+++ b/compiler/typecheck/TcSMonad.lhs
@@ -1639,6 +1639,7 @@ matchOpenFam :: TyCon -> [Type] -> TcS (Maybe FamInstMatch)
matchOpenFam tycon args = wrapTcS $ tcLookupFamInst tycon args
matchFam :: TyCon -> [Type] -> TcS (Maybe (TcCoercion, TcType))
+-- Given (F tys) return (co, ty), where co :: F tys ~ ty
matchFam tycon args
| isOpenSynFamilyTyCon tycon
= do { maybe_match <- matchOpenFam tycon args
diff --git a/compiler/typecheck/TcTyClsDecls.lhs b/compiler/typecheck/TcTyClsDecls.lhs
index 70e72f593f..f4e4dabd1b 100644
--- a/compiler/typecheck/TcTyClsDecls.lhs
+++ b/compiler/typecheck/TcTyClsDecls.lhs
@@ -1524,14 +1524,16 @@ checkValidRoles tc
= return ()
where
check_dc_roles datacon
- = let univ_tvs = dataConUnivTyVars datacon
- ex_tvs = dataConExTyVars datacon
- args = dataConRepArgTys datacon
- univ_roles = zipVarEnv univ_tvs (tyConRoles tc)
+ = do { traceTc "check_dc_roles" (ppr datacon <+> ppr (tyConRoles tc))
+ ; mapM_ (check_ty_roles role_env Representational) $
+ eqSpecPreds eq_spec ++ theta ++ arg_tys }
+ -- See Note [Role-checking data constructor arguments] in TcTyDecls
+ where
+ (univ_tvs, ex_tvs, eq_spec, theta, arg_tys, _res_ty) = dataConFullSig datacon
+ univ_roles = zipVarEnv univ_tvs (tyConRoles tc)
-- zipVarEnv uses zipEqual, but we don't want that for ex_tvs
- ex_roles = mkVarEnv (zip ex_tvs (repeat Nominal))
- role_env = univ_roles `plusVarEnv` ex_roles in
- mapM_ (check_ty_roles role_env Representational) args
+ ex_roles = mkVarEnv (zip ex_tvs (repeat Nominal))
+ role_env = univ_roles `plusVarEnv` ex_roles
check_ty_roles env role (TyVarTy tv)
= case lookupVarEnv env tv of
diff --git a/compiler/typecheck/TcTyDecls.lhs b/compiler/typecheck/TcTyDecls.lhs
index bea2cd19be..5091cab802 100644
--- a/compiler/typecheck/TcTyDecls.lhs
+++ b/compiler/typecheck/TcTyDecls.lhs
@@ -615,6 +615,19 @@ roles(~#) = N, N
With -dcore-lint on, the output of this algorithm is checked in checkValidRoles,
called from checkValidTycon.
+Note [Role-checking data constructor arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ data T a where
+ MkT :: Eq b => F a -> (a->a) -> T (G a)
+
+Then we want to check the roles at which 'a' is used
+in MkT's type. We want to work on the user-written type,
+so we need to take into account
+ * the arguments: (F a) and (a->a)
+ * the context: (Eq b)
+ * the result type: (G a) -- this is in the eq_spec
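+ For MkT above, dataConFullSig gives (roughly) arg_tys = [F a, a->a],
+ theta = [Eq b], and an eq_spec equating the universal result variable
+ to (G a); role-checking then walks over
+ (eqSpecPreds eq_spec ++ theta ++ arg_tys).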
+
\begin{code}
type RoleEnv = NameEnv [Role] -- from tycon names to roles
type RoleAnnots = NameEnv [Maybe Role] -- from tycon names to role annotations,
@@ -695,9 +708,12 @@ irClass tc_name cls
-- See Note [Role inference]
irDataCon :: Name -> DataCon -> RoleM ()
irDataCon tc_name datacon
- = addRoleInferenceInfo tc_name (dataConUnivTyVars datacon) $
- let ex_var_set = mkVarSet $ dataConExTyVars datacon in
- mapM_ (irType ex_var_set) (dataConRepArgTys datacon)
+ = addRoleInferenceInfo tc_name univ_tvs $
+ mapM_ (irType ex_var_set) (eqSpecPreds eq_spec ++ theta ++ arg_tys)
+ -- See Note [Role-checking data constructor arguments]
+ where
+ (univ_tvs, ex_tvs, eq_spec, theta, arg_tys, _res_ty) = dataConFullSig datacon
+ ex_var_set = mkVarSet ex_tvs
irType :: VarSet -> Type -> RoleM ()
irType = go
diff --git a/compiler/typecheck/TcType.lhs b/compiler/typecheck/TcType.lhs
index 8a8de41159..af67808044 100644
--- a/compiler/typecheck/TcType.lhs
+++ b/compiler/typecheck/TcType.lhs
@@ -142,7 +142,7 @@ module TcType (
isUnboxedTupleType, -- Ditto
isPrimitiveType,
- tyVarsOfType, tyVarsOfTypes,
+ tyVarsOfType, tyVarsOfTypes, closeOverKinds,
tcTyVarsOfType, tcTyVarsOfTypes,
pprKind, pprParendKind, pprSigmaType,
diff --git a/compiler/typecheck/TcValidity.lhs b/compiler/typecheck/TcValidity.lhs
index 968d8695cc..1d68ede3b7 100644
--- a/compiler/typecheck/TcValidity.lhs
+++ b/compiler/typecheck/TcValidity.lhs
@@ -70,21 +70,23 @@ checkAmbiguity ctxt ty
| otherwise
= do { allow_ambiguous <- xoptM Opt_AllowAmbiguousTypes
; unless allow_ambiguous $
- do {(subst, _tvs) <- tcInstSkolTyVars (varSetElems (tyVarsOfType ty))
+ do { traceTc "Ambiguity check for" (ppr ty)
+ ; (subst, _tvs) <- tcInstSkolTyVars (varSetElems (tyVarsOfType ty))
; let ty' = substTy subst ty
-- The type might have free TyVars,
-- so we skolemise them as TcTyVars
-- Tiresome; but the type inference engine expects TcTyVars
- ; (_wrap, wanted) <- addErrCtxtM (mk_msg ty') $
- captureConstraints $
- tcSubType (AmbigOrigin ctxt) ctxt ty' ty'
-- Solve the constraints eagerly because an ambiguous type
- -- can cause a cascade of further errors. The free tyvars
- -- are skolemised, so we can safely use tcSimplifyTop
- ; _ev_binds <- simplifyTop wanted
-
- ; return () } }
+ -- can cause a cascade of further errors. Since the free
+ -- tyvars are skolemised, we can safely use tcSimplifyTop
+ ; addErrCtxtM (mk_msg ty') $
+ do { (_wrap, wanted) <- captureConstraints $
+ tcSubType (AmbigOrigin ctxt) ctxt ty' ty'
+ ; _ev_binds <- simplifyTop wanted
+ ; return () }
+
+ ; traceTc "Done ambiguity check for" (ppr ty) } }
where
mk_msg ty tidy_env
= return (tidy_env', msg)
@@ -174,7 +176,8 @@ checkValidType ctxt ty
-- Check that the thing has kind Type, and is lifted if necessary
-- Do this second, because we can't usefully take the kind of an
-- ill-formed type such as (a~Int)
- ; check_kind ctxt ty }
+ ; check_kind ctxt ty
+ ; traceTc "checkValidType done" (ppr ty <+> text "::" <+> ppr (typeKind ty)) }
checkValidMonoType :: Type -> TcM ()
checkValidMonoType ty = check_mono_type SigmaCtxt MustBeMonoType ty
diff --git a/compiler/types/Coercion.lhs b/compiler/types/Coercion.lhs
index 6cda16b9ec..b0da3edd53 100644
--- a/compiler/types/Coercion.lhs
+++ b/compiler/types/Coercion.lhs
@@ -802,7 +802,7 @@ isReflCo_maybe _ = Nothing
mkCoVarCo :: CoVar -> Coercion
-- cv :: s ~# t
mkCoVarCo cv
- | ty1 `eqType` ty2 = Refl Nominal ty1
+ | ty1 `eqType` ty2 = Refl (coVarRole cv) ty1
| otherwise = CoVarCo cv
where
(ty1, ty2) = ASSERT( isCoVar cv ) coVarKind cv
@@ -1360,8 +1360,7 @@ subst_co subst co
substCoVar :: CvSubst -> CoVar -> Coercion
substCoVar (CvSubst in_scope _ cenv) cv
- | Just co <- lookupVarEnv cenv cv = ASSERT2( coercionRole co == Nominal, ppr co )
- co
+ | Just co <- lookupVarEnv cenv cv = co
| Just cv1 <- lookupInScope in_scope cv = ASSERT( isCoVar cv1 ) CoVarCo cv1
| otherwise = WARN( True, ptext (sLit "substCoVar not in scope") <+> ppr cv $$ ppr in_scope)
ASSERT( isCoVar cv ) CoVarCo cv
diff --git a/compiler/types/Type.lhs b/compiler/types/Type.lhs
index 8596dde439..5753aba0c1 100644
--- a/compiler/types/Type.lhs
+++ b/compiler/types/Type.lhs
@@ -85,7 +85,7 @@ module Type (
constraintKindTyCon, anyKindTyCon,
-- * Type free variables
- tyVarsOfType, tyVarsOfTypes,
+ tyVarsOfType, tyVarsOfTypes, closeOverKinds,
expandTypeSynonyms,
typeSize, varSetElemsKvsFirst,
@@ -171,7 +171,6 @@ import Util
import Outputable
import FastString
-import Data.List ( partition )
import Maybes ( orElse )
import Data.Maybe ( isJust )
import Control.Monad ( guard )
@@ -995,13 +994,6 @@ typeSize (AppTy t1 t2) = typeSize t1 + typeSize t2
typeSize (FunTy t1 t2) = typeSize t1 + typeSize t2
typeSize (ForAllTy _ t) = 1 + typeSize t
typeSize (TyConApp _ ts) = 1 + sum (map typeSize ts)
-
-varSetElemsKvsFirst :: VarSet -> [TyVar]
--- {k1,a,k2,b} --> [k1,k2,a,b]
-varSetElemsKvsFirst set
- = kvs ++ tvs
- where
- (kvs, tvs) = partition isKindVar (varSetElems set)
\end{code}
diff --git a/compiler/types/TypeRep.lhs b/compiler/types/TypeRep.lhs
index cb5b8f0f18..2b127369d4 100644
--- a/compiler/types/TypeRep.lhs
+++ b/compiler/types/TypeRep.lhs
@@ -45,7 +45,7 @@ module TypeRep (
pprPrefixApp, pprArrowChain, ppr_type,
-- Free variables
- tyVarsOfType, tyVarsOfTypes,
+ tyVarsOfType, tyVarsOfTypes, closeOverKinds, varSetElemsKvsFirst,
-- * Tidying type related things up for printing
tidyType, tidyTypes,
@@ -85,7 +85,7 @@ import StaticFlags( opt_PprStyle_Debug )
import Util
-- libraries
-import Data.List( mapAccumL )
+import Data.List( mapAccumL, partition )
import qualified Data.Data as Data hiding ( TyCon )
\end{code}
@@ -327,6 +327,20 @@ tyVarsOfType (ForAllTy tyvar ty) = delVarSet (tyVarsOfType ty) tyvar
tyVarsOfTypes :: [Type] -> TyVarSet
tyVarsOfTypes tys = foldr (unionVarSet . tyVarsOfType) emptyVarSet tys
+
+closeOverKinds :: TyVarSet -> TyVarSet
+-- Add the kind variables free in the kinds
+-- of the tyvars in the given set
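+-- E.g. (roughly) closeOverKinds {a::k, b::*} = {k, a, b}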
+closeOverKinds tvs
+ = foldVarSet (\tv ktvs -> tyVarsOfType (tyVarKind tv) `unionVarSet` ktvs)
+ tvs tvs
+
+varSetElemsKvsFirst :: VarSet -> [TyVar]
+-- {k1,a,k2,b} --> [k1,k2,a,b]
+varSetElemsKvsFirst set
+ = kvs ++ tvs
+ where
+ (kvs, tvs) = partition isKindVar (varSetElems set)
\end{code}
%************************************************************************
diff --git a/docs/users_guide/7.8.1-notes.xml b/docs/users_guide/7.8.1-notes.xml
index 4719403e66..85d2f61053 100644
--- a/docs/users_guide/7.8.1-notes.xml
+++ b/docs/users_guide/7.8.1-notes.xml
@@ -236,7 +236,7 @@
dynamically-linkable <literal>.dyn_o</literal>
file. The output suffix of the dynamic objects can
be controlled by the flag
- <literal>-dyno</literal>.
+ <literal>-dynosuf</literal>.
</para>
<para>
@@ -318,6 +318,23 @@
<itemizedlist>
<listitem>
<para>
+ The RTS linker can now unload object code at
+ runtime (when using the GHC API
+ <literal>ObjLink</literal> module). Previously,
+ GHC would not unload the old object file, causing
+ a gradual memory leak as more objects were loaded
+ over time.
+ </para>
+
+ <para>
+ Note that this change in unloading behavior
+ <emphasis>only</emphasis> affects statically
+ linked binaries, and not dynamic ones.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
The performance of <literal>StablePtr</literal>s and
<literal>StableName</literal>s has been improved.
</para>
diff --git a/docs/users_guide/flags.xml b/docs/users_guide/flags.xml
index 648180c184..ee577e6ead 100644
--- a/docs/users_guide/flags.xml
+++ b/docs/users_guide/flags.xml
@@ -699,23 +699,10 @@
<entry><option>-fno-glasgow-exts</option></entry>
</row>
<row>
- <entry><option>-XOverlappingInstances</option></entry>
- <entry>Enable <link linkend="instance-overlap">overlapping instances</link></entry>
- <entry>dynamic</entry>
- <entry><option>-XNoOverlappingInstances</option></entry>
- </row>
- <row>
- <entry><option>-XIncoherentInstances</option></entry>
- <entry>Enable <link linkend="instance-overlap">incoherent instances</link>.
- Implies <option>-XOverlappingInstances</option> </entry>
- <entry>dynamic</entry>
- <entry><option>-XNoIncoherentInstances</option></entry>
- </row>
- <row>
- <entry><option>-XUndecidableInstances</option></entry>
- <entry>Enable <link linkend="undecidable-instances">undecidable instances</link></entry>
+ <entry><option>-firrefutable-tuples</option></entry>
+ <entry>Make tuple pattern matching irrefutable</entry>
<entry>dynamic</entry>
- <entry><option>-XNoUndecidableInstances</option></entry>
+ <entry><option>-fno-irrefutable-tuples</option></entry>
</row>
<row>
<entry><option>-fcontext-stack=N</option><replaceable>n</replaceable></entry>
@@ -724,6 +711,14 @@
<entry></entry>
</row>
<row>
+ <entry><option>-XAllowAmbiguousTypes</option></entry>
+ <entry>Allow the user to write <link linkend="ambiguity">ambiguous types</link>,
+ and the type inference engine to infer them.
+ </entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoAllowAmbiguousTypes</option></entry>
+ </row>
+ <row>
<entry><option>-XArrows</option></entry>
<entry>Enable <link linkend="arrow-notation">arrow
notation</link> extension</entry>
@@ -731,6 +726,43 @@
<entry><option>-XNoArrows</option></entry>
</row>
<row>
+ <entry><option>-XAutoDeriveTypeable</option></entry>
+ <entry>Automatically <link linkend="auto-derive-typeable">derive Typeable instances for every datatype and type class declaration</link>.
+ Implies <option>-XDeriveDataTypeable</option>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoAutoDeriveTypeable</option></entry>
+ </row>
+ <row>
+ <entry><option>-XConstraintKinds</option></entry>
+ <entry>Enable a <link linkend="constraint-kind">kind of constraints</link>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoConstraintKinds</option></entry>
+ </row>
+ <row>
+ <entry><option>-XDataKinds</option></entry>
+ <entry>Enable <link linkend="promotion">datatype promotion</link>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoDataKinds</option></entry>
+ </row>
+ <row>
+ <entry><option>-XDeriveDataTypeable</option></entry>
+ <entry>Enable <link linkend="deriving-typeable">deriving for the Data and Typeable classes</link>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoDeriveDataTypeable</option></entry>
+ </row>
+ <row>
+ <entry><option>-XDeriveGeneric</option></entry>
+ <entry>Enable <link linkend="deriving-typeable">deriving for the Generic class</link>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoDeriveGeneric</option></entry>
+ </row>
+ <row>
+ <entry><option>-XGeneralizedNewtypeDeriving</option></entry>
+ <entry>Enable <link linkend="newtype-deriving">newtype deriving</link>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoGeneralizedNewtypeDeriving</option></entry>
+ </row>
+ <row>
<entry><option>-XDisambiguateRecordFields</option></entry>
<entry>Enable <link linkend="disambiguate-fields">record
field disambiguation</link></entry>
@@ -738,6 +770,19 @@
<entry><option>-XNoDisambiguateRecordFields</option></entry>
</row>
<row>
+ <entry><option>-XEmptyCase</option></entry>
+ <entry>Allow <link linkend="empty-case">empty case alternatives</link>
+ </entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoEmptyCase</option></entry>
+ </row>
+ <row>
+ <entry><option>-XExtendedDefaultRules</option></entry>
+ <entry>Use GHCi's <link linkend="extended-default-rules">extended default rules</link> in a normal module</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoExtendedDefaultRules</option></entry>
+ </row>
+ <row>
<entry><option>-XForeignFunctionInterface</option></entry>
<entry>Enable <link linkend="ffi">foreign function interface</link> (implied by
<option>-fglasgow-exts</option>)</entry>
@@ -745,6 +790,20 @@
<entry><option>-XNoForeignFunctionInterface</option></entry>
</row>
<row>
+ <entry><option>-XGADTs</option></entry>
+ <entry>Enable <link linkend="gadt">generalised algebraic data types</link>.
+ </entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoGADTs</option></entry>
+ </row>
+ <row>
+ <entry><option>-XGADTSyntax</option></entry>
+ <entry>Enable <link linkend="gadt-style">generalised algebraic data type syntax</link>.
+ </entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoGADTSyntax</option></entry>
+ </row>
+ <row>
<entry><option>-XGenerics</option></entry>
<entry>Deprecated, does nothing. No longer enables <link linkend="generic-classes">generic classes</link>.
See also GHC's support for
@@ -760,22 +819,17 @@
<entry><option>-XNoImplicitParams</option></entry>
</row>
<row>
- <entry><option>-firrefutable-tuples</option></entry>
- <entry>Make tuple pattern matching irrefutable</entry>
- <entry>dynamic</entry>
- <entry><option>-fno-irrefutable-tuples</option></entry>
- </row>
- <row>
<entry><option>-XNoImplicitPrelude</option></entry>
<entry>Don't implicitly <literal>import Prelude</literal></entry>
<entry>dynamic</entry>
<entry><option>-XImplicitPrelude</option></entry>
</row>
<row>
- <entry><option>-XRebindableSyntax</option></entry>
- <entry>Employ <link linkend="rebindable-syntax">rebindable syntax</link></entry>
+ <entry><option>-XIncoherentInstances</option></entry>
+ <entry>Enable <link linkend="instance-overlap">incoherent instances</link>.
+ Implies <option>-XOverlappingInstances</option> </entry>
<entry>dynamic</entry>
- <entry><option>-XNoRebindableSyntax</option></entry>
+ <entry><option>-XNoIncoherentInstances</option></entry>
</row>
<row>
<entry><option>-XNoMonomorphismRestriction</option></entry>
@@ -784,16 +838,16 @@
<entry><option>-XMonomorphismRestriction</option></entry>
</row>
<row>
- <entry><option>-XNoNPlusKPatterns</option></entry>
- <entry>Disable support for <literal>n+k</literal> patterns</entry>
+ <entry><option>-XNegativeLiterals</option></entry>
+ <entry>Enable support for <link linkend="negative-literals">negative literals</link></entry>
<entry>dynamic</entry>
- <entry><option>-XNPlusKPatterns</option></entry>
+ <entry><option>-XNoNegativeLiterals</option></entry>
</row>
<row>
- <entry><option>-XNegativeLiterals</option></entry>
- <entry>Enable support for negative literals</entry>
+ <entry><option>-XNoNPlusKPatterns</option></entry>
+ <entry>Disable support for <literal>n+k</literal> patterns</entry>
<entry>dynamic</entry>
- <entry><option>-XNoNegativeLiterals</option></entry>
+ <entry><option>-XNPlusKPatterns</option></entry>
</row>
<row>
<entry><option>-XNumDecimals</option></entry>
@@ -802,22 +856,17 @@
<entry><option>-XNoNumDecimals</option></entry>
</row>
<row>
- <entry><option>-XNoTraditionalRecordSyntax</option></entry>
- <entry>Disable support for traditional record syntax (as supported by Haskell 98) <literal>C {f = x}</literal></entry>
- <entry>dynamic</entry>
- <entry><option>-XTraditionalRecordSyntax</option></entry>
- </row>
- <row>
- <entry><option>-XRelaxedPolyRec</option></entry>
- <entry>Relaxed checking for <link linkend="typing-binds">mutually-recursive polymorphic functions</link></entry>
+ <entry><option>-XOverlappingInstances</option></entry>
+ <entry>Enable <link linkend="instance-overlap">overlapping instances</link></entry>
<entry>dynamic</entry>
- <entry><option>-XNoRelaxedPolyRec</option></entry>
+ <entry><option>-XNoOverlappingInstances</option></entry>
</row>
<row>
- <entry><option>-XExtendedDefaultRules</option></entry>
- <entry>Use GHCi's <link linkend="extended-default-rules">extended default rules</link> in a normal module</entry>
+ <entry><option>-XOverloadedLists</option></entry>
+ <entry>Enable <link linkend="overloaded-lists">overloaded lists</link>.
+ </entry>
<entry>dynamic</entry>
- <entry><option>-XNoExtendedDefaultRules</option></entry>
+ <entry><option>-XNoOverloadedLists</option></entry>
</row>
<row>
<entry><option>-XOverloadedStrings</option></entry>
@@ -827,25 +876,22 @@
<entry><option>-XNoOverloadedStrings</option></entry>
</row>
<row>
- <entry><option>-XOverloadedLists</option></entry>
- <entry>Enable <link linkend="overloaded-lists">overloaded lists</link>.
- </entry>
+ <entry><option>-XQuasiQuotes</option></entry>
+ <entry>Enable <link linkend="th-quasiquotation">quasiquotation</link>.</entry>
<entry>dynamic</entry>
- <entry><option>-XNoOverloadedLists</option></entry>
+ <entry><option>-XNoQuasiQuotes</option></entry>
</row>
<row>
- <entry><option>-XGADTs</option></entry>
- <entry>Enable <link linkend="gadt">generalised algebraic data types</link>.
- </entry>
+ <entry><option>-XRelaxedPolyRec</option></entry>
+ <entry>Relaxed checking for <link linkend="typing-binds">mutually-recursive polymorphic functions</link></entry>
<entry>dynamic</entry>
- <entry><option>-XNoGADTs</option></entry>
+ <entry><option>-XNoRelaxedPolyRec</option></entry>
</row>
<row>
- <entry><option>-XGADTSyntax</option></entry>
- <entry>Enable <link linkend="gadt-style">generalised algebraic data type syntax</link>.
- </entry>
+ <entry><option>-XNoTraditionalRecordSyntax</option></entry>
+ <entry>Disable support for traditional record syntax (as supported by Haskell 98) <literal>C {f = x}</literal></entry>
<entry>dynamic</entry>
- <entry><option>-XNoGADTSyntax</option></entry>
+ <entry><option>-XTraditionalRecordSyntax</option></entry>
</row>
<row>
<entry><option>-XTypeFamilies</option></entry>
@@ -854,16 +900,10 @@
<entry><option>-XNoTypeFamilies</option></entry>
</row>
<row>
- <entry><option>-XConstraintKinds</option></entry>
- <entry>Enable a <link linkend="constraint-kind">kind of constraints</link>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoConstraintKinds</option></entry>
- </row>
- <row>
- <entry><option>-XDataKinds</option></entry>
- <entry>Enable <link linkend="promotion">datatype promotion</link>.</entry>
+ <entry><option>-XUndecidableInstances</option></entry>
+ <entry>Enable <link linkend="undecidable-instances">undecidable instances</link></entry>
<entry>dynamic</entry>
- <entry><option>-XNoDataKinds</option></entry>
+ <entry><option>-XNoUndecidableInstances</option></entry>
</row>
<row>
<entry><option>-XPolyKinds</option></entry>
@@ -873,13 +913,6 @@
<entry><option>-XNoPolyKinds</option></entry>
</row>
<row>
- <entry><option>-XScopedTypeVariables</option></entry>
- <entry>Enable <link linkend="scoped-type-variables">lexically-scoped type variables</link>.
- Implied by <option>-fglasgow-exts</option>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoScopedTypeVariables</option></entry>
- </row>
- <row>
<entry><option>-XMonoLocalBinds</option></entry>
<entry>Enable <link linkend="mono-local-binds">do not generalise local bindings</link>.
Implied by <option>-XTypeFamilies</option> and <option>-XGADTs</option>.
@@ -888,6 +921,19 @@
<entry><option>-XNoMonoLocalBinds</option></entry>
</row>
<row>
+ <entry><option>-XRebindableSyntax</option></entry>
+ <entry>Employ <link linkend="rebindable-syntax">rebindable syntax</link></entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoRebindableSyntax</option></entry>
+ </row>
+ <row>
+ <entry><option>-XScopedTypeVariables</option></entry>
+ <entry>Enable <link linkend="scoped-type-variables">lexically-scoped type variables</link>.
+ Implied by <option>-fglasgow-exts</option>.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-XNoScopedTypeVariables</option></entry>
+ </row>
+ <row>
<entry><option>-XTemplateHaskell</option></entry>
<entry>Enable <link linkend="template-haskell">Template Haskell</link>.
No longer implied by <option>-fglasgow-exts</option>.</entry>
@@ -895,12 +941,6 @@
<entry><option>-XNoTemplateHaskell</option></entry>
</row>
<row>
- <entry><option>-XQuasiQuotes</option></entry>
- <entry>Enable <link linkend="th-quasiquotation">quasiquotation</link>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoQuasiQuotes</option></entry>
- </row>
- <row>
<entry><option>-XBangPatterns</option></entry>
<entry>Enable <link linkend="bang-patterns">bang patterns</link>.</entry>
<entry>dynamic</entry>
@@ -1033,7 +1073,9 @@
</row>
<row>
<entry><option>-XExplicitNamespaces</option></entry>
- <entry>Enable using the keyword <literal>type</literal> to specify the namespace of entries in imports and exports.</entry>
+ <entry>Enable using the keyword <literal>type</literal> to specify the namespace of
+ entries in imports and exports (<xref linkend="explicit-namespaces"/>).
+ Implied by <option>-XTypeOperators</option> and <option>-XTypeFamilies</option>.</entry>
<entry>dynamic</entry>
<entry><option>-XNoExplicitNamespaces</option></entry>
</row>
@@ -1080,31 +1122,6 @@
<entry><option>-XNoStandaloneDeriving</option></entry>
</row>
<row>
- <entry><option>-XDeriveDataTypeable</option></entry>
- <entry>Enable <link linkend="deriving-typeable">deriving for the Data and Typeable classes</link>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoDeriveDataTypeable</option></entry>
- </row>
- <row>
- <entry><option>-XAutoDeriveTypeable</option></entry>
- <entry>Automatically <link linkend="auto-derive-typeable">derive Typeable instances for every datatype and type class declaration</link>.
- Implies <option>-XDeriveDataTypeable</option>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoAutoDeriveTypeable</option></entry>
- </row>
- <row>
- <entry><option>-XDeriveGeneric</option></entry>
- <entry>Enable <link linkend="deriving-typeable">deriving for the Generic class</link>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoDeriveGeneric</option></entry>
- </row>
- <row>
- <entry><option>-XGeneralizedNewtypeDeriving</option></entry>
- <entry>Enable <link linkend="newtype-deriving">newtype deriving</link>.</entry>
- <entry>dynamic</entry>
- <entry><option>-XNoGeneralizedNewtypeDeriving</option></entry>
- </row>
- <row>
<entry><option>-XTypeSynonymInstances</option></entry>
<entry>Enable <link linkend="flexible-instance-head">type synonyms in instance heads</link>.</entry>
<entry>dynamic</entry>
@@ -1190,14 +1207,14 @@
<entry><option>-</option></entry>
</row>
<row>
- <entry><option>-fpackage-trust</option></entry>
- <entry>Enable <link linkend="safe-haskell">Safe Haskell</link> trusted package requirement for trustworty modules.</entry>
+ <entry><option>-XTypeHoles</option></entry>
+ <entry>Enable <link linkend="type-holes">holes</link> in expressions.</entry>
<entry>dynamic</entry>
- <entry><option>-</option></entry>
+ <entry><option>-XNoTypeHoles</option></entry>
</row>
<row>
- <entry><option>-XTypeHoles</option></entry>
- <entry>Enable <link linkend="type-holes">holes</link> in expressions.</entry>
+ <entry><option>-fpackage-trust</option></entry>
+ <entry>Enable <link linkend="safe-haskell">Safe Haskell</link> trusted package requirement for trustworthy modules.</entry>
<entry>dynamic</entry>
<entry><option>-</option></entry>
</row>
diff --git a/docs/users_guide/glasgow_exts.xml b/docs/users_guide/glasgow_exts.xml
index 46e4cbdb01..c825da90c7 100644
--- a/docs/users_guide/glasgow_exts.xml
+++ b/docs/users_guide/glasgow_exts.xml
@@ -2112,6 +2112,36 @@ import safe qualified Network.Socket as NS
</sect2>
+<sect2 id="explicit-namespaces">
+<title>Explicit namespaces in import/export</title>
+
+<para> In an import or export list, such as
+<programlisting>
+ module M( f, (++) ) where ...
+ import N( f, (++) )
+ ...
+</programlisting>
+the entities <literal>f</literal> and <literal>(++)</literal> are <emphasis>values</emphasis>.
+However, with type operators (<xref linkend="type-operators"/>) it becomes possible
+to declare <literal>(++)</literal> as a <emphasis>type constructor</emphasis>. In that
+case, how would you export or import it?
+</para>
+<para>
+The <option>-XExplicitNamespaces</option> extension allows you to prefix the name of
+a type constructor in an import or export list with "<literal>type</literal>" to
+disambiguate this case, thus:
+<programlisting>
+ module M( f, type (++) ) where ...
+ import N( f, type (++) )
+ ...
+ module N( f, type (++) ) where
+ data family a ++ b = L a | R b
+</programlisting>
+The extension <option>-XExplicitNamespaces</option>
+is implied by <option>-XTypeOperators</option> and (for some reason) by <option>-XTypeFamilies</option>.
+</para>
+</sect2>
+
<sect2 id="syntax-stolen">
<title>Summary of stolen syntax</title>
@@ -2385,11 +2415,13 @@ There is now some potential ambiguity in import and export lists; for example
if you write <literal>import M( (+) )</literal> do you mean the
<emphasis>function</emphasis> <literal>(+)</literal> or the
<emphasis>type constructor</emphasis> <literal>(+)</literal>?
-The default is the former, but GHC allows you to specify the latter
+The default is the former, but with <option>-XExplicitNamespaces</option> (which is implied
+by <option>-XTypeOperators</option>) GHC allows you to specify the latter
by preceding it with the keyword <literal>type</literal>, thus:
<programlisting>
import M( type (+) )
</programlisting>
+See <xref linkend="explicit-namespaces"/>.
</para></listitem>
<listitem><para>
The fixity of a type operator may be set using the usual fixity declarations
@@ -4642,30 +4674,44 @@ The willingness to be overlapped or incoherent is a property of
the <emphasis>instance declaration</emphasis> itself, controlled by the
presence or otherwise of the <option>-XOverlappingInstances</option>
and <option>-XIncoherentInstances</option> flags when that module is
-being defined. Specifically, during the lookup process:
+being defined. Suppose we are searching for an instance of the
+<emphasis>target constraint</emphasis> <literal>(C ty1 .. tyn)</literal>.
+The search works like this.
<itemizedlist>
<listitem><para>
-If the constraint being looked up matches two instance declarations IA and IB,
-and
-<itemizedlist>
-<listitem><para>IB is a substitution instance of IA (but not vice versa);
-that is, IB is strictly more specific than IA</para></listitem>
-<listitem><para>either IA or IB was compiled with <option>-XOverlappingInstances</option></para></listitem>
-</itemizedlist>
-then the less-specific instance IA is ignored.
+Find all instances I that <emphasis>match</emphasis> the target constraint;
+that is, the target constraint is a substitution instance of I. These
+instance declarations are the <emphasis>candidates</emphasis>.
</para></listitem>
+
+<listitem><para>
+Find all <emphasis>non-candidate</emphasis> instances
+that <emphasis>unify</emphasis> with the target constraint.
+Such non-candidate instances might match when the target constraint is further
+instantiated. If all of them were compiled with
+<option>-XIncoherentInstances</option>, proceed; if not, the search fails.
+</para></listitem>
+
<listitem><para>
-Suppose an instance declaration does not match the constraint being looked up, but
-does <emphasis>unify</emphasis> with it, so that it might match when the constraint is further
-instantiated. Usually GHC will regard this as a reason for not committing to
-some other constraint. But if the instance declaration was compiled with
-<option>-XIncoherentInstances</option>, GHC will skip the "does-it-unify?"
-check for that declaration.
+Eliminate any candidate IX for which both of the following hold:
+
+<itemizedlist>
+<listitem><para>There is another candidate IY that is strictly more specific;
+that is, IY is a substitution instance of IX but not vice versa.
+</para></listitem>
+<listitem><para>Either IX or IY was compiled with
+<option>-XOverlappingInstances</option>.
</para></listitem>
+</itemizedlist>
+
+</para></listitem>
+
<listitem><para>
-If two instance declarations are matched and either is compiled with
-<option>-XIncoherentInstances</option>, then that declaration is ignored.
+If only one candidate remains, pick it.
+Otherwise if all remaining candidates were compiled with
+<option>-XIncoherentInstances</option>, pick an arbitrary candidate.
</para></listitem>
+
</itemizedlist>
These rules make it possible for a library author to design a library that relies on
overlapping instances without the library client having to know.
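For example, here is a minimal sketch of these rules in action (the class and
instances are illustrative only, not taken from a real library):
<programlisting>
  {-# LANGUAGE FlexibleInstances, OverlappingInstances #-}

  class Desc a where
    desc :: a -> String

  instance Desc [a]   where desc _ = "a list"          -- general
  instance Desc [Int] where desc _ = "a list of Ints"  -- more specific

  -- desc [True]     ==> "a list"          (only the general instance matches)
  -- desc [1 :: Int] ==> "a list of Ints"  (both match; the more specific wins)
</programlisting>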
@@ -6386,106 +6432,91 @@ The flag <option>-XFlexibleContexts</option> also lifts the corresponding
restriction on class declarations (<xref linkend="superclass-rules"/>) and instance declarations
(<xref linkend="instance-rules"/>).
</para>
+</sect2>
-<para>
-GHC imposes the following restrictions on the constraints in a type signature.
-Consider the type:
+<sect2 id="ambiguity"><title>Ambiguous types and the ambiguity check</title>
+<para>
+Each user-written type signature is subjected to an
+<emphasis>ambiguity check</emphasis>.
+The ambiguity check rejects functions that can never be called; for example:
<programlisting>
- forall tv1..tvn (c1, ...,cn) => type
+ f :: C a => Int
</programlisting>
-
-(Here, we write the "foralls" explicitly, although the Haskell source
-language omits them; in Haskell 98, all the free type variables of an
-explicit source-language type signature are universally quantified,
-except for the class type variables in a class declaration. However,
-in GHC, you can give the foralls if you want. See <xref linkend="explicit-foralls"/>).
+The idea is there can be no legal calls to <literal>f</literal> because every call will
+give rise to an ambiguous constraint.
</para>
-
<para>
-
-<orderedlist>
-<listitem>
-
+The <emphasis>only</emphasis> purpose of the
+ambiguity check is to report functions that cannot possibly be called.
+We could soundly omit the
+ambiguity check on type signatures entirely, at the expense of
+delaying ambiguity errors to call sites. Indeed, the language extension
+<option>-XAllowAmbiguousTypes</option> switches off the ambiguity check.
+</para>
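+<para>
+For example, with that extension the earlier signature is accepted
+(a sketch only; <literal>C</literal> stands for some single-parameter
+class assumed to be in scope), and any ambiguity error is deferred to
+the call sites of <literal>f</literal>:
+<programlisting>
+  {-# LANGUAGE AllowAmbiguousTypes #-}
+  f :: C a => Int
+  f = 3
+</programlisting>
+</para>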
<para>
- <emphasis>Each universally quantified type variable
-<literal>tvi</literal> must be reachable from <literal>type</literal></emphasis>.
-
-A type variable <literal>a</literal> is "reachable" if it appears
-in the same constraint as either a type variable free in
-<literal>type</literal>, or another reachable type variable.
-A value with a type that does not obey
-this reachability restriction cannot be used without introducing
-ambiguity; that is why the type is rejected.
-Here, for example, is an illegal type:
-
-
+Ambiguity can be subtle. Consider this example which uses functional dependencies:
<programlisting>
- forall a. Eq a => Int
+ class D a b | a -> b where ..
+ h :: D Int b => Int
</programlisting>
-
-
-When a value with this type was used, the constraint <literal>Eq tv</literal>
-would be introduced where <literal>tv</literal> is a fresh type variable, and
-(in the dictionary-translation implementation) the value would be
-applied to a dictionary for <literal>Eq tv</literal>. The difficulty is that we
-can never know which instance of <literal>Eq</literal> to use because we never
-get any more information about <literal>tv</literal>.
-</para>
-<para>
-Note
-that the reachability condition is weaker than saying that <literal>a</literal> is
-functionally dependent on a type variable free in
-<literal>type</literal> (see <xref
-linkend="functional-dependencies"/>). The reason for this is there
-might be a "hidden" dependency, in a superclass perhaps. So
-"reachable" is a conservative approximation to "functionally dependent".
-For example, consider:
+The <literal>Int</literal> may well fix <literal>b</literal> at the call site, so that signature should
+not be rejected. Moreover, the dependencies might be hidden. Consider
+<programlisting>
+ class X a b where ...
+ class D a b | a -> b where ...
+ instance D a b => X [a] b where...
+ h :: X a b => a -> a
+</programlisting>
+Here <literal>h</literal>'s type looks ambiguous in <literal>b</literal>, but here's a legal call:
<programlisting>
- class C a b | a -> b where ...
- class C a b => D a b where ...
- f :: forall a b. D a b => a -> a
+ ...(h [True])...
</programlisting>
-This is fine, because in fact <literal>a</literal> does functionally determine <literal>b</literal>
-but that is not immediately apparent from <literal>f</literal>'s type.
+That gives rise to a <literal>(X [Bool] beta)</literal> constraint, and using the
+instance means we need <literal>(D Bool beta)</literal> and that
+fixes <literal>beta</literal> via <literal>D</literal>'s
+fundep!
</para>
-</listitem>
-<listitem>
-
<para>
- <emphasis>Every constraint <literal>ci</literal> must mention at least one of the
-universally quantified type variables <literal>tvi</literal></emphasis>.
-
-For example, this type is OK because <literal>C a b</literal> mentions the
-universally quantified type variable <literal>b</literal>:
-
-
+Behind all these special cases there is a simple guiding principle.
+Consider
<programlisting>
- forall a. C a b => burble
-</programlisting>
+ f :: <replaceable>type</replaceable>
+ f = ...blah...
-
-The next type is illegal because the constraint <literal>Eq b</literal> does not
-mention <literal>a</literal>:
-
-
-<programlisting>
- forall a. Eq b => burble
+ g :: <replaceable>type</replaceable>
+ g = f
</programlisting>
-
-
-The reason for this restriction is milder than the other one. The
-excluded types are never useful or necessary (because the offending
-context doesn't need to be witnessed at this point; it can be floated
-out). Furthermore, floating them out increases sharing. Lastly,
-excluding them is a conservative choice; it leaves a patch of
-territory free in case we need it later.
-
+You would think that the definition of <literal>g</literal> would surely typecheck!
+After all <literal>f</literal> has exactly the same type, and <literal>g=f</literal>.
+But in fact <literal>f</literal>'s type
+is instantiated and the instantiated constraints are solved against
+the constraints bound by <literal>g</literal>'s signature. So, in the case of an ambiguous type, solving will fail.
+For example, consider the earlier definition <literal>f :: C a => Int</literal>. Then in <literal>g</literal>'s definition,
+we'll instantiate to <literal>(C alpha)</literal> and try to
+deduce <literal>(C alpha)</literal> from <literal>(C a)</literal>,
+and fail.
+</para>
+<para>
+So in fact we use this as our <emphasis>definition</emphasis> of ambiguity: a type
+<literal><replaceable>ty</replaceable></literal> is
+ambiguous if and only if <literal>((undefined :: <replaceable>ty</replaceable>)
+:: <replaceable>ty</replaceable>)</literal> would fail to typecheck. We use a
+very similar test for <emphasis>inferred</emphasis> types, to ensure that they too are
+unambiguous.
</para>
-</listitem>
-
-</orderedlist>
+<para>
+<emphasis>A historical note.</emphasis>
+GHC used to impose some more restrictive and less principled conditions
+on type signatures. For a type
+<literal>forall tv1..tvn (c1, ...,cn) => type</literal>
+GHC used to require (a) that each universally quantified type variable
+<literal>tvi</literal> must be "reachable" from <literal>type</literal>,
+and (b) that every constraint <literal>ci</literal> mentions at least one of the
+universally quantified type variables <literal>tvi</literal>.
+These ad-hoc restrictions are completely subsumed by the new ambiguity check.
+<emphasis>End of historical note.</emphasis>
</para>
</sect2>
@@ -6498,9 +6529,7 @@ territory free in case we need it later.
J Lewis, MB Shields, E Meijer, J Launchbury,
27th ACM Symposium on Principles of Programming Languages (POPL'00),
Boston, Jan 2000.
-</para>
-
-<para>(Most of the following, still rather incomplete, documentation is
+(Most of the following, still rather incomplete, documentation is
due to Jeff Lewis.)</para>
<para>Implicit parameter support is enabled with the option
@@ -7612,8 +7641,9 @@ Failed, modules loaded: none.
This ensures that an unbound identifier is never reported with a too polymorphic type, like
<literal>forall a. a</literal>, when used multiple times for types that can not be unified.
</para>
-
</sect2>
+
+
</sect1>
<!-- ==================== End of type system extensions ================= -->
diff --git a/driver/ghc-usage.txt b/driver/ghc-usage.txt
index b45199bdc1..4eda2c1724 100644
--- a/driver/ghc-usage.txt
+++ b/driver/ghc-usage.txt
@@ -77,4 +77,7 @@ online copy can be found here:
http://haskell.org/haskellwiki/GHC
+If you *really* want to see every option, then you can pass
+'--show-options' to the compiler.
+
------------------------------------------------------------------------
diff --git a/includes/rts/Linker.h b/includes/rts/Linker.h
index e900e858c1..28f0a0e928 100644
--- a/includes/rts/Linker.h
+++ b/includes/rts/Linker.h
@@ -16,8 +16,10 @@
#if defined(mingw32_HOST_OS)
typedef wchar_t pathchar;
+#define PATH_FMT "ls"
#else
typedef char pathchar;
+#define PATH_FMT "s"
#endif
/* initialize the object linker */
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index b0ed03b814..876f39a02c 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -364,9 +364,12 @@ RTS_FUN_DECL(stg_word64ToIntegerzh);
#endif
RTS_FUN_DECL(stg_unsafeThawArrayzh);
+RTS_FUN_DECL(stg_casArrayzh);
RTS_FUN_DECL(stg_newByteArrayzh);
RTS_FUN_DECL(stg_newPinnedByteArrayzh);
RTS_FUN_DECL(stg_newAlignedPinnedByteArrayzh);
+RTS_FUN_DECL(stg_casIntArrayzh);
+RTS_FUN_DECL(stg_fetchAddIntArrayzh);
RTS_FUN_DECL(stg_newArrayzh);
RTS_FUN_DECL(stg_newArrayArrayzh);
diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h
index 99c25fe355..07ea522328 100644
--- a/includes/stg/SMP.h
+++ b/includes/stg/SMP.h
@@ -52,13 +52,14 @@ EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
/*
- * Atomic increment
+ * Atomic addition by the provided quantity
*
- * atomic_inc(p) {
- * return ++(*p);
+ * atomic_inc(p, n) {
+ * return ((*p) += n);
* }
*/
-EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p);
+EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord n);
+
/*
* Atomic decrement
@@ -236,22 +237,24 @@ cas(StgVolatilePtr p, StgWord o, StgWord n)
#endif
}
+// RRN: Generalized to arbitrary increments to enable fetch-and-add in
+// Haskell code (fetchAddIntArray#).
EXTERN_INLINE StgWord
-atomic_inc(StgVolatilePtr p)
+atomic_inc(StgVolatilePtr p, StgWord incr)
{
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
StgWord r;
- r = 1;
+ r = incr;
__asm__ __volatile__ (
"lock\nxadd %0,%1":
"+r" (r), "+m" (*p):
);
- return r+1;
+ return r + incr;
#else
StgWord old, new;
do {
old = *p;
- new = old + 1;
+ new = old + incr;
} while (cas(p, old, new) != old);
return new;
#endif
@@ -393,12 +396,14 @@ cas(StgVolatilePtr p, StgWord o, StgWord n)
return result;
}
-INLINE_HEADER StgWord
-atomic_inc(StgVolatilePtr p)
+EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord incr);
+EXTERN_INLINE StgWord
+atomic_inc(StgVolatilePtr p, StgWord incr)
{
- return ++(*p);
+ return ((*p) += incr);
}
+
INLINE_HEADER StgWord
atomic_dec(StgVolatilePtr p)
{
diff --git a/libraries/haskeline b/libraries/haskeline
-Subproject 40bcd6ac30577d1d240166674d1e328ac52c1fd
+Subproject 495b00b4f125c0b4e7f1e966cd961eae89cffdc
diff --git a/libraries/time b/libraries/time
-Subproject 12ba4321d34d646cf9040ad12810c4257d26ade
+Subproject d4f019b2c6a332be5443b5bf88d0c7fef91523c
diff --git a/mk/config.mk.in b/mk/config.mk.in
index 0c2003b680..6402ac5680 100644
--- a/mk/config.mk.in
+++ b/mk/config.mk.in
@@ -490,6 +490,7 @@ endif
TargetPlatformFull = @TargetPlatformFull@
GccLT34 = @GccLT34@
GccLT46 = @GccLT46@
+GccIsClang = @GccIsClang@
CC = $(WhatGccIsCalled)
CC_STAGE0 = @CC_STAGE0@
diff --git a/mk/install.mk.in b/mk/install.mk.in
index 2808c1f784..d6f68abd19 100644
--- a/mk/install.mk.in
+++ b/mk/install.mk.in
@@ -105,7 +105,7 @@ else
# to be in the same place (and things like ghc-pkg need to agree on
# where package.conf is, so we just set it globally).
#
-ghclibdir = $(libdir)/ghc-$(ProjectVersion)
+ghclibdir = $(libdir)/$(CrossCompilePrefix)ghc-$(ProjectVersion)
ghcdocdir = $(datarootdir)/doc/ghc
endif
diff --git a/mk/validate-settings.mk b/mk/validate-settings.mk
index 9aa824d5bc..ba645a4190 100644
--- a/mk/validate-settings.mk
+++ b/mk/validate-settings.mk
@@ -10,6 +10,8 @@ HADDOCK_DOCS = YES
#####################
# Warnings
+ifneq "$(GccIsClang)" "YES"
+
# Debian doesn't turn -Werror=unused-but-set-variable on by default, so
# we turn it on explicitly for consistency with other users
ifeq "$(GccLT46)" "NO"
@@ -18,6 +20,13 @@ SRC_CC_WARNING_OPTS += -Werror=unused-but-set-variable
SRC_CC_WARNING_OPTS += -Wno-error=inline
endif
+else
+
+# Don't warn about unknown GCC pragmas when using clang
+SRC_CC_WARNING_OPTS += -Wno-unknown-pragmas
+
+endif
+
SRC_CC_OPTS += $(WERROR) -Wall
SRC_HC_OPTS += $(WERROR) -Wall
diff --git a/packages b/packages
index ecba5c8252..39d8d8da49 100644
--- a/packages
+++ b/packages
@@ -2,7 +2,7 @@
# the *repositories* that make up GHC. It is parsed by
# * boot
# * sync-all
-# * rules/extra-packages.mk
+# * rules/foreachLibrary.mk
#
# Some of this information is duplicated elsewhere in the build system:
# See Trac #3896
@@ -20,11 +20,13 @@
# - nofib and testsuite are optional helpers
#
# The format of the lines in this file is:
-# localpath tag remotepath
+# localpath tag remotepath upstreamurl
# where
# * 'localpath' is where to put the repository in a checked out tree.
# * 'remotepath' is where the repository is in the central repository.
# It is - for submodules.
+# * 'upstreamurl' is the upstream Git repo location for packages
+# maintained outside of GHC HQ.
#
# * The 'tag' determines when "darcs-all get" will get the
# repo. If the tag is "-" then it will always get it, but if there
@@ -39,47 +41,47 @@
#
# Lines that start with a '#' are comments.
#
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# localpath tag remotepath
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-ghc-tarballs windows ghc-tarballs.git
-libffi-tarballs - libffi-tarballs.git
-utils/hsc2hs - hsc2hs.git
-utils/haddock - haddock.git
-libraries/array - packages/array.git
-libraries/base - packages/base.git
-libraries/binary - -
-libraries/bytestring - -
-libraries/Cabal - -
-libraries/containers - -
-libraries/deepseq - packages/deepseq.git
-libraries/directory - packages/directory.git
-libraries/filepath - packages/filepath.git
-libraries/ghc-prim - packages/ghc-prim.git
-libraries/haskeline - -
-libraries/haskell98 - packages/haskell98.git
-libraries/haskell2010 - packages/haskell2010.git
-libraries/hoopl - packages/hoopl.git
-libraries/hpc - packages/hpc.git
-libraries/integer-gmp - packages/integer-gmp.git
-libraries/integer-simple - packages/integer-simple.git
-libraries/old-locale - packages/old-locale.git
-libraries/old-time - packages/old-time.git
-libraries/pretty - -
-libraries/process - packages/process.git
-libraries/template-haskell - packages/template-haskell.git
-libraries/terminfo - -
-libraries/time - -
-libraries/transformers - -
-libraries/unix - packages/unix.git
-libraries/Win32 - -
-libraries/xhtml - -
-testsuite testsuite testsuite.git
-nofib nofib nofib.git
-libraries/parallel extra packages/parallel.git
-libraries/stm extra packages/stm.git
-libraries/random dph -
-libraries/primitive dph -
-libraries/vector dph -
-libraries/dph dph packages/dph.git
-. - ghc.git
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# localpath tag remotepath upstreamurl
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ghc-tarballs windows ghc-tarballs.git -
+libffi-tarballs - libffi-tarballs.git -
+utils/hsc2hs - hsc2hs.git -
+utils/haddock - haddock.git -
+libraries/array - packages/array.git -
+libraries/base - packages/base.git -
+libraries/binary - - https://github.com/kolmodin/binary.git
+libraries/bytestring - - https://github.com/haskell/bytestring.git
+libraries/Cabal - - https://github.com/haskell/cabal.git
+libraries/containers - - https://github.com/haskell/containers.git
+libraries/deepseq - packages/deepseq.git -
+libraries/directory - packages/directory.git -
+libraries/filepath - packages/filepath.git -
+libraries/ghc-prim - packages/ghc-prim.git -
+libraries/haskeline - - http://git.haskell.org/darcs-mirrors/haskeline.git
+libraries/haskell98 - packages/haskell98.git -
+libraries/haskell2010 - packages/haskell2010.git -
+libraries/hoopl - packages/hoopl.git -
+libraries/hpc - packages/hpc.git -
+libraries/integer-gmp - packages/integer-gmp.git -
+libraries/integer-simple - packages/integer-simple.git -
+libraries/old-locale - packages/old-locale.git -
+libraries/old-time - packages/old-time.git -
+libraries/pretty - - https://github.com/haskell/pretty.git
+libraries/process - packages/process.git -
+libraries/template-haskell - packages/template-haskell.git -
+libraries/terminfo - - http://git.haskell.org/darcs-mirrors/terminfo.git
+libraries/time - - http://git.haskell.org/darcs-mirrors/time.git
+libraries/transformers - - http://git.haskell.org/darcs-mirrors/transformers.git
+libraries/unix - packages/unix.git -
+libraries/Win32 - - https://github.com/haskell/win32.git
+libraries/xhtml - - https://github.com/haskell/xhtml.git
+testsuite testsuite testsuite.git -
+nofib nofib nofib.git -
+libraries/parallel extra packages/parallel.git -
+libraries/stm extra packages/stm.git -
+libraries/random dph - -
+libraries/primitive dph - http://git.haskell.org/darcs-mirrors/primitive.git
+libraries/vector dph - http://git.haskell.org/darcs-mirrors/vector.git
+libraries/dph dph packages/dph.git -
+. - ghc.git -
\ No newline at end of file
diff --git a/rts/Capability.c b/rts/Capability.c
index 811df582a8..5988d4205c 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -35,7 +35,13 @@ Capability MainCapability;
nat n_capabilities = 0;
nat enabled_capabilities = 0;
-Capability *capabilities = NULL;
+
+// The array of Capabilities. It's important that when we need
+// to allocate more Capabilities we don't have to move the existing
+// Capabilities, because there may be pointers to them in use
+// (e.g. threads in waitForReturnCapability(), see #8209), so this is
+// an array of Capability* rather than an array of Capability.
+Capability **capabilities = NULL;
// Holds the Capability which last became free. This is used so that
// an in-call has a chance of quickly finding a free Capability.
@@ -126,7 +132,7 @@ findSpark (Capability *cap)
/* visit cap.s 0..n-1 in sequence until a theft succeeds. We could
start at a random place instead of 0 as well. */
for ( i=0 ; i < n_capabilities ; i++ ) {
- robbed = &capabilities[i];
+ robbed = capabilities[i];
if (cap == robbed) // ourselves...
continue;
@@ -169,7 +175,7 @@ anySparks (void)
nat i;
for (i=0; i < n_capabilities; i++) {
- if (!emptySparkPoolCap(&capabilities[i])) {
+ if (!emptySparkPoolCap(capabilities[i])) {
return rtsTrue;
}
}
@@ -323,7 +329,8 @@ initCapabilities( void )
#else /* !THREADED_RTS */
n_capabilities = 1;
- capabilities = &MainCapability;
+ capabilities = stgMallocBytes(sizeof(Capability*), "initCapabilities");
+ capabilities[0] = &MainCapability;
initCapability(&MainCapability, 0);
#endif
@@ -333,46 +340,40 @@ initCapabilities( void )
// There are no free capabilities to begin with. We will start
// a worker Task to each Capability, which will quickly put the
// Capability on the free list when it finds nothing to do.
- last_free_capability = &capabilities[0];
+ last_free_capability = capabilities[0];
}
-Capability *
+void
moreCapabilities (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
{
#if defined(THREADED_RTS)
nat i;
- Capability *old_capabilities = capabilities;
+ Capability **old_capabilities = capabilities;
+
+ capabilities = stgMallocBytes(to * sizeof(Capability*), "moreCapabilities");
if (to == 1) {
// THREADED_RTS must work on builds that don't have a mutable
// BaseReg (eg. unregisterised), so in this case
// capabilities[0] must coincide with &MainCapability.
- capabilities = &MainCapability;
- } else {
- capabilities = stgMallocBytes(to * sizeof(Capability),
- "moreCapabilities");
-
- if (from > 0) {
- memcpy(capabilities, old_capabilities, from * sizeof(Capability));
- }
+ capabilities[0] = &MainCapability;
}
- for (i = from; i < to; i++) {
- initCapability(&capabilities[i], i);
+ for (i = 0; i < to; i++) {
+ if (i < from) {
+ capabilities[i] = old_capabilities[i];
+ } else {
+ capabilities[i] = stgMallocBytes(sizeof(Capability),
+ "moreCapabilities");
+ initCapability(capabilities[i], i);
+ }
}
- last_free_capability = &capabilities[0];
-
debugTrace(DEBUG_sched, "allocated %d more capabilities", to - from);
- // Return the old array to free later.
- if (from > 1) {
- return old_capabilities;
- } else {
- return NULL;
+ if (old_capabilities != NULL) {
+ stgFree(old_capabilities);
}
-#else
- return NULL;
#endif
}
@@ -385,7 +386,7 @@ void contextSwitchAllCapabilities(void)
{
nat i;
for (i=0; i < n_capabilities; i++) {
- contextSwitchCapability(&capabilities[i]);
+ contextSwitchCapability(capabilities[i]);
}
}
@@ -393,7 +394,7 @@ void interruptAllCapabilities(void)
{
nat i;
for (i=0; i < n_capabilities; i++) {
- interruptCapability(&capabilities[i]);
+ interruptCapability(capabilities[i]);
}
}
@@ -606,8 +607,8 @@ waitForReturnCapability (Capability **pCap, Task *task)
// otherwise, search for a free capability
cap = NULL;
for (i = 0; i < n_capabilities; i++) {
- if (!capabilities[i].running_task) {
- cap = &capabilities[i];
+ if (!capabilities[i]->running_task) {
+ cap = capabilities[i];
break;
}
}
@@ -955,7 +956,7 @@ shutdownCapabilities(Task *task, rtsBool safe)
nat i;
for (i=0; i < n_capabilities; i++) {
ASSERT(task->incall->tso == NULL);
- shutdownCapability(&capabilities[i], task, safe);
+ shutdownCapability(capabilities[i], task, safe);
}
#if defined(THREADED_RTS)
ASSERT(checkSparkCountInvariant());
@@ -981,11 +982,13 @@ freeCapabilities (void)
#if defined(THREADED_RTS)
nat i;
for (i=0; i < n_capabilities; i++) {
- freeCapability(&capabilities[i]);
+ freeCapability(capabilities[i]);
+ stgFree(capabilities[i]);
}
#else
freeCapability(&MainCapability);
#endif
+ stgFree(capabilities);
traceCapsetDelete(CAPSET_OSPROCESS_DEFAULT);
traceCapsetDelete(CAPSET_CLOCKDOMAIN_DEFAULT);
}
@@ -1032,7 +1035,7 @@ markCapabilities (evac_fn evac, void *user)
{
nat n;
for (n = 0; n < n_capabilities; n++) {
- markCapability(evac, user, &capabilities[n], rtsFalse);
+ markCapability(evac, user, capabilities[n], rtsFalse);
}
}
@@ -1044,13 +1047,13 @@ rtsBool checkSparkCountInvariant (void)
nat i;
for (i = 0; i < n_capabilities; i++) {
- sparks.created += capabilities[i].spark_stats.created;
- sparks.dud += capabilities[i].spark_stats.dud;
- sparks.overflowed+= capabilities[i].spark_stats.overflowed;
- sparks.converted += capabilities[i].spark_stats.converted;
- sparks.gcd += capabilities[i].spark_stats.gcd;
- sparks.fizzled += capabilities[i].spark_stats.fizzled;
- remaining += sparkPoolSize(capabilities[i].sparks);
+ sparks.created += capabilities[i]->spark_stats.created;
+ sparks.dud += capabilities[i]->spark_stats.dud;
+ sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
+ sparks.converted += capabilities[i]->spark_stats.converted;
+ sparks.gcd += capabilities[i]->spark_stats.gcd;
+ sparks.fizzled += capabilities[i]->spark_stats.fizzled;
+ remaining += sparkPoolSize(capabilities[i]->sparks);
}
/* The invariant is
diff --git a/rts/Capability.h b/rts/Capability.h
index 3348f88e4d..9de47357c4 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -132,8 +132,8 @@ struct Capability_ {
StgTRecHeader *free_trec_headers;
nat transaction_tokens;
} // typedef Capability is defined in RtsAPI.h
- // Capabilities are stored in an array, so make sure that adjacent
- // Capabilities don't share any cache-lines:
+ // We never want a Capability to overlap a cache line with anything
+ // else, so round it up to a cache line size:
#ifndef mingw32_HOST_OS
ATTRIBUTE_ALIGNED(64)
#endif
@@ -181,7 +181,7 @@ void initCapabilities (void);
// Add and initialise more Capabilities
//
-Capability * moreCapabilities (nat from, nat to);
+void moreCapabilities (nat from, nat to);
// Release a capability. This is called by a Task that is exiting
// Haskell to make a foreign call, or in various other cases when we
@@ -211,7 +211,7 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED,
// Array of all the capabilities
//
-extern Capability *capabilities;
+extern Capability **capabilities;
// The Capability that was last free. Used as a good guess for where
// to assign new threads.
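
As the revised comment above says, each Capability must not share a cache line with anything else now that the structs are individually allocated. A stand-alone sketch of that padding/alignment idiom, assuming a GCC/Clang-style attribute and a 64-byte line as in the non-Windows branch above:

    /* Keep each instance on its own cache line: align to the line size and
     * pad the struct out to a whole line (64 bytes assumed here). */
    #define CACHE_LINE 64

    struct padded_counter {
        unsigned long value;
        char pad[CACHE_LINE - sizeof(unsigned long)];
    } __attribute__((aligned(CACHE_LINE)));

The alignment is what actually matters; the explicit pad member is just one common way of making the padding visible.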
diff --git a/rts/CheckUnload.c b/rts/CheckUnload.c
index a758b06db3..d3fb8934b5 100644
--- a/rts/CheckUnload.c
+++ b/rts/CheckUnload.c
@@ -246,7 +246,7 @@ void checkUnload (StgClosure *static_objects)
HashTable *addrs;
StgClosure* p;
const StgInfoTable *info;
- ObjectCode *oc, *prev;
+ ObjectCode *oc, *prev, *next;
gen_workspace *ws;
StgClosure* link;
@@ -254,7 +254,7 @@ void checkUnload (StgClosure *static_objects)
// Mark every unloadable object as unreferenced initially
for (oc = unloaded_objects; oc; oc = oc->next) {
- IF_DEBUG(linker, debugBelch("Checking whether to unload %s\n",
+ IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n",
oc->fileName));
oc->referenced = rtsFalse;
}
@@ -283,19 +283,20 @@ void checkUnload (StgClosure *static_objects)
// marked as unreferenced can be physically unloaded, because we
// have no references to it.
prev = NULL;
- for (oc = unloaded_objects; oc; prev = oc, oc = oc->next) {
+ for (oc = unloaded_objects; oc; prev = oc, oc = next) {
+ next = oc->next;
if (oc->referenced == 0) {
if (prev == NULL) {
unloaded_objects = oc->next;
} else {
prev->next = oc->next;
}
- IF_DEBUG(linker, debugBelch("Unloading object file %s\n",
+ IF_DEBUG(linker, debugBelch("Unloading object file %" PATH_FMT "\n",
oc->fileName));
freeObjectCode(oc);
} else {
- IF_DEBUG(linker, debugBelch("Object file still in use: %s\n",
- oc->fileName));
+ IF_DEBUG(linker, debugBelch("Object file still in use: %"
+ PATH_FMT "\n", oc->fileName));
}
}
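
The loop change above saves oc->next into 'next' before the node may be freed, the standard way to unlink-and-free while walking a singly linked list. A generic sketch of that pattern with a hypothetical node type (this sketch also only advances 'prev' past nodes that are kept):

    #include <stdlib.h>

    struct node { int keep; struct node *next; };

    /* Free every node with keep == 0 and return the new head.  'next' is
     * captured up front so a freed node is never dereferenced again. */
    static struct node *prune(struct node *head)
    {
        struct node *prev = NULL, *n, *next;

        for (n = head; n != NULL; n = next) {
            next = n->next;
            if (!n->keep) {
                if (prev == NULL) {
                    head = next;
                } else {
                    prev->next = next;
                }
                free(n);
            } else {
                prev = n;
            }
        }
        return head;
    }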
diff --git a/rts/Linker.c b/rts/Linker.c
index 4961ecf053..c8451b0425 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -169,7 +169,6 @@ static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
#define struct_stat struct _stat
#define open wopen
#define WSTR(s) L##s
-#define PATH_FMT "S"
#else
#define pathcmp strcmp
#define pathlen strlen
@@ -177,7 +176,6 @@ static ObjectCode* mkOc( pathchar *path, char *image, int imageSize,
#define pathstat stat
#define struct_stat struct stat
#define WSTR(s) s
-#define PATH_FMT "s"
#endif
static pathchar* pathdup(pathchar *path)
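
The two hunks above drop the file-local PATH_FMT definitions from Linker.c; the CheckUnload.c hunks earlier use the same macro with debugBelch, so it is presumably now provided from a shared header elsewhere in this merge. The underlying idiom is just a conditional format-string macro for printing pathchar strings; a stand-alone sketch, using the standard "%ls" conversion for the wide-character case rather than the "S" form removed above:

    #include <stdio.h>

    #if defined(_WIN32)
    #include <wchar.h>
    typedef wchar_t pathchar;
    #define PATH_FMT "ls"            /* wide-character path strings */
    #else
    typedef char pathchar;
    #define PATH_FMT "s"             /* ordinary narrow strings */
    #endif

    static void report(const pathchar *path)
    {
        printf("loading %" PATH_FMT "\n", path);
    }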
@@ -1140,8 +1138,11 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(stg_labelThreadzh) \
SymI_HasProto(stg_newArrayzh) \
SymI_HasProto(stg_newArrayArrayzh) \
+ SymI_HasProto(stg_casArrayzh) \
SymI_HasProto(stg_newBCOzh) \
SymI_HasProto(stg_newByteArrayzh) \
+ SymI_HasProto(stg_casIntArrayzh) \
+ SymI_HasProto(stg_fetchAddIntArrayzh) \
SymI_HasProto_redirect(newCAF, newDynCAF) \
SymI_HasProto(stg_newMVarzh) \
SymI_HasProto(stg_newMutVarzh) \
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index d8acaef77b..3cfec94b72 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -137,6 +137,34 @@ stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment )
return (p);
}
+// RRN: This one does not use the "ticketing" approach because it
+// deals in unboxed scalars, not heap pointers.
+stg_casIntArrayzh( gcptr arr, W_ ind, W_ old, W_ new )
+/* MutableByteArray# s -> Int# -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
+{
+ gcptr p;
+ W_ h;
+
+ p = arr + SIZEOF_StgArrWords + WDS(ind);
+ (h) = ccall cas(p, old, new);
+
+ return(h);
+}
+
+
+stg_fetchAddIntArrayzh( gcptr arr, W_ ind, W_ incr )
+/* MutableByteArray# s -> Int# -> Int# -> State# s -> (# State# s, Int# #) */
+{
+ gcptr p;
+ W_ h;
+
+ p = arr + SIZEOF_StgArrWords + WDS(ind);
+ (h) = ccall atomic_inc(p, incr);
+
+ return(h);
+}
+
+
stg_newArrayzh ( W_ n /* words */, gcptr init )
{
W_ words, size;
@@ -206,6 +234,29 @@ stg_unsafeThawArrayzh ( gcptr arr )
}
}
+// RRN: Uses the ticketed approach; see casMutVar
+stg_casArrayzh ( gcptr arr, W_ ind, gcptr old, gcptr new )
+/* MutableArray# s a -> Int# -> a -> a -> State# s -> (# State# s, Int#, Any a #) */
+{
+ gcptr p, h;
+ W_ len;
+
+ p = arr + SIZEOF_StgMutArrPtrs + WDS(ind);
+ (h) = ccall cas(p, old, new);
+
+ if (h != old) {
+ // Failure, return what was there instead of 'old':
+ return (1,h);
+ } else {
+ // Compare and Swap Succeeded:
+ SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS);
+ len = StgMutArrPtrs_ptrs(arr);
+ // The write barrier. We must write a byte into the mark table:
+ I8[arr + SIZEOF_StgMutArrPtrs + WDS(len) + (ind >> MUT_ARR_PTRS_CARD_BITS )] = 1;
+ return (0,new);
+ }
+}
+
stg_newArrayArrayzh ( W_ n /* words */ )
{
W_ words, size;
@@ -262,8 +313,13 @@ stg_newMutVarzh ( gcptr init )
return (mv);
}
+// RRN: To support the "ticketed" approach, we return the NEW rather
+// than old value if the CAS is successful. This is received in an
+// opaque form in the Haskell code, preventing the compiler from
+// changing its pointer identity. The ticket can then be safely used
+// in future CAS operations.
stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
- /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */
+ /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, Any a #) */
{
gcptr h;
@@ -275,7 +331,7 @@ stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new )
if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) {
ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr");
}
- return (0,h);
+ return (0,new);
}
}
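
stg_casArrayzh and stg_casMutVarzh above follow the "ticketed" convention described in the RRN comments: the caller passes in the value it last saw, and on failure receives whatever was actually found, which becomes the ticket for the next attempt; on success the newly written value is returned as the next ticket. A rough C illustration of that retry contract, using GCC's __sync_val_compare_and_swap as a stand-in for the RTS cas() helper (an assumption; the real primops also dirty the mutated object for the GC write barrier, as the hunks show):

    #include <stdint.h>

    typedef uintptr_t W_;

    /* One ticketed CAS attempt: returns 0 on success (and the new value
     * becomes the next ticket) or 1 on failure (and the value actually
     * found becomes the next ticket). */
    static int cas_ticketed(volatile W_ *slot, W_ *ticket, W_ new_val)
    {
        W_ expected = *ticket;
        W_ found = __sync_val_compare_and_swap(slot, expected, new_val);

        if (found == expected) {
            *ticket = new_val;   /* success: reuse the value we wrote */
            return 0;
        } else {
            *ticket = found;     /* failure: retry against what is there */
            return 1;
        }
    }

A caller would typically loop until cas_ticketed returns 0, recomputing new_val from the refreshed ticket on each failure.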
diff --git a/rts/Profiling.c b/rts/Profiling.c
index ec38c927c2..0c673261dc 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -149,7 +149,7 @@ initProfiling1 (void)
{
nat n;
for (n=0; n < n_capabilities; n++) {
- capabilities[n].r.rCCCS = CCS_SYSTEM;
+ capabilities[n]->r.rCCCS = CCS_SYSTEM;
}
}
diff --git a/rts/Proftimer.c b/rts/Proftimer.c
index 7ee32f835c..6458f6e095 100644
--- a/rts/Proftimer.c
+++ b/rts/Proftimer.c
@@ -76,7 +76,7 @@ handleProfTick(void)
if (do_prof_ticks) {
nat n;
for (n=0; n < n_capabilities; n++) {
- capabilities[n].r.rCCCS->time_ticks++;
+ capabilities[n]->r.rCCCS->time_ticks++;
}
}
#endif
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index dc21149d98..8cf88487f2 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -1789,7 +1789,7 @@ computeRetainerSet( void )
// because we can find MUT_VAR objects which have not been
// visited during retainer profiling.
for (n = 0; n < n_capabilities; n++) {
- for (bd = capabilities[n].mut_lists[g]; bd != NULL; bd = bd->link) {
+ for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
for (ml = bd->start; ml < bd->free; ml++) {
maybeInitRetainerSet((StgClosure *)*ml);
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 408146f195..07ebec62b1 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -376,7 +376,7 @@ schedule (Capability *initialCapability, Task *task)
// it was originally on.
#ifdef THREADED_RTS
if (cap->disabled && !t->bound) {
- Capability *dest_cap = &capabilities[cap->no % enabled_capabilities];
+ Capability *dest_cap = capabilities[cap->no % enabled_capabilities];
migrateThread(cap, t, dest_cap);
continue;
}
@@ -716,7 +716,7 @@ schedulePushWork(Capability *cap USED_IF_THREADS,
// First grab as many free Capabilities as we can.
for (i=0, n_free_caps=0; i < n_capabilities; i++) {
- cap0 = &capabilities[i];
+ cap0 = capabilities[i];
if (cap != cap0 && !cap0->disabled && tryGrabCapability(cap0,task)) {
if (!emptyRunQueue(cap0)
|| cap0->returning_tasks_hd != NULL
@@ -1395,7 +1395,7 @@ static void acquireAllCapabilities(Capability *cap, Task *task)
for (i=0; i < n_capabilities; i++) {
debugTrace(DEBUG_sched, "grabbing all the capabilities (%d/%d)", i, n_capabilities);
- tmpcap = &capabilities[i];
+ tmpcap = capabilities[i];
if (tmpcap != cap) {
// we better hope this task doesn't get migrated to
// another Capability while we're waiting for this one.
@@ -1418,8 +1418,8 @@ static void releaseAllCapabilities(nat n, Capability *cap, Task *task)
for (i = 0; i < n; i++) {
if (cap->no != i) {
- task->cap = &capabilities[i];
- releaseCapability(&capabilities[i]);
+ task->cap = capabilities[i];
+ releaseCapability(capabilities[i]);
}
}
task->cap = cap;
@@ -1540,21 +1540,21 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
|| (RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
collect_gen >= RtsFlags.ParFlags.parGcLoadBalancingGen)) {
for (i=0; i < n_capabilities; i++) {
- if (capabilities[i].disabled) {
- idle_cap[i] = tryGrabCapability(&capabilities[i], task);
+ if (capabilities[i]->disabled) {
+ idle_cap[i] = tryGrabCapability(capabilities[i], task);
} else {
idle_cap[i] = rtsFalse;
}
}
} else {
for (i=0; i < n_capabilities; i++) {
- if (capabilities[i].disabled) {
- idle_cap[i] = tryGrabCapability(&capabilities[i], task);
+ if (capabilities[i]->disabled) {
+ idle_cap[i] = tryGrabCapability(capabilities[i], task);
} else if (i == cap->no ||
- capabilities[i].idle < RtsFlags.ParFlags.parGcNoSyncWithIdle) {
+ capabilities[i]->idle < RtsFlags.ParFlags.parGcNoSyncWithIdle) {
idle_cap[i] = rtsFalse;
} else {
- idle_cap[i] = tryGrabCapability(&capabilities[i], task);
+ idle_cap[i] = tryGrabCapability(capabilities[i], task);
if (!idle_cap[i]) {
n_failed_trygrab_idles++;
} else {
@@ -1575,7 +1575,7 @@ scheduleDoGC (Capability **pcap, Task *task USED_IF_THREADS,
for (i=0; i < n_capabilities; i++) {
gc_threads[i]->idle = idle_cap[i];
- capabilities[i].idle++;
+ capabilities[i]->idle++;
}
// For all capabilities participating in this GC, wait until
@@ -1606,10 +1606,10 @@ delete_threads_and_gc:
// threads. It just avoids the GC having to do any work to
// figure out that any remaining sparks are garbage.
for (i = 0; i < n_capabilities; i++) {
- capabilities[i].spark_stats.gcd +=
- sparkPoolSize(capabilities[i].sparks);
+ capabilities[i]->spark_stats.gcd +=
+ sparkPoolSize(capabilities[i]->sparks);
// No race here since all Caps are stopped.
- discardSparksCap(&capabilities[i]);
+ discardSparksCap(capabilities[i]);
}
#endif
sched_state = SCHED_SHUTTING_DOWN;
@@ -1625,10 +1625,10 @@ delete_threads_and_gc:
#if defined(THREADED_RTS)
for (i = enabled_capabilities; i < n_capabilities; i++) {
Capability *tmp_cap, *dest_cap;
- tmp_cap = &capabilities[i];
+ tmp_cap = capabilities[i];
ASSERT(tmp_cap->disabled);
if (i != cap->no) {
- dest_cap = &capabilities[i % enabled_capabilities];
+ dest_cap = capabilities[i % enabled_capabilities];
while (!emptyRunQueue(tmp_cap)) {
tso = popRunQueue(tmp_cap);
migrateThread(tmp_cap, tso, dest_cap);
@@ -1703,11 +1703,11 @@ delete_threads_and_gc:
for (i = 0; i < n_capabilities; i++) {
if (i != cap->no) {
if (idle_cap[i]) {
- ASSERT(capabilities[i].running_task == task);
- task->cap = &capabilities[i];
- releaseCapability(&capabilities[i]);
+ ASSERT(capabilities[i]->running_task == task);
+ task->cap = capabilities[i];
+ releaseCapability(capabilities[i]);
} else {
- ASSERT(capabilities[i].running_task != task);
+ ASSERT(capabilities[i]->running_task != task);
}
}
}
@@ -1799,7 +1799,7 @@ forkProcess(HsStablePtr *entry
ACQUIRE_LOCK(&task->lock);
for (i=0; i < n_capabilities; i++) {
- ACQUIRE_LOCK(&capabilities[i].lock);
+ ACQUIRE_LOCK(&capabilities[i]->lock);
}
stopTimer(); // See #4074
@@ -1820,8 +1820,8 @@ forkProcess(HsStablePtr *entry
RELEASE_LOCK(&task->lock);
for (i=0; i < n_capabilities; i++) {
- releaseCapability_(&capabilities[i],rtsFalse);
- RELEASE_LOCK(&capabilities[i].lock);
+ releaseCapability_(capabilities[i],rtsFalse);
+ RELEASE_LOCK(&capabilities[i]->lock);
}
boundTaskExiting(task);
@@ -1837,7 +1837,7 @@ forkProcess(HsStablePtr *entry
initMutex(&task->lock);
for (i=0; i < n_capabilities; i++) {
- initMutex(&capabilities[i].lock);
+ initMutex(&capabilities[i]->lock);
}
#endif
@@ -1871,7 +1871,7 @@ forkProcess(HsStablePtr *entry
discardTasksExcept(task);
for (i=0; i < n_capabilities; i++) {
- cap = &capabilities[i];
+ cap = capabilities[i];
// Empty the run queue. It seems tempting to let all the
// killed threads stay on the run queue as zombies to be
@@ -1900,7 +1900,7 @@ forkProcess(HsStablePtr *entry
releaseCapability(cap);
}
}
- cap = &capabilities[0];
+ cap = capabilities[0];
task->cap = cap;
// Empty the threads lists. Otherwise, the garbage
@@ -1965,8 +1965,7 @@ setNumCapabilities (nat new_n_capabilities USED_IF_THREADS)
Task *task;
Capability *cap;
nat sync;
- StgTSO* t;
- nat g, n;
+ nat n;
Capability *old_capabilities = NULL;
nat old_n_capabilities = n_capabilities;
@@ -2013,8 +2012,8 @@ setNumCapabilities (nat new_n_capabilities USED_IF_THREADS)
// structures, the nursery, etc.
//
for (n = new_n_capabilities; n < enabled_capabilities; n++) {
- capabilities[n].disabled = rtsTrue;
- traceCapDisable(&capabilities[n]);
+ capabilities[n]->disabled = rtsTrue;
+ traceCapDisable(capabilities[n]);
}
enabled_capabilities = new_n_capabilities;
}
@@ -2025,8 +2024,8 @@ setNumCapabilities (nat new_n_capabilities USED_IF_THREADS)
// enable any disabled capabilities, up to the required number
for (n = enabled_capabilities;
n < new_n_capabilities && n < n_capabilities; n++) {
- capabilities[n].disabled = rtsFalse;
- traceCapEnable(&capabilities[n]);
+ capabilities[n]->disabled = rtsFalse;
+ traceCapEnable(capabilities[n]);
}
enabled_capabilities = n;
@@ -2042,23 +2041,10 @@ setNumCapabilities (nat new_n_capabilities USED_IF_THREADS)
// Resize the capabilities array
// NB. after this, capabilities points somewhere new. Any pointers
// of type (Capability *) are now invalid.
- old_capabilities = moreCapabilities(n_capabilities, new_n_capabilities);
-
- // update our own cap pointer
- cap = &capabilities[cap->no];
+ moreCapabilities(n_capabilities, new_n_capabilities);
// Resize and update storage manager data structures
storageAddCapabilities(n_capabilities, new_n_capabilities);
-
- // Update (Capability *) refs in the Task manager.
- updateCapabilityRefs();
-
- // Update (Capability *) refs from TSOs
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (t = generations[g].threads; t != END_TSO_QUEUE; t = t->global_link) {
- t->cap = &capabilities[t->cap->no];
- }
- }
}
}
@@ -2324,7 +2310,7 @@ scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
if (cpu == cap->no) {
appendToRunQueue(cap,tso);
} else {
- migrateThread(cap, tso, &capabilities[cpu]);
+ migrateThread(cap, tso, capabilities[cpu]);
}
#else
appendToRunQueue(cap,tso);
@@ -2407,7 +2393,7 @@ startWorkerTasks (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
Capability *cap;
for (i = from; i < to; i++) {
- cap = &capabilities[i];
+ cap = capabilities[i];
ACQUIRE_LOCK(&cap->lock);
startWorkerTask(cap);
RELEASE_LOCK(&cap->lock);
@@ -2506,9 +2492,6 @@ freeScheduler( void )
// Capability).
if (still_running == 0) {
freeCapabilities();
- if (n_capabilities != 1) {
- stgFree(capabilities);
- }
}
RELEASE_LOCK(&sched_mutex);
#if defined(THREADED_RTS)
diff --git a/rts/Stats.c b/rts/Stats.c
index 3dc1ebe0fb..c19f23c59d 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -346,10 +346,10 @@ calcTotalAllocated(void)
W_ tot_alloc = 0;
W_ n;
for (n = 0; n < n_capabilities; n++) {
- tot_alloc += capabilities[n].total_allocated;
- traceEventHeapAllocated(&capabilities[n],
+ tot_alloc += capabilities[n]->total_allocated;
+ traceEventHeapAllocated(capabilities[n],
CAPSET_HEAP_DEFAULT,
- capabilities[n].total_allocated * sizeof(W_));
+ capabilities[n]->total_allocated * sizeof(W_));
}
return tot_alloc;
@@ -730,12 +730,12 @@ stat_exit (void)
nat i;
SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
for (i = 0; i < n_capabilities; i++) {
- sparks.created += capabilities[i].spark_stats.created;
- sparks.dud += capabilities[i].spark_stats.dud;
- sparks.overflowed+= capabilities[i].spark_stats.overflowed;
- sparks.converted += capabilities[i].spark_stats.converted;
- sparks.gcd += capabilities[i].spark_stats.gcd;
- sparks.fizzled += capabilities[i].spark_stats.fizzled;
+ sparks.created += capabilities[i]->spark_stats.created;
+ sparks.dud += capabilities[i]->spark_stats.dud;
+ sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
+ sparks.converted += capabilities[i]->spark_stats.converted;
+ sparks.gcd += capabilities[i]->spark_stats.gcd;
+ sparks.fizzled += capabilities[i]->spark_stats.fizzled;
}
statsPrintf(" SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
@@ -900,10 +900,10 @@ statDescribeGens(void)
mut = 0;
for (i = 0; i < n_capabilities; i++) {
- mut += countOccupied(capabilities[i].mut_lists[g]);
+ mut += countOccupied(capabilities[i]->mut_lists[g]);
// Add the pinned object block.
- bd = capabilities[i].pinned_object_block;
+ bd = capabilities[i]->pinned_object_block;
if (bd != NULL) {
gen_live += bd->free - bd->start;
gen_blocks += bd->blocks;
@@ -999,12 +999,12 @@ extern void getSparkStats( SparkCounters *s ) {
s->gcd = 0;
s->fizzled = 0;
for (i = 0; i < n_capabilities; i++) {
- s->created += capabilities[i].spark_stats.created;
- s->dud += capabilities[i].spark_stats.dud;
- s->overflowed+= capabilities[i].spark_stats.overflowed;
- s->converted += capabilities[i].spark_stats.converted;
- s->gcd += capabilities[i].spark_stats.gcd;
- s->fizzled += capabilities[i].spark_stats.fizzled;
+ s->created += capabilities[i]->spark_stats.created;
+ s->dud += capabilities[i]->spark_stats.dud;
+ s->overflowed+= capabilities[i]->spark_stats.overflowed;
+ s->converted += capabilities[i]->spark_stats.converted;
+ s->gcd += capabilities[i]->spark_stats.gcd;
+ s->fizzled += capabilities[i]->spark_stats.fizzled;
}
}
#endif
diff --git a/rts/Task.c b/rts/Task.c
index e6781a17ff..a044bc3672 100644
--- a/rts/Task.c
+++ b/rts/Task.c
@@ -326,34 +326,6 @@ discardTasksExcept (Task *keep)
RELEASE_LOCK(&all_tasks_mutex);
}
-//
-// After the capabilities[] array has moved, we have to adjust all
-// (Capability *) pointers to point to the new array. The old array
-// is still valid at this point.
-//
-void updateCapabilityRefs (void)
-{
- Task *task;
- InCall *incall;
-
- ACQUIRE_LOCK(&all_tasks_mutex);
-
- for (task = all_tasks; task != NULL; task=task->all_next) {
- if (task->cap != NULL) {
- task->cap = &capabilities[task->cap->no];
- }
-
- for (incall = task->incall; incall != NULL; incall = incall->prev_stack) {
- if (incall->suspended_cap != NULL) {
- incall->suspended_cap = &capabilities[incall->suspended_cap->no];
- }
- }
- }
-
- RELEASE_LOCK(&all_tasks_mutex);
-}
-
-
#if defined(THREADED_RTS)
void
diff --git a/rts/Task.h b/rts/Task.h
index f18f154117..052647b327 100644
--- a/rts/Task.h
+++ b/rts/Task.h
@@ -213,11 +213,6 @@ void interruptWorkerTask (Task *task);
#endif /* THREADED_RTS */
-// Update any (Capability *) pointers belonging to Tasks after the
-// Capability array is moved/resized.
-//
-void updateCapabilityRefs (void);
-
// For stats
extern nat taskCount;
extern nat workerCount;
diff --git a/rts/Threads.c b/rts/Threads.c
index f2b800512e..14fb7e872c 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -801,7 +801,7 @@ printAllThreads(void)
debugBelch("all threads:\n");
for (i = 0; i < n_capabilities; i++) {
- cap = &capabilities[i];
+ cap = capabilities[i];
debugBelch("threads on capability %d:\n", cap->no);
for (t = cap->run_queue_hd; t != END_TSO_QUEUE; t = t->_link) {
printThreadStatus(t);
diff --git a/rts/Timer.c b/rts/Timer.c
index aa4b8d8fd7..b7762f985c 100644
--- a/rts/Timer.c
+++ b/rts/Timer.c
@@ -119,7 +119,7 @@ startTimer(void)
void
stopTimer(void)
{
- if (atomic_inc(&timer_disabled) == 1) {
+ if (atomic_inc(&timer_disabled, 1) == 1) {
if (RtsFlags.MiscFlags.tickInterval != 0) {
stopTicker();
}
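
The extra argument above (and in inc_running and stg_fetchAddIntArrayzh elsewhere in this diff) reflects a change to the atomic_inc helper: it now takes the increment explicitly instead of always adding one. A hedged sketch of such an operation in terms of a GCC builtin, assuming it returns the updated value, which is what the '== 1' check in stopTimer above implies:

    #include <stdint.h>

    typedef uintptr_t StgWord;

    /* Atomically add 'incr' to *p and return the resulting value
     * (assumed semantics; not the RTS implementation). */
    static inline StgWord atomic_add_new(volatile StgWord *p, StgWord incr)
    {
        return __sync_add_and_fetch(p, incr);
    }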
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index 247f1a01c6..375aeea2f2 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -933,7 +933,7 @@ compact(StgClosure *static_objects)
bdescr *bd;
StgPtr p;
for (n = 0; n < n_capabilities; n++) {
- for (bd = capabilities[n].mut_lists[g];
+ for (bd = capabilities[n]->mut_lists[g];
bd != NULL; bd = bd->link) {
for (p = bd->start; p < bd->free; p++) {
thread((StgClosure **)p);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 1b2cb12212..6fd3b1b1c9 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -236,8 +236,8 @@ GarbageCollect (nat collect_gen,
// attribute any costs to CCS_GC
#ifdef PROFILING
for (n = 0; n < n_capabilities; n++) {
- save_CCS[n] = capabilities[n].r.rCCCS;
- capabilities[n].r.rCCCS = CCS_GC;
+ save_CCS[n] = capabilities[n]->r.rCCCS;
+ capabilities[n]->r.rCCCS = CCS_GC;
}
#endif
@@ -339,18 +339,18 @@ GarbageCollect (nat collect_gen,
if (n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
#if defined(THREADED_RTS)
- scavenge_capability_mut_Lists1(&capabilities[n]);
+ scavenge_capability_mut_Lists1(capabilities[n]);
#else
- scavenge_capability_mut_lists(&capabilities[n]);
+ scavenge_capability_mut_lists(capabilities[n]);
#endif
}
} else {
scavenge_capability_mut_lists(gct->cap);
for (n = 0; n < n_capabilities; n++) {
if (gc_threads[n]->idle) {
- markCapability(mark_root, gct, &capabilities[n],
+ markCapability(mark_root, gct, capabilities[n],
rtsTrue/*don't mark sparks*/);
- scavenge_capability_mut_lists(&capabilities[n]);
+ scavenge_capability_mut_lists(capabilities[n]);
}
}
}
@@ -363,7 +363,7 @@ GarbageCollect (nat collect_gen,
gct->evac_gen_no = 0;
if (n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
- markCapability(mark_root, gct, &capabilities[n],
+ markCapability(mark_root, gct, capabilities[n],
rtsTrue/*don't mark sparks*/);
}
} else {
@@ -417,12 +417,12 @@ GarbageCollect (nat collect_gen,
#ifdef THREADED_RTS
if (n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
- pruneSparkQueue(&capabilities[n]);
+ pruneSparkQueue(capabilities[n]);
}
} else {
for (n = 0; n < n_capabilities; n++) {
if (n == cap->no || gc_threads[n]->idle) {
- pruneSparkQueue(&capabilities[n]);
+ pruneSparkQueue(capabilities[n]);
}
}
}
@@ -495,7 +495,7 @@ GarbageCollect (nat collect_gen,
if (g > 0) {
W_ mut_list_size = 0;
for (n = 0; n < n_capabilities; n++) {
- mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
+ mut_list_size += countOccupied(capabilities[n]->mut_lists[g]);
}
copied += mut_list_size;
@@ -646,14 +646,14 @@ GarbageCollect (nat collect_gen,
// Reset the nursery: make the blocks empty
if (DEBUG_IS_ON || n_gc_threads == 1) {
for (n = 0; n < n_capabilities; n++) {
- clearNursery(&capabilities[n]);
+ clearNursery(capabilities[n]);
}
} else {
// When doing parallel GC, clearNursery() is called by the
// worker threads
for (n = 0; n < n_capabilities; n++) {
if (gc_threads[n]->idle) {
- clearNursery(&capabilities[n]);
+ clearNursery(capabilities[n]);
}
}
}
@@ -753,7 +753,7 @@ GarbageCollect (nat collect_gen,
// restore enclosing cost centre
#ifdef PROFILING
for (n = 0; n < n_capabilities; n++) {
- capabilities[n].r.rCCCS = save_CCS[n];
+ capabilities[n]->r.rCCCS = save_CCS[n];
}
#endif
@@ -794,7 +794,7 @@ new_gc_thread (nat n, gc_thread *t)
nat g;
gen_workspace *ws;
- t->cap = &capabilities[n];
+ t->cap = capabilities[n];
#ifdef THREADED_RTS
t->id = 0;
@@ -866,12 +866,6 @@ initGcThreads (nat from USED_IF_THREADS, nat to USED_IF_THREADS)
"initGcThreads");
}
- // We have to update the gct->cap pointers to point to the new
- // Capability array now.
- for (i = 0; i < from; i++) {
- gc_threads[i]->cap = &capabilities[gc_threads[i]->cap->no];
- }
-
for (i = from; i < to; i++) {
gc_threads[i] =
stgMallocBytes(sizeof(gc_thread) +
@@ -924,7 +918,7 @@ static StgWord
inc_running (void)
{
StgWord new;
- new = atomic_inc(&gc_running_threads);
+ new = atomic_inc(&gc_running_threads, 1);
ASSERT(new <= n_gc_threads);
return new;
}
@@ -1124,7 +1118,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
for (i=0; i < n_threads; i++) {
if (i == me || gc_threads[i]->idle) continue;
if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
- prodCapability(&capabilities[i], cap->running_task);
+ prodCapability(capabilities[i], cap->running_task);
}
}
for (j=0; j < 10; j++) {
@@ -1132,7 +1126,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
for (i=0; i < n_threads; i++) {
if (i == me || gc_threads[i]->idle) continue;
write_barrier();
- interruptCapability(&capabilities[i]);
+ interruptCapability(capabilities[i]);
if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
retry = rtsTrue;
}
@@ -1228,8 +1222,8 @@ prepare_collected_gen (generation *gen)
g = gen->no;
if (g != 0) {
for (i = 0; i < n_capabilities; i++) {
- freeChain(capabilities[i].mut_lists[g]);
- capabilities[i].mut_lists[g] = allocBlock();
+ freeChain(capabilities[i]->mut_lists[g]);
+ capabilities[i]->mut_lists[g] = allocBlock();
}
}
@@ -1360,7 +1354,7 @@ prepare_uncollected_gen (generation *gen)
// allocate a fresh block for each one. We'll traverse these
// mutable lists as roots early on in the GC.
for (i = 0; i < n_capabilities; i++) {
- stash_mut_list(&capabilities[i], gen->no);
+ stash_mut_list(capabilities[i], gen->no);
}
ASSERT(gen->scavenged_large_objects == NULL);
@@ -1429,7 +1423,7 @@ collect_pinned_object_blocks (void)
for (n = 0; n < n_capabilities; n++) {
prev = NULL;
- for (bd = capabilities[n].pinned_object_blocks; bd != NULL; bd = bd->link) {
+ for (bd = capabilities[n]->pinned_object_blocks; bd != NULL; bd = bd->link) {
prev = bd;
}
if (prev != NULL) {
@@ -1437,8 +1431,8 @@ collect_pinned_object_blocks (void)
if (g0->large_objects != NULL) {
g0->large_objects->u.back = prev;
}
- g0->large_objects = capabilities[n].pinned_object_blocks;
- capabilities[n].pinned_object_blocks = 0;
+ g0->large_objects = capabilities[n]->pinned_object_blocks;
+ capabilities[n]->pinned_object_blocks = 0;
}
}
}
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 9b579abbbc..0d48ba2724 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -615,7 +615,7 @@ checkLocalMutableLists (nat cap_no)
{
nat g;
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
- checkMutableList(capabilities[cap_no].mut_lists[g], g);
+ checkMutableList(capabilities[cap_no]->mut_lists[g], g);
}
}
@@ -756,7 +756,7 @@ findMemoryLeak (void)
nat g, i;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
for (i = 0; i < n_capabilities; i++) {
- markBlocks(capabilities[i].mut_lists[g]);
+ markBlocks(capabilities[i]->mut_lists[g]);
markBlocks(gc_threads[i]->gens[g].part_list);
markBlocks(gc_threads[i]->gens[g].scavd_list);
markBlocks(gc_threads[i]->gens[g].todo_bd);
@@ -767,7 +767,7 @@ findMemoryLeak (void)
for (i = 0; i < n_capabilities; i++) {
markBlocks(nurseries[i].blocks);
- markBlocks(capabilities[i].pinned_object_block);
+ markBlocks(capabilities[i]->pinned_object_block);
}
#ifdef PROFILING
@@ -847,7 +847,7 @@ memInventory (rtsBool show)
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
gen_blocks[g] = 0;
for (i = 0; i < n_capabilities; i++) {
- gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
+ gen_blocks[g] += countBlocks(capabilities[i]->mut_lists[g]);
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
@@ -859,10 +859,10 @@ memInventory (rtsBool show)
for (i = 0; i < n_capabilities; i++) {
ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks);
nursery_blocks += nurseries[i].n_blocks;
- if (capabilities[i].pinned_object_block != NULL) {
- nursery_blocks += capabilities[i].pinned_object_block->blocks;
+ if (capabilities[i]->pinned_object_block != NULL) {
+ nursery_blocks += capabilities[i]->pinned_object_block->blocks;
}
- nursery_blocks += countBlocks(capabilities[i].pinned_object_blocks);
+ nursery_blocks += countBlocks(capabilities[i]->pinned_object_blocks);
}
retainer_blocks = 0;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index a5337bc5b2..b575fc3e52 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -215,7 +215,7 @@ void storageAddCapabilities (nat from, nat to)
// we've moved the nurseries, so we have to update the rNursery
// pointers from the Capabilities.
for (i = 0; i < to; i++) {
- capabilities[i].r.rNursery = &nurseries[i];
+ capabilities[i]->r.rNursery = &nurseries[i];
}
/* The allocation area. Policy: keep the allocation area
@@ -229,7 +229,7 @@ void storageAddCapabilities (nat from, nat to)
// allocate a block for each mut list
for (n = from; n < to; n++) {
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
- capabilities[n].mut_lists[g] = allocBlock();
+ capabilities[n]->mut_lists[g] = allocBlock();
}
}
@@ -493,8 +493,8 @@ assignNurseriesToCapabilities (nat from, nat to)
nat i;
for (i = from; i < to; i++) {
- capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
- capabilities[i].r.rCurrentAlloc = NULL;
+ capabilities[i]->r.rCurrentNursery = nurseries[i].blocks;
+ capabilities[i]->r.rCurrentAlloc = NULL;
}
}
@@ -939,7 +939,7 @@ void updateNurseriesStats (void)
nat i;
for (i = 0; i < n_capabilities; i++) {
- capabilities[i].total_allocated += countOccupied(nurseries[i].blocks);
+ capabilities[i]->total_allocated += countOccupied(nurseries[i].blocks);
}
}
diff --git a/rules/shell-wrapper.mk b/rules/shell-wrapper.mk
index b23e385035..5672f36877 100644
--- a/rules/shell-wrapper.mk
+++ b/rules/shell-wrapper.mk
@@ -62,7 +62,7 @@ ifeq "$$($1_$2_INSTALL_SHELL_WRAPPER_NAME)" ""
$1_$2_INSTALL_SHELL_WRAPPER_NAME = $$($1_$2_PROG)
endif
-# Install the binary in $(libexecdir), and install a shell wrapper in $(bindir)
+# Install the binary in $(ghclibexecdir), and install a shell wrapper in $(bindir)
INSTALL_LIBEXECS += $1/$2/build/tmp/$$($1_$2_PROG)
BINDIST_WRAPPERS += $$($1_$2_SHELL_WRAPPER_NAME)
diff --git a/sync-all b/sync-all
index 85a697af52..f37c37996c 100755
--- a/sync-all
+++ b/sync-all
@@ -50,11 +50,12 @@ sub parsePackages {
foreach (@repos) {
chomp;
$lineNum++;
- if (/^([^# ]+) +([^ ]+) +([^ ]+)$/) {
+ if (/^([^# ]+) +([^ ]+) +([^ ]+) +([^ ]+)$/) {
my %line;
$line{"localpath"} = $1;
$line{"tag"} = $2;
$line{"remotepath"} = $3;
+ $line{"upstreamurl"}= $4;
push @packages, \%line;
$tags{$2} = 0;
diff --git a/utils/hp2ps/Axes.c b/utils/hp2ps/Axes.c
index 4c2e4f5e95..adc3c45146 100644
--- a/utils/hp2ps/Axes.c
+++ b/utils/hp2ps/Axes.c
@@ -203,14 +203,18 @@ Round(floatish y)
int i;
if (y > 10.0) {
- for (i = 0; y > 10.0; y /= 10.0, i++) ;
+ for (i = 0; y > 10.0; y /= 10.0, i++)
+ ;
y = OneTwoFive(y);
- for ( ; i > 0; y = y * 10.0, i--) ;
+ for ( ; i > 0; y = y * 10.0, i--)
+ ;
} else if (y < 1.0) {
- for (i = 0; y < 1.0; y *= 10.0, i++) ;
+ for (i = 0; y < 1.0; y *= 10.0, i++)
+ ;
y = OneTwoFive(y);
- for ( ; i > 0; y = y / 10.0, i--) ;
+ for ( ; i > 0; y = y / 10.0, i--)
+ ;
} else {
y = OneTwoFive(y);
diff --git a/validate b/validate
index 170bb2db25..95425d3770 100755
--- a/validate
+++ b/validate
@@ -1,21 +1,37 @@
#!/bin/sh
-# Flags:
-# --no-clean: don't make clean first, just carry on from
-# a previous interrupted validation run
-# --testsuite-only: don't build the compiler, just run
-# the test suite
-# --hpc: build stage2 with -fhpc, and see how much of the
-# compiler the test suite covers.
-# 2008-07-01: 63% slower than the default.
-# HTML generated here: testsuite/hpc_output/hpc_index.html
-# --normal: Default settings
-# --fast: Omit dyn way, omit binary distribution
-# --slow: Build stage2 with -DDEBUG.
-# 2008-07-01: 14% slower than the default.
-
set -e
+show_help () {
+ cat <<EOF
+Usage: $0 [FLAGS]...
+
+Validate the GHC source tree against the testsuite; see
+http://ghc.haskell.org/trac/ghc/wiki/TestingPatches for more
+information.
+
+Flags:
+ --no-clean don't make clean first, just carry on from
+ a previous interrupted validation run
+ --testsuite-only don't build the compiler, just run the test suite
+ --hpc build stage2 with -fhpc, and see how much of the
+ compiler the test suite covers.
+ 2008-07-01: 63% slower than the default.
+ HTML generated here: testsuite/hpc_output/hpc_index.html
+ --normal Default settings
+ --fast Omit dyn way, omit binary distribution
+ --slow Build stage2 with -DDEBUG.
+ 2008-07-01: 14% slower than the default.
+ --help show this help text.
+
+ Set the environment variable 'CPUS' to the number of CPU cores to
+ use, e.g.
+
+ CPUS=8 ./validate
+
+EOF
+}
+
no_clean=0
testsuite_only=0
hpc=NO
@@ -42,8 +58,12 @@ do
--normal)
speed=NORMAL
;;
+ --help)
+ show_help
+ exit 0;;
*)
- echo "Bad argument: $1" >&2
+ echo "$0: unrecognized argument '$1'" >&2
+ echo "Try '$0 --help' for more information." >&2
exit 1;;
esac
shift