Merge remote-tracking branch 'origin/master' into wip/14691

author: Joachim Breitner <mail@joachim-breitner.de> 2018-01-25 21:38:59 -0500
committer: Joachim Breitner <mail@joachim-breitner.de> 2018-01-25 21:38:59 -0500
commit: 73ed13ed36e5a8e3fe29b9c349dfc292bed66a98 (patch)
tree: 007bfa621a8d2cf4154ce22eba183d38819825bd
parent: dc6de0a3d6c7acc1b4147a66de8fd6d8b9c60e54 (diff)
parent: 06366890ba77c20198d7fccc870083b0bbfb1b11 (diff)
download: haskell-73ed13ed36e5a8e3fe29b9c349dfc292bed66a98.tar.gz
75 files changed, 1434 insertions, 255 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index fdbfd6e857..8ac4a6fa7b 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -587,6 +587,8 @@ data CallishMachOp
   | MO_Memcmp Int
 
   | MO_PopCnt Width
+  | MO_Pdep Width
+  | MO_Pext Width
   | MO_Clz Width
   | MO_Ctz Width
 
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 7ffb4fbe42..8afbd2f9d9 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -1006,6 +1006,16 @@ callishMachOps = listToUFM $
         ( "popcnt32", (,) $ MO_PopCnt W32 ),
         ( "popcnt64", (,) $ MO_PopCnt W64 ),
 
+        ( "pdep8",  (,) $ MO_Pdep W8  ),
+        ( "pdep16", (,) $ MO_Pdep W16 ),
+        ( "pdep32", (,) $ MO_Pdep W32 ),
+        ( "pdep64", (,) $ MO_Pdep W64 ),
+
+        ( "pext8",  (,) $ MO_Pext W8  ),
+        ( "pext16", (,) $ MO_Pext W16 ),
+        ( "pext32", (,) $ MO_Pext W32 ),
+        ( "pext64", (,) $ MO_Pext W64 ),
+
         ( "cmpxchg8",  (,) $ MO_Cmpxchg W8  ),
         ( "cmpxchg16", (,) $ MO_Cmpxchg W16 ),
         ( "cmpxchg32", (,) $ MO_Cmpxchg W32 ),
diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs
index b2a7716c62..2310db2619 100644
--- a/compiler/cmm/Hoopl/Dataflow.hs
+++ b/compiler/cmm/Hoopl/Dataflow.hs
@@ -132,7 +132,8 @@ fixpointAnalysis direction lattice do_block entries blockmap = loop start
     blocks     = sortBlocks direction entries blockmap
     num_blocks = length blocks
     block_arr  = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
-    start      = {-# SCC "start" #-} [0 .. num_blocks - 1]
+    start      = {-# SCC "start" #-} IntSet.fromDistinctAscList
+      [0 .. num_blocks - 1]
     dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
     join       = fact_join lattice
 
@@ -140,8 +141,7 @@ fixpointAnalysis direction lattice do_block entries blockmap = loop start
         :: IntHeap     -- ^ Worklist, i.e., blocks to process
         -> FactBase f  -- ^ Current result (increases monotonically)
         -> FactBase f
-    loop []              !fbase1 = fbase1
-    loop (index : todo1) !fbase1 =
+    loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo =
         let block = block_arr ! index
             out_facts = {-# SCC "do_block" #-} do_block block fbase1
             -- For each of the outgoing edges, we join it with the current
@@ -151,6 +151,7 @@ fixpointAnalysis direction lattice do_block entries blockmap = loop start
                 mapFoldWithKey
                     (updateFact join dep_blocks) (todo1, fbase1) out_facts
         in loop todo2 fbase2
+    loop _ !fbase1 = fbase1
 
 rewriteCmmBwd
     :: DataflowLattice f
@@ -196,7 +197,8 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
     num_blocks = length blocks
     block_arr  = {-# SCC "block_arr_rewrite" #-}
                  listArray (0, num_blocks - 1) blocks
-    start      = {-# SCC "start_rewrite" #-} [0 .. num_blocks - 1]
+    start      = {-# SCC "start_rewrite" #-}
+                 IntSet.fromDistinctAscList [0 .. num_blocks - 1]
     dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks
     join       = fact_join lattice
 
@@ -205,8 +207,8 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
         -> LabelMap CmmBlock  -- ^ Rewritten blocks.
         -> FactBase f         -- ^ Current facts.
         -> UniqSM (LabelMap CmmBlock, FactBase f)
-    loop []              !blocks1 !fbase1 = return (blocks1, fbase1)
-    loop (index : todo1) !blocks1 !fbase1 = do
+    loop todo !blocks1 !fbase1
+      | Just (index, todo1) <- IntSet.minView todo = do
         -- Note that we use the *original* block here. This is important.
         -- We're optimistically rewriting blocks even before reaching the fixed
         -- point, which means that the rewrite might be incorrect. So if the
@@ -220,6 +222,7 @@ fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
                 mapFoldWithKey
                     (updateFact join dep_blocks) (todo1, fbase1) out_facts
         loop todo2 blocks2 fbase2
+    loop _ !blocks1 !fbase1 = return (blocks1, fbase1)
 
 
 {-
@@ -344,7 +347,7 @@ updateFact fact_join dep_blocks lbl new_fact (todo, fbase)
               (NotChanged _) -> (todo, fbase)
               (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z)
   where
-    changed = IntSet.foldr insertIntHeap todo $
+    changed = todo `IntSet.union`
               mapFindWithDefault IntSet.empty lbl dep_blocks
 
 {-
@@ -436,19 +439,4 @@ joinBlocksOO (BMiddle n) b = blockCons n b
 joinBlocksOO b (BMiddle n) = blockSnoc b n
 joinBlocksOO b1 b2 = BCat b1 b2
 
--- -----------------------------------------------------------------------------
--- a Heap of Int
-
--- We should really use a proper Heap here, but my attempts to make
--- one have not succeeded in beating the simple ordered list.  Another
--- alternative is IntSet (using deleteFindMin), but that was also
--- slower than the ordered list in my experiments --SDM 25/1/2012
-
-type IntHeap = [Int] -- ordered
-
-insertIntHeap :: Int -> [Int] -> [Int]
-insertIntHeap x [] = [x]
-insertIntHeap x (y:ys)
-  | x < y     = x : y : ys
-  | x == y    = x : ys
-  | otherwise = y : insertIntHeap x ys
+type IntHeap = IntSet
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 1ddd1cd266..76e4d4cb94 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -789,6 +789,8 @@ pprCallishMachOp_for_C mop
         MO_Memcmp _     -> text "memcmp"
         (MO_BSwap w)    -> ptext (sLit $ bSwapLabel w)
         (MO_PopCnt w)   -> ptext (sLit $ popCntLabel w)
+        (MO_Pext w)     -> ptext (sLit $ pextLabel w)
+        (MO_Pdep w)     -> ptext (sLit $ pdepLabel w)
         (MO_Clz w)      -> ptext (sLit $ clzLabel w)
         (MO_Ctz w)      -> ptext (sLit $ ctzLabel w)
         (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop)
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 0a6ac9dba3..948af2aba0 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -584,6 +584,20 @@ emitPrimOp _      [res] PopCnt32Op [w] = emitPopCntCall res w W32
 emitPrimOp _      [res] PopCnt64Op [w] = emitPopCntCall res w W64
 emitPrimOp dflags [res] PopCntOp   [w] = emitPopCntCall res w (wordWidth dflags)
 
+-- Parallel bit deposit
+emitPrimOp _      [res] Pdep8Op  [src, mask] = emitPdepCall res src mask W8
+emitPrimOp _      [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
+emitPrimOp _      [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
+emitPrimOp _      [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
+emitPrimOp dflags [res] PdepOp   [src, mask] = emitPdepCall res src mask (wordWidth dflags)
+
+-- Parallel bit extract
+emitPrimOp _      [res] Pext8Op  [src, mask] = emitPextCall res src mask W8
+emitPrimOp _      [res] Pext16Op [src, mask] = emitPextCall res src mask W16
+emitPrimOp _      [res] Pext32Op [src, mask] = emitPextCall res src mask W32
+emitPrimOp _      [res] Pext64Op [src, mask] = emitPextCall res src mask W64
+emitPrimOp dflags [res] PextOp   [src, mask] = emitPextCall res src mask (wordWidth dflags)
+
 -- count leading zeros
 emitPrimOp _      [res] Clz8Op  [w] = emitClzCall res w W8
 emitPrimOp _      [res] Clz16Op [w] = emitClzCall res w W16
@@ -2266,6 +2280,20 @@ emitPopCntCall res x width = do
         (MO_PopCnt width)
         [ x ]
 
+emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPdepCall res x y width = do
+    emitPrimCall
+        [ res ]
+        (MO_Pdep width)
+        [ x, y ]
+
+emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPextCall res x y width = do
+    emitPrimCall
+        [ res ]
+        (MO_Pext width)
+        [ x, y ]
+
 emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
 emitClzCall res x width = do
     emitPrimCall
diff --git a/compiler/coreSyn/CoreOpt.hs b/compiler/coreSyn/CoreOpt.hs
index 0f35e8f3ac..04e604eb06 100644
--- a/compiler/coreSyn/CoreOpt.hs
+++ b/compiler/coreSyn/CoreOpt.hs
@@ -968,9 +968,9 @@ pushCoTyArg co ty
        -- tyR = forall (a2 :: k2). ty2
 
     co1 = mkNthCo 0 co
-       -- co1 :: k1 ~ k2
-       -- Note that NthCo can extract an equality between the kinds
-       -- of the types related by a coercion between forall-types.
+       -- co1 :: k1 ~N k2
+       -- Note that NthCo can extract a Nominal equality between the
+       -- kinds of the types related by a coercion between forall-types.
        -- See the NthCo case in CoreLint.
 
     co2 = mkInstCo co (mkCoherenceLeftCo (mkNomReflCo ty) co1)
diff --git a/compiler/coreSyn/CoreUnfold.hs b/compiler/coreSyn/CoreUnfold.hs
index c459fd2941..2e2b7a3b48 100644
--- a/compiler/coreSyn/CoreUnfold.hs
+++ b/compiler/coreSyn/CoreUnfold.hs
@@ -1241,8 +1241,8 @@ tryUnfolding dflags id lone_variable
           = True
           | otherwise
           = case cont_info of
-              CaseCtxt   -> not (lone_variable && is_wf)  -- Note [Lone variables]
-              ValAppCtxt -> True                              -- Note [Cast then apply]
+              CaseCtxt   -> not (lone_variable && is_exp)  -- Note [Lone variables]
+              ValAppCtxt -> True                           -- Note [Cast then apply]
               RuleArgCtxt -> uf_arity > 0  -- See Note [Unfold info lazy contexts]
               DiscArgCtxt -> uf_arity > 0  --
               RhsCtxt     -> uf_arity > 0  --
@@ -1388,9 +1388,10 @@ because the latter is strict.
         s = "foo"
         f = \x -> ...(error s)...
 
-Fundamentally such contexts should not encourage inlining because the
+Fundamentally such contexts should not encourage inlining because, provided
+the RHS is "expandable" (see Note [exprIsExpandable] in CoreUtils) the
 context can ``see'' the unfolding of the variable (e.g. case or a
-RULE) so there's no gain.  If the thing is bound to a value.
+RULE) so there's no gain.
 
 However, watch out:
 
diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs
index 5e32dc6093..3d5f4bcb5a 100644
--- a/compiler/coreSyn/CoreUtils.hs
+++ b/compiler/coreSyn/CoreUtils.hs
@@ -1083,29 +1083,6 @@ Note that exprIsHNF does not imply exprIsCheap.  Eg
 This responds True to exprIsHNF (you can discard a seq), but
 False to exprIsCheap.
 
-Note [exprIsExpandable]
-~~~~~~~~~~~~~~~~~~~~~~~
-An expression is "expandable" if we are willing to dupicate it, if doing
-so might make a RULE or case-of-constructor fire.  Mainly this means
-data-constructor applications, but it's a bit more generous than exprIsCheap
-because it is true of "CONLIKE" Ids: see Note [CONLIKE pragma] in BasicTypes.
-
-It is used to set the uf_expandable field of an Unfolding, and that
-in turn is used
-  * In RULE matching
-  * In exprIsConApp_maybe, exprIsLiteral_maybe, exprIsLambda_maybe
-
-But take care: exprIsExpandable should /not/ be true of primops.  I
-found this in test T5623a:
-    let q = /\a. Ptr a (a +# b)
-    in case q @ Float of Ptr v -> ...q...
-
-q's inlining should not be expandable, else exprIsConApp_maybe will
-say that (q @ Float) expands to (Ptr a (a +# b)), and that will
-duplicate the (a +# b) primop, which we should not do lightly.
-(It's quite hard to trigger this bug, but T13155 does so for GHC 8.0.)
-
-
 Note [Arguments and let-bindings exprIsCheapX]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 What predicate should we apply to the argument of an application, or the
@@ -1131,16 +1108,12 @@ in this (which it previously was):
 -}
 
 --------------------
-exprIsCheap :: CoreExpr -> Bool
-exprIsCheap = exprIsCheapX isCheapApp
-
-exprIsExpandable :: CoreExpr -> Bool -- See Note [exprIsExpandable]
-exprIsExpandable = exprIsCheapX isExpandableApp
-
 exprIsWorkFree :: CoreExpr -> Bool   -- See Note [exprIsWorkFree]
 exprIsWorkFree = exprIsCheapX isWorkFreeApp
 
---------------------
+exprIsCheap :: CoreExpr -> Bool
+exprIsCheap = exprIsCheapX isCheapApp
+
 exprIsCheapX :: CheapAppFun -> CoreExpr -> Bool
 exprIsCheapX ok_app e
   = ok e
@@ -1168,6 +1141,75 @@ exprIsCheapX ok_app e
       -- App, Let: see Note [Arguments and let-bindings exprIsCheapX]
 
 
+{- Note [exprIsExpandable]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+An expression is "expandable" if we are willing to duplicate it, if doing
+so might make a RULE or case-of-constructor fire.  Consider
+   let x = (a,b)
+       y = build g
+   in ....(case x of (p,q) -> rhs)....(foldr k z y)....
+
+We don't inline 'x' or 'y' (see Note [Lone variables] in CoreUnfold),
+but we do want
+
+ * the case-expression to simplify
+   (via exprIsConApp_maybe, exprIsLiteral_maybe)
+
+ * the foldr/build RULE to fire
+   (by expanding the unfolding during rule matching)
+
+So we classify the unfolding of a let-binding as "expandable" (via the
+uf_expandable field) if we want to do this kind of on-the-fly
+expansion.  Specifically:
+
+* True of constructor applications (K a b)
+
+* True of applications of a "CONLIKE" Id; see Note [CONLIKE pragma] in BasicTypes.
+  (NB: exprIsCheap might not be true of this)
+
+* False of case-expressions.  If we have
+    let x = case ... in ...(case x of ...)...
+  we won't simplify.  We have to inline x.  See Trac #14688.
+
+* False of let-expressions (same reason); and in any case we
+  float lets out of an RHS if doing so will reveal an expandable
+  application (see SimplEnv.doFloatFromRhs).
+
+* Take care: exprIsExpandable should /not/ be true of primops.  I
+  found this in test T5623a:
+    let q = /\a. Ptr a (a +# b)
+    in case q @ Float of Ptr v -> ...q...
+
+  q's inlining should not be expandable, else exprIsConApp_maybe will
+  say that (q @ Float) expands to (Ptr a (a +# b)), and that will
+  duplicate the (a +# b) primop, which we should not do lightly.
+  (It's quite hard to trigger this bug, but T13155 does so for GHC 8.0.)
+-}
+
+-------------------------------------
+exprIsExpandable :: CoreExpr -> Bool
+-- See Note [exprIsExpandable]
+exprIsExpandable e
+  = ok e
+  where
+    ok e = go 0 e
+
+    -- n is the number of value arguments
+    go n (Var v)                      = isExpandableApp v n
+    go _ (Lit {})                     = True
+    go _ (Type {})                    = True
+    go _ (Coercion {})                = True
+    go n (Cast e _)                   = go n e
+    go n (Tick t e) | tickishCounts t = False
+                    | otherwise       = go n e
+    go n (Lam x e)  | isRuntimeVar x  = n==0 || go (n-1) e
+                    | otherwise       = go n e
+    go n (App f e)  | isRuntimeArg e  = go (n+1) f && ok e
+                    | otherwise       = go n f
+    go _ (Case {})                    = False
+    go _ (Let {})                     = False
+
+
 -------------------------------------
 type CheapAppFun = Id -> Arity -> Bool
   -- Is an application of this function to n *value* args
diff --git a/compiler/coreSyn/MkCore.hs b/compiler/coreSyn/MkCore.hs
index 49a9e9dbbd..f2b940bfd1 100644
--- a/compiler/coreSyn/MkCore.hs
+++ b/compiler/coreSyn/MkCore.hs
@@ -867,4 +867,3 @@ mkAbsentErrorApp res_ty err_msg
   = mkApps (Var aBSENT_ERROR_ID) [ Type res_ty, err_string ]
   where
     err_string = Lit (mkMachString err_msg)
-
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index a88642b531..e812dd445f 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -46,6 +46,8 @@ import Data.Maybe ( catMaybes )
 type Atomic = Bool
 type LlvmStatements = OrdList LlvmStatement
 
+data Signage = Signed | Unsigned deriving (Eq, Show)
+
 -- -----------------------------------------------------------------------------
 -- | Top-level of the LLVM proc Code generator
 --
@@ -207,7 +209,7 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
     let args_hints' = zip args arg_hints
     argVars <- arg_varsW args_hints' ([], nilOL, [])
     fptr    <- liftExprData $ getFunPtr funTy t
-    argVars' <- castVarsW $ zip argVars argTy
+    argVars' <- castVarsW Signed $ zip argVars argTy
 
     doTrashStmts
     let argSuffix = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
@@ -218,6 +220,11 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
 -- and return types
 genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
     genCallSimpleCast w t dsts args
+
+genCall t@(PrimTarget (MO_Pdep w)) dsts args =
+    genCallSimpleCast2 w t dsts args
+genCall t@(PrimTarget (MO_Pext w)) dsts args =
+    genCallSimpleCast2 w t dsts args
 genCall t@(PrimTarget (MO_Clz w)) dsts args =
     genCallSimpleCast w t dsts args
 genCall t@(PrimTarget (MO_Ctz w)) dsts args =
@@ -285,7 +292,7 @@ genCall t@(PrimTarget op) [] args
     let args_hints = zip args arg_hints
     argVars       <- arg_varsW args_hints ([], nilOL, [])
     fptr          <- getFunPtrW funTy t
-    argVars' <- castVarsW $ zip argVars argTy
+    argVars' <- castVarsW Signed $ zip argVars argTy
 
     doTrashStmts
     let alignVal = mkIntLit i32 align
@@ -518,7 +525,7 @@ genCallExtract target@(PrimTarget op) w (argA, argB) (llvmTypeA, llvmTypeB) = do
     -- Process the arguments.
     let args_hints = zip [argA, argB] (snd $ foreignTargetHints target)
     (argsV1, args1, top1) <- arg_vars args_hints ([], nilOL, [])
-    (argsV2, args2) <- castVars $ zip argsV1 argTy
+    (argsV2, args2) <- castVars Signed $ zip argsV1 argTy
 
     -- Get the function and make the call.
     fname <- cmmPrimOpFunctions op
@@ -558,9 +565,9 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
     let (_, arg_hints) = foreignTargetHints t
     let args_hints = zip args arg_hints
     (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
-    (argsV', stmts4)            <- castVars $ zip argsV [width]
+    (argsV', stmts4)            <- castVars Signed $ zip argsV [width]
     (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
-    ([retV'], stmts5)           <- castVars [(retV,dstTy)]
+    ([retV'], stmts5)           <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
     let s2                       = Store retV' dstV
 
     let stmts = stmts2 `appOL` stmts4 `snocOL`
@@ -569,6 +576,37 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
 genCallSimpleCast _ _ dsts _ =
     panic ("genCallSimpleCast: " ++ show (length dsts) ++ " dsts")
 
+-- Handle simple function call that only need simple type casting, of the form:
+--   truncate arg >>= \a -> call(a) >>= zext
+--
+-- since GHC only really has i32 and i64 types and things like Word8 are backed
+-- by an i32 and just present a logical i8 range. So we must handle conversions
+-- from i32 to i8 explicitly as LLVM is strict about types.
+genCallSimpleCast2 :: Width -> ForeignTarget -> [CmmFormal] -> [CmmActual]
+              -> LlvmM StmtData
+genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
+    let width = widthToLlvmInt w
+        dstTy = cmmToLlvmType $ localRegType dst
+
+    fname                       <- cmmPrimOpFunctions op
+    (fptr, _, top3)             <- getInstrinct fname width (const width <$> args)
+
+    dstV                        <- getCmmReg (CmmLocal dst)
+
+    let (_, arg_hints) = foreignTargetHints t
+    let args_hints = zip args arg_hints
+    (argsV, stmts2, top2)       <- arg_vars args_hints ([], nilOL, [])
+    (argsV', stmts4)            <- castVars Signed $ zip argsV (const width <$> argsV)
+    (retV, s1)                  <- doExpr width $ Call StdCall fptr argsV' []
+    ([retV'], stmts5)           <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
+    let s2                       = Store retV' dstV
+
+    let stmts = stmts2 `appOL` stmts4 `snocOL`
+                s1 `appOL` stmts5 `snocOL` s2
+    return (stmts, top2 ++ top3)
+genCallSimpleCast2 _ _ dsts _ =
+    panic ("genCallSimpleCast2: " ++ show (length dsts) ++ " dsts")
+
 -- | Create a function pointer from a target.
 getFunPtrW :: (LMString -> LlvmType) -> ForeignTarget
            -> WriterT LlvmAccum LlvmM LlvmVar
@@ -638,31 +676,32 @@ arg_vars ((e, _):rest) (vars, stmts, tops)
 
 
 -- | Cast a collection of LLVM variables to specific types.
-castVarsW :: [(LlvmVar, LlvmType)]
+castVarsW :: Signage
+          -> [(LlvmVar, LlvmType)]
           -> WriterT LlvmAccum LlvmM [LlvmVar]
-castVarsW vars = do
-    (vars, stmts) <- lift $ castVars vars
+castVarsW signage vars = do
+    (vars, stmts) <- lift $ castVars signage vars
     tell $ LlvmAccum stmts mempty
     return vars
 
 -- | Cast a collection of LLVM variables to specific types.
-castVars :: [(LlvmVar, LlvmType)]
+castVars :: Signage -> [(LlvmVar, LlvmType)]
          -> LlvmM ([LlvmVar], LlvmStatements)
-castVars vars = do
-                done <- mapM (uncurry castVar) vars
+castVars signage vars = do
+                done <- mapM (uncurry (castVar signage)) vars
                 let (vars', stmts) = unzip done
                 return (vars', toOL stmts)
 
 -- | Cast an LLVM variable to a specific type, panicing if it can't be done.
-castVar :: LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
-castVar v t | getVarType v == t
+castVar :: Signage -> LlvmVar -> LlvmType -> LlvmM (LlvmVar, LlvmStatement)
+castVar signage v t | getVarType v == t
             = return (v, Nop)
 
             | otherwise
             = do dflags <- getDynFlags
                  let op = case (getVarType v, t) of
                       (LMInt n, LMInt m)
-                          -> if n < m then LM_Sext else LM_Trunc
+                          -> if n < m then extend else LM_Trunc
                       (vt, _) | isFloat vt && isFloat t
                           -> if llvmWidthInBits dflags vt < llvmWidthInBits dflags t
                                 then LM_Fpext else LM_Fptrunc
@@ -676,7 +715,16 @@ castVar v t | getVarType v == t
                       (vt, _) -> panic $ "castVars: Can't cast this type ("
                                   ++ showSDoc dflags (ppr vt) ++ ") to (" ++ showSDoc dflags (ppr t) ++ ")"
                  doExpr t $ Cast op v t
+    where extend = case signage of
+            Signed      -> LM_Sext
+            Unsigned    -> LM_Zext
+
 
+cmmPrimOpRetValSignage :: CallishMachOp -> Signage
+cmmPrimOpRetValSignage mop = case mop of
+    MO_Pdep _   -> Unsigned
+    MO_Pext _   -> Unsigned
+    _           -> Signed
 
 -- | Decide what C function to use to implement a CallishMachOp
 cmmPrimOpFunctions :: CallishMachOp -> LlvmM LMString
@@ -735,6 +783,15 @@ cmmPrimOpFunctions mop = do
     (MO_Clz w)    -> fsLit $ "llvm.ctlz."   ++ showSDoc dflags (ppr $ widthToLlvmInt w)
     (MO_Ctz w)    -> fsLit $ "llvm.cttz."   ++ showSDoc dflags (ppr $ widthToLlvmInt w)
 
+    (MO_Pdep w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
+                      in  if isBmi2Enabled dflags
+                            then fsLit $ "llvm.x86.bmi.pdep."   ++ w'
+                            else fsLit $ "hs_pdep"              ++ w'
+    (MO_Pext w)   ->  let w' = showSDoc dflags (ppr $ widthInBits w)
+                      in  if isBmi2Enabled dflags
+                            then fsLit $ "llvm.x86.bmi.pext."   ++ w'
+                            else fsLit $ "hs_pext"              ++ w'
+
     (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
 
     MO_AddIntC w    -> fsLit $ "llvm.sadd.with.overflow."
@@ -1212,7 +1269,7 @@ genMachOp _ op [x] = case op of
 
         negateVec ty v2 negOp = do
             (vx, stmts1, top) <- exprToVar x
-            ([vx'], stmts2) <- castVars [(vx, ty)]
+            ([vx'], stmts2) <- castVars Signed [(vx, ty)]
             (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
             return (v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
 
@@ -1275,7 +1332,7 @@ genMachOp_slow :: EOption -> MachOp -> [CmmExpr] -> LlvmM ExprData
 genMachOp_slow _ (MO_V_Extract l w) [val, idx] = runExprData $ do
     vval <- exprToVarW val
     vidx <- exprToVarW idx
-    [vval'] <- castVarsW [(vval, LMVector l ty)]
+    [vval'] <- castVarsW Signed [(vval, LMVector l ty)]
     doExprW ty $ Extract vval' vidx
   where
     ty = widthToLlvmInt w
@@ -1283,7 +1340,7 @@ genMachOp_slow _ (MO_V_Extract l w) [val, idx] = runExprData $ do
 genMachOp_slow _ (MO_VF_Extract l w) [val, idx] = runExprData $ do
     vval <- exprToVarW val
     vidx <- exprToVarW idx
-    [vval'] <- castVarsW [(vval, LMVector l ty)]
+    [vval'] <- castVarsW Signed [(vval, LMVector l ty)]
     doExprW ty $ Extract vval' vidx
   where
     ty = widthToLlvmFloat w
@@ -1293,7 +1350,7 @@ genMachOp_slow _ (MO_V_Insert l w) [val, elt, idx] = runExprData $ do
     vval <- exprToVarW val
     velt <- exprToVarW elt
     vidx <- exprToVarW idx
-    [vval'] <- castVarsW [(vval, ty)]
+    [vval'] <- castVarsW Signed [(vval, ty)]
     doExprW ty $ Insert vval' velt vidx
   where
     ty = LMVector l (widthToLlvmInt w)
@@ -1302,7 +1359,7 @@ genMachOp_slow _ (MO_VF_Insert l w) [val, elt, idx] = runExprData $ do
     vval <- exprToVarW val
     velt <- exprToVarW elt
     vidx <- exprToVarW idx
-    [vval'] <- castVarsW [(vval, ty)]
+    [vval'] <- castVarsW Signed [(vval, ty)]
     doExprW ty $ Insert vval' velt vidx
   where
     ty = LMVector l (widthToLlvmFloat w)
@@ -1414,7 +1471,7 @@ genMachOp_slow opt op [x, y] = case op of
         binCastLlvmOp ty binOp = runExprData $ do
             vx <- exprToVarW x
             vy <- exprToVarW y
-            [vx', vy'] <- castVarsW [(vx, ty), (vy, ty)]
+            [vx', vy'] <- castVarsW Signed [(vx, ty), (vy, ty)]
             doExprW ty $ binOp vx' vy'
 
         -- | Need to use EOption here as Cmm expects word size results from
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs
index 90976b115b..c6c9f9e1f6 100644
--- a/compiler/main/DriverPipeline.hs
+++ b/compiler/main/DriverPipeline.hs
@@ -848,6 +848,8 @@ llvmOptions dflags =
               ++ ["+avx512cd"| isAvx512cdEnabled dflags ]
               ++ ["+avx512er"| isAvx512erEnabled dflags ]
               ++ ["+avx512pf"| isAvx512pfEnabled dflags ]
+              ++ ["+bmi"     | isBmiEnabled dflags      ]
+              ++ ["+bmi2"    | isBmi2Enabled dflags     ]
 
 -- -----------------------------------------------------------------------------
 -- | Each phase in the pipeline returns the next phase to execute, and the
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index ef4e2f8b85..05d1ec10bc 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -150,6 +150,8 @@ module DynFlags (
         isSseEnabled,
         isSse2Enabled,
         isSse4_2Enabled,
+        isBmiEnabled,
+        isBmi2Enabled,
         isAvxEnabled,
         isAvx2Enabled,
         isAvx512cdEnabled,
@@ -1005,6 +1007,7 @@ data DynFlags = DynFlags {
 
   -- | Machine dependent flags (-m<blah> stuff)
   sseVersion            :: Maybe SseVersion,
+  bmiVersion            :: Maybe BmiVersion,
   avx                   :: Bool,
   avx2                  :: Bool,
   avx512cd              :: Bool, -- Enable AVX-512 Conflict Detection Instructions.
@@ -1806,6 +1809,7 @@ defaultDynFlags mySettings myLlvmTargets =
         interactivePrint = Nothing,
         nextWrapperNum = panic "defaultDynFlags: No nextWrapperNum",
         sseVersion = Nothing,
+        bmiVersion = Nothing,
         avx = False,
         avx2 = False,
         avx512cd = False,
@@ -3201,6 +3205,10 @@ dynamic_flags_deps = [
                                                   d { sseVersion = Just SSE4 }))
   , make_ord_flag defGhcFlag "msse4.2"      (noArg (\d ->
                                                  d { sseVersion = Just SSE42 }))
+  , make_ord_flag defGhcFlag "mbmi"         (noArg (\d ->
+                                                 d { bmiVersion = Just BMI1 }))
+  , make_ord_flag defGhcFlag "mbmi2"        (noArg (\d ->
+                                                 d { bmiVersion = Just BMI2 }))
   , make_ord_flag defGhcFlag "mavx"         (noArg (\d -> d { avx = True }))
   , make_ord_flag defGhcFlag "mavx2"        (noArg (\d -> d { avx2 = True }))
   , make_ord_flag defGhcFlag "mavx512cd"    (noArg (\d ->
@@ -4049,6 +4057,7 @@ xFlagsDeps = [
   flagSpec "MonomorphismRestriction"          LangExt.MonomorphismRestriction,
   flagSpec "MultiParamTypeClasses"            LangExt.MultiParamTypeClasses,
   flagSpec "MultiWayIf"                       LangExt.MultiWayIf,
+  flagSpec "NumericUnderscores"               LangExt.NumericUnderscores,
   flagSpec "NPlusKPatterns"                   LangExt.NPlusKPatterns,
   flagSpec "NamedFieldPuns"                   LangExt.RecordPuns,
   flagSpec "NamedWildCards"                   LangExt.NamedWildCards,
@@ -5447,12 +5456,32 @@ isAvx512pfEnabled :: DynFlags -> Bool
 isAvx512pfEnabled dflags = avx512pf dflags
 
 -- -----------------------------------------------------------------------------
+-- BMI2
+
+data BmiVersion = BMI1
+                | BMI2
+                deriving (Eq, Ord)
+
+isBmiEnabled :: DynFlags -> Bool
+isBmiEnabled dflags = case platformArch (targetPlatform dflags) of
+    ArchX86_64 -> bmiVersion dflags >= Just BMI1
+    ArchX86    -> bmiVersion dflags >= Just BMI1
+    _          -> False
+
+isBmi2Enabled :: DynFlags -> Bool
+isBmi2Enabled dflags = case platformArch (targetPlatform dflags) of
+    ArchX86_64 -> bmiVersion dflags >= Just BMI2
+    ArchX86    -> bmiVersion dflags >= Just BMI2
+    _          -> False
+
+-- -----------------------------------------------------------------------------
 -- Linker/compiler information
 
 -- LinkerInfo contains any extra options needed by the system linker.
 data LinkerInfo
   = GnuLD    [Option]
   | GnuGold  [Option]
+  | LlvmLLD  [Option]
   | DarwinLD [Option]
   | SolarisLD [Option]
   | AixLD    [Option]
diff --git a/compiler/main/SysTools/Info.hs b/compiler/main/SysTools/Info.hs
index e9dc68508b..6b310578ff 100644
--- a/compiler/main/SysTools/Info.hs
+++ b/compiler/main/SysTools/Info.hs
@@ -96,6 +96,7 @@ https://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc.pdf :
 neededLinkArgs :: LinkerInfo -> [Option]
 neededLinkArgs (GnuLD o)     = o
 neededLinkArgs (GnuGold o)   = o
+neededLinkArgs (LlvmLLD o)   = o
 neededLinkArgs (DarwinLD o)  = o
 neededLinkArgs (SolarisLD o) = o
 neededLinkArgs (AixLD o)     = o
@@ -140,6 +141,9 @@ getLinkerInfo' dflags = do
           -- ELF specific flag, see Note [ELF needed shared libs]
           return (GnuGold [Option "-Wl,--no-as-needed"])
 
+        | any ("LLD" `isPrefixOf`) stdo =
+          return (LlvmLLD [])
+
          -- Unknown linker.
         | otherwise = fail "invalid --version output, or linker is unsupported"
 
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index ad61a002d3..399d646000 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -5,6 +5,8 @@ module CPrim
     , atomicRMWLabel
     , cmpxchgLabel
     , popCntLabel
+    , pdepLabel
+    , pextLabel
     , bSwapLabel
     , clzLabel
     , ctzLabel
@@ -26,6 +28,24 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
     pprWidth W64 = "64"
     pprWidth w   = pprPanic "popCntLabel: Unsupported word width " (ppr w)
 
+pdepLabel :: Width -> String
+pdepLabel w = "hs_pdep" ++ pprWidth w
+  where
+    pprWidth W8  = "8"
+    pprWidth W16 = "16"
+    pprWidth W32 = "32"
+    pprWidth W64 = "64"
+    pprWidth w   = pprPanic "pdepLabel: Unsupported word width " (ppr w)
+
+pextLabel :: Width -> String
+pextLabel w = "hs_pext" ++ pprWidth w
+  where
+    pprWidth W8  = "8"
+    pprWidth W16 = "16"
+    pprWidth W32 = "32"
+    pprWidth W64 = "64"
+    pprWidth w   = pprPanic "pextLabel: Unsupported word width " (ppr w)
+
 bSwapLabel :: Width -> String
 bSwapLabel w = "hs_bswap" ++ pprWidth w
   where
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 898a31a657..e2c568c836 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -2004,6 +2004,8 @@ genCCall' dflags gcp target dest_regs args
 
                     MO_BSwap w   -> (fsLit $ bSwapLabel w, False)
                     MO_PopCnt w  -> (fsLit $ popCntLabel w, False)
+                    MO_Pdep w    -> (fsLit $ pdepLabel w, False)
+                    MO_Pext w    -> (fsLit $ pextLabel w, False)
                     MO_Clz _     -> unsupported
                     MO_Ctz _     -> unsupported
                     MO_AtomicRMW {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 55c1d1531d..6dfd58950e 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -654,6 +654,8 @@ outOfLineMachOp_table mop
 
         MO_BSwap w   -> fsLit $ bSwapLabel w
         MO_PopCnt w  -> fsLit $ popCntLabel w
+        MO_Pdep w    -> fsLit $ pdepLabel w
+        MO_Pext w    -> fsLit $ pextLabel w
         MO_Clz w     -> fsLit $ clzLabel w
         MO_Ctz w     -> fsLit $ ctzLabel w
         MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 6c0e0ac783..eb6af1ff41 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1872,6 +1872,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
     format = intFormat width
     lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
 
+genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 then
+                         -- The PDEP instruction doesn't take a r/m8
+                         unitOL (MOVZxL II8  (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL II8  (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PDEP   II16 (OpReg mask_r) (OpReg src_r ) dst_r)
+                     else
+                         unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+                    (if width == W8 || width == W16 then
+                         -- We used a 16-bit destination register above,
+                         -- so zero-extend
+                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+                     else nilOL)
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                           [NoHint] [NoHint]
+                                                           CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
+
+genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 then
+                         -- The PEXT instruction doesn't take a r/m8
+                         unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
+                     else
+                         unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+                    (if width == W8 || width == W16 then
+                         -- We used a 16-bit destination register above,
+                         -- so zero-extend
+                         unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+                     else nilOL)
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                           [NoHint] [NoHint]
+                                                           CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
+
 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
   | is32Bit && width == W64 = do
     -- Fallback to `hs_clz64` on i386
@@ -2689,6 +2755,9 @@ outOfLineCmmOp mop res args
               MO_Clz w     -> fsLit $ clzLabel w
               MO_Ctz _     -> unsupported
 
+              MO_Pdep w    -> fsLit $ pdepLabel w
+              MO_Pext w    -> fsLit $ pextLabel w
+
               MO_AtomicRMW _ _ -> fsLit "atomicrmw"
               MO_AtomicRead _  -> fsLit "atomicread"
               MO_AtomicWrite _ -> fsLit "atomicwrite"
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index c937d4dba0..f4f625b4a5 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -345,6 +345,10 @@ data Instr
         | BSF         Format Operand Reg -- bit scan forward
         | BSR         Format Operand Reg -- bit scan reverse
 
+    -- bit manipulation instructions
+        | PDEP        Format Operand Operand Reg -- [BMI2] deposit bits to   the specified mask
+        | PEXT        Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
+
     -- prefetch
         | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
                                         -- variant can be NTA, Lvl0, Lvl1, or Lvl2
@@ -464,6 +468,9 @@ x86_regUsageOfInstr platform instr
     BSF    _ src dst -> mkRU (use_R src []) [dst]
     BSR    _ src dst -> mkRU (use_R src []) [dst]
 
+    PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+    PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+
     -- note: might be a better way to do this
     PREFETCH _  _ src -> mkRU (use_R src []) []
     LOCK i              -> x86_regUsageOfInstr platform i
@@ -640,6 +647,8 @@ x86_patchRegsOfInstr instr env
     CLTD _              -> instr
 
     POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
+    PDEP   fmt src mask dst -> PDEP   fmt (patchOp src) (patchOp mask) (env dst)
+    PEXT   fmt src mask dst -> PEXT   fmt (patchOp src) (patchOp mask) (env dst)
     BSF    fmt src dst -> BSF    fmt (patchOp src) (env dst)
     BSR    fmt src dst -> BSR    fmt (patchOp src) (env dst)
 
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 84ce7516b5..f5011b2a95 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -648,6 +648,9 @@ pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst
 pprInstr (BSF format src dst)    = pprOpOp (sLit "bsf")    format src (OpReg dst)
 pprInstr (BSR format src dst)    = pprOpOp (sLit "bsr")    format src (OpReg dst)
 
+pprInstr (PDEP format src mask dst)   = pprFormatOpOpReg (sLit "pdep") format src mask dst
+pprInstr (PEXT format src mask dst)   = pprFormatOpOpReg (sLit "pext") format src mask dst
+
 pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
 pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
 pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
@@ -1262,6 +1265,16 @@ pprFormatRegRegReg name format reg1 reg2 reg3
         pprReg format reg3
     ]
 
+pprFormatOpOpReg :: LitString -> Format -> Operand -> Operand -> Reg -> SDoc
+pprFormatOpOpReg name format op1 op2 reg3
+  = hcat [
+        pprMnemonic name format,
+        pprOperand format op1,
+        comma,
+        pprOperand format op2,
+        comma,
+        pprReg format reg3
+    ]
 
 pprFormatAddrReg :: LitString -> Format -> AddrMode -> Reg -> SDoc
 pprFormatAddrReg name format op dst
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 76cc4ee6e6..d8a670e7fb 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -177,12 +177,14 @@ $docsym    = [\| \^ \* \$]
 @varsym    = ($symbol # \:) $symbol*  -- variable (operator) symbol
 @consym    = \: $symbol*              -- constructor (operator) symbol
 
-@decimal     = $decdigit+
-@binary      = $binit+
-@octal       = $octit+
-@hexadecimal = $hexit+
-@exponent    = [eE] [\-\+]? @decimal
-@bin_exponent = [pP] [\-\+]? @decimal
+-- See Note [Lexing NumericUnderscores extension] and #14473
+@numspc       = _*                   -- numeric spacer (#14473)
+@decimal      = $decdigit(@numspc $decdigit)*
+@binary       = $binit(@numspc $binit)*
+@octal        = $octit(@numspc $octit)*
+@hexadecimal  = $hexit(@numspc $hexit)*
+@exponent     = @numspc [eE] [\-\+]? @decimal
+@bin_exponent = @numspc [pP] [\-\+]? @decimal
 
 @qual = (@conid \.)+
 @qvarid = @qual @varid
@@ -190,8 +192,8 @@ $docsym    = [\| \^ \* \$]
 @qvarsym = @qual @varsym
 @qconsym = @qual @consym
 
-@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
-@hex_floating_point = @hexadecimal \. @hexadecimal @bin_exponent? | @hexadecimal @bin_exponent
+@floating_point = @numspc @decimal \. @decimal @exponent? | @numspc @decimal @exponent
+@hex_floating_point = @numspc @hexadecimal \. @hexadecimal @bin_exponent? | @numspc @hexadecimal @bin_exponent
 
 -- normal signed numerical literals can only be explicitly negative,
 -- not explicitly positive (contrast @exponent)
@@ -485,24 +487,34 @@ $tab          { warnTab }
 
 -- For the normal boxed literals we need to be careful
 -- when trying to be close to Haskell98
+
+-- Note [Lexing NumericUnderscores extension] (#14473)
+--
+-- NumericUnderscores extension allows underscores in numeric literals.
+-- Multiple underscores are represented with @numspc macro.
+-- To be simpler, we have only the definitions with underscores.
+-- And then we have a separate function (tok_integral and tok_frac)
+-- that validates the literals.
+-- If extensions are not enabled, check that there are no underscores.
+--
 <0> {
   -- Normal integral literals (:: Num a => a, from Integer)
   @decimal                                                               { tok_num positive 0 0 decimal }
-  0[bB] @binary                / { ifExtension binaryLiteralsEnabled }   { tok_num positive 2 2 binary }
-  0[oO] @octal                                                           { tok_num positive 2 2 octal }
-  0[xX] @hexadecimal                                                     { tok_num positive 2 2 hexadecimal }
+  0[bB] @numspc @binary        / { ifExtension binaryLiteralsEnabled }   { tok_num positive 2 2 binary }
+  0[oO] @numspc @octal                                                   { tok_num positive 2 2 octal }
+  0[xX] @numspc @hexadecimal                                             { tok_num positive 2 2 hexadecimal }
   @negative @decimal           / { ifExtension negativeLiteralsEnabled } { tok_num negative 1 1 decimal }
-  @negative 0[bB] @binary      / { ifExtension negativeLiteralsEnabled `alexAndPred`
-                                   ifExtension binaryLiteralsEnabled }   { tok_num negative 3 3 binary }
-  @negative 0[oO] @octal       / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
-  @negative 0[xX] @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
+  @negative 0[bB] @numspc @binary  / { ifExtension negativeLiteralsEnabled `alexAndPred`
+                                       ifExtension binaryLiteralsEnabled }   { tok_num negative 3 3 binary }
+  @negative 0[oO] @numspc @octal   / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
+  @negative 0[xX] @numspc @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point                                                        { strtoken tok_float }
-  @negative @floating_point    / { ifExtension negativeLiteralsEnabled } { strtoken tok_float }
-  0[xX] @hex_floating_point          / { ifExtension hexFloatLiteralsEnabled } { strtoken tok_hex_float }
-  @negative 0[xX]@hex_floating_point / { ifExtension hexFloatLiteralsEnabled `alexAndPred`
-                                    ifExtension negativeLiteralsEnabled } { strtoken tok_hex_float }
+  @floating_point                                                        { tok_frac 0 tok_float }
+  @negative @floating_point    / { ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_float }
+  0[xX] @numspc @hex_floating_point     / { ifExtension hexFloatLiteralsEnabled } { tok_frac 0 tok_hex_float }
+  @negative 0[xX] @numspc @hex_floating_point / { ifExtension hexFloatLiteralsEnabled `alexAndPred`
+                                                  ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_hex_float }
 }
 
 <0> {
@@ -510,26 +522,26 @@ $tab          { warnTab }
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
-  0[bB] @binary                \# / { ifExtension magicHashEnabled `alexAndPred`
+  0[bB] @numspc @binary        \# / { ifExtension magicHashEnabled `alexAndPred`
                                       ifExtension binaryLiteralsEnabled } { tok_primint positive 2 3 binary }
-  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  0[oO] @numspc @octal         \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @numspc @hexadecimal   \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
-  @negative 0[bB] @binary      \# / { ifExtension magicHashEnabled `alexAndPred`
-                                      ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
-  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
-  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+  @negative 0[bB] @numspc @binary  \# / { ifExtension magicHashEnabled `alexAndPred`
+                                          ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
+  @negative 0[oO] @numspc @octal   \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @negative 0[xX] @numspc @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 
   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
-  0[bB] @binary                \# \# / { ifExtension magicHashEnabled `alexAndPred`
+  0[bB] @numspc @binary        \# \# / { ifExtension magicHashEnabled `alexAndPred`
                                          ifExtension binaryLiteralsEnabled } { tok_primword 2 4 binary }
-  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
-  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+  0[oO] @numspc @octal         \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @numspc @hexadecimal   \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 
   -- Unboxed floats and doubles (:: Float#, :: Double#)
   -- prim_{float,double} work with signed literals
-  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
-  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
+  @signed @floating_point \# / { ifExtension magicHashEnabled } { tok_frac 1 tok_primfloat }
+  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { tok_frac 2 tok_primdouble }
 }
 
 -- Strings and chars are lexed by hand-written code.  The reason is
@@ -943,11 +955,6 @@ strtoken :: (String -> Token) -> Action
 strtoken f span buf len =
   return (L span $! (f $! lexemeToString buf len))
 
-init_strtoken :: Int -> (String -> Token) -> Action
--- like strtoken, but drops the last N character(s)
-init_strtoken drop f span buf len =
-  return (L span $! (f $! lexemeToString buf (len-drop)))
-
 begin :: Int -> Action
 begin code _span _str _len = do pushLexState code; lexToken
 
@@ -1277,8 +1284,12 @@ tok_integral :: (SourceText -> Integer -> Token)
              -> Int -> Int
              -> (Integer, (Char -> Int))
              -> Action
-tok_integral itint transint transbuf translen (radix,char_to_int) span buf len
- = return $ L span $ itint (SourceText $ lexemeToString buf len)
+tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = do
+  numericUnderscores <- extension numericUnderscoresEnabled  -- #14473
+  let src = lexemeToString buf len
+  if (not numericUnderscores) && ('_' `elem` src)
+    then failMsgP "Use NumericUnderscores to allow underscores in integer literals"
+    else return $ L span $ itint (SourceText src)
        $! transint $ parseUnsignedInteger
        (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
 
@@ -1310,6 +1321,14 @@ octal = (8,octDecDigit)
 hexadecimal = (16,hexDigit)
 
 -- readRational can understand negative rationals, exponents, everything.
+tok_frac :: Int -> (String -> Token) -> Action
+tok_frac drop f span buf len = do
+  numericUnderscores <- extension numericUnderscoresEnabled  -- #14473
+  let src = lexemeToString buf (len-drop)
+  if (not numericUnderscores) && ('_' `elem` src)
+    then failMsgP "Use NumericUnderscores to allow underscores in floating literals"
+    else return (L span $! (f $! src))
+
 tok_float, tok_primfloat, tok_primdouble :: String -> Token
 tok_float        str = ITrational   $! readFractionalLit str
 tok_hex_float    str = ITrational   $! readHexFractionalLit str
@@ -2221,6 +2240,7 @@ data ExtBits
   | HexFloatLiteralsBit
   | TypeApplicationsBit
   | StaticPointersBit
+  | NumericUnderscoresBit
   deriving Enum
 
 
@@ -2289,6 +2309,8 @@ typeApplicationEnabled :: ExtsBitmap -> Bool
 typeApplicationEnabled = xtest TypeApplicationsBit
 staticPointersEnabled :: ExtsBitmap -> Bool
 staticPointersEnabled = xtest StaticPointersBit
+numericUnderscoresEnabled :: ExtsBitmap -> Bool
+numericUnderscoresEnabled = xtest NumericUnderscoresBit
 
 -- PState for parsing options pragmas
 --
@@ -2344,6 +2366,7 @@ mkParserFlags flags =
                .|. PatternSynonymsBit          `setBitIf` xopt LangExt.PatternSynonyms          flags
                .|. TypeApplicationsBit         `setBitIf` xopt LangExt.TypeApplications         flags
                .|. StaticPointersBit           `setBitIf` xopt LangExt.StaticPointers           flags
+               .|. NumericUnderscoresBit       `setBitIf` xopt LangExt.NumericUnderscores       flags
 
       setBitIf :: ExtBits -> Bool -> ExtsBitmap
       b `setBitIf` cond | cond      = xbit b
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index d8d7f6e3e1..43e8f535d3 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -403,6 +403,28 @@ primop   PopCnt64Op   "popCnt64#"   GenPrimOp   WORD64 -> Word#
 primop   PopCntOp   "popCnt#"   Monadic   Word# -> Word#
     {Count the number of set bits in a word.}
 
+primop   Pdep8Op   "pdep8#"   Dyadic   Word# -> Word# -> Word#
+    {Deposit bits to lower 8 bits of a word at locations specified by a mask.}
+primop   Pdep16Op   "pdep16#"   Dyadic   Word# -> Word# -> Word#
+    {Deposit bits to lower 16 bits of a word at locations specified by a mask.}
+primop   Pdep32Op   "pdep32#"   Dyadic   Word# -> Word# -> Word#
+    {Deposit bits to lower 32 bits of a word at locations specified by a mask.}
+primop   Pdep64Op   "pdep64#"   GenPrimOp   WORD64 -> WORD64 -> WORD64
+    {Deposit bits to a word at locations specified by a mask.}
+primop   PdepOp   "pdep#"   Dyadic   Word# -> Word# -> Word#
+    {Deposit bits to a word at locations specified by a mask.}
+
+primop   Pext8Op   "pext8#"   Dyadic   Word# -> Word# -> Word#
+    {Extract bits from lower 8 bits of a word at locations specified by a mask.}
+primop   Pext16Op   "pext16#"   Dyadic   Word# -> Word# -> Word#
+    {Extract bits from lower 16 bits of a word at locations specified by a mask.}
+primop   Pext32Op   "pext32#"   Dyadic   Word# -> Word# -> Word#
+    {Extract bits from lower 32 bits of a word at locations specified by a mask.}
+primop   Pext64Op   "pext64#"   GenPrimOp   WORD64 -> WORD64 -> WORD64
+    {Extract bits from a word at locations specified by a mask.}
+primop   PextOp   "pext#"   Dyadic   Word# -> Word# -> Word#
+    {Extract bits from a word at locations specified by a mask.}
+
 primop   Clz8Op   "clz8#" Monadic   Word# -> Word#
     {Count leading zeros in the lower 8 bits of a word.}
 primop   Clz16Op   "clz16#" Monadic   Word# -> Word#
diff --git a/compiler/typecheck/TcGenDeriv.hs b/compiler/typecheck/TcGenDeriv.hs
index b2d45fda6d..1ac350523a 100644
--- a/compiler/typecheck/TcGenDeriv.hs
+++ b/compiler/typecheck/TcGenDeriv.hs
@@ -1132,7 +1132,7 @@ gen_Show_binds get_fixity loc tycon
   = (unitBag shows_prec, emptyBag)
   where
     data_cons = tyConDataCons tycon
-    shows_prec = mkFunBindEC 1 loc showsPrec_RDR id (map pats_etc data_cons)
+    shows_prec = mkFunBindEC 2 loc showsPrec_RDR id (map pats_etc data_cons)
     comma_space = nlHsVar showCommaSpace_RDR
 
     pats_etc data_con
diff --git a/compiler/typecheck/TcRnTypes.hs b/compiler/typecheck/TcRnTypes.hs
index 364fd95f3c..13391d632b 100644
--- a/compiler/typecheck/TcRnTypes.hs
+++ b/compiler/typecheck/TcRnTypes.hs
@@ -1397,10 +1397,13 @@ plusImportAvails
                    imp_orphs         = orphs1 `unionLists` orphs2,
                    imp_finsts        = finsts1 `unionLists` finsts2 }
   where
-    plus_mod_dep (m1, boot1) (m2, boot2)
-        = WARN( not (m1 == m2), (ppr m1 <+> ppr m2) $$ (ppr boot1 <+> ppr boot2) )
-                -- Check mod-names match
-          (m1, boot1 && boot2) -- If either side can "see" a non-hi-boot interface, use that
+    plus_mod_dep r1@(m1, boot1) r2@(m2, boot2)
+      | ASSERT2( m1 == m2, (ppr m1 <+> ppr m2) $$ (ppr boot1 <+> ppr boot2) )
+        boot1 = r2
+      | otherwise = r1
+      -- If either side can "see" a non-hi-boot interface, use that
+      -- Reusing existing tuples saves 10% of allocations on test
+      -- perf/compiler/MultiLayerModules
 
 {-
 ************************************************************************
@@ -2391,7 +2394,9 @@ Yuk!
 -}
 
 data Implication
-  = Implic {
+  = Implic {   -- Invariants for a tree of implications:
+               -- see TcType Note [TcLevel and untouchable type variables]
+
       ic_tclvl :: TcLevel,       -- TcLevel of unification variables
                                  -- allocated /inside/ this implication
 
@@ -2410,7 +2415,8 @@ data Implication
                                  -- for the implication, and hence for all the
                                  -- given evidence variables
 
-      ic_wanted :: WantedConstraints,  -- The wanted
+      ic_wanted :: WantedConstraints,  -- The wanteds
+                                       -- See Invariang (WantedInf) in TcType
 
       ic_binds  :: EvBindsVar,    -- Points to the place to fill in the
                                   -- abstraction and bindings.
diff --git a/compiler/typecheck/TcSimplify.hs b/compiler/typecheck/TcSimplify.hs
index 70c8f96d90..e0588ea914 100644
--- a/compiler/typecheck/TcSimplify.hs
+++ b/compiler/typecheck/TcSimplify.hs
@@ -1910,7 +1910,7 @@ allow the implication to make progress.
 
 promoteTyVar :: TcLevel -> TcTyVar  -> TcM Bool
 -- When we float a constraint out of an implication we must restore
--- invariant (MetaTvInv) in Note [TcLevel and untouchable type variables] in TcType
+-- invariant (WantedInv) in Note [TcLevel and untouchable type variables] in TcType
 -- Return True <=> we did some promotion
 -- See Note [Promoting unification variables]
 promoteTyVar tclvl tv
@@ -1924,7 +1924,7 @@ promoteTyVar tclvl tv
 
 promoteTyVarTcS :: TcLevel -> TcTyVar  -> TcS ()
 -- When we float a constraint out of an implication we must restore
--- invariant (MetaTvInv) in Note [TcLevel and untouchable type variables] in TcType
+-- invariant (WantedInv) in Note [TcLevel and untouchable type variables] in TcType
 -- See Note [Promoting unification variables]
 -- We don't just call promoteTyVar because we want to use unifyTyVar,
 -- not writeMetaTyVar
@@ -2067,7 +2067,7 @@ When we are inferring a type, we simplify the constraint, and then use
 approximateWC to produce a list of candidate constraints.  Then we MUST
 
   a) Promote any meta-tyvars that have been floated out by
-     approximateWC, to restore invariant (MetaTvInv) described in
+     approximateWC, to restore invariant (WantedInv) described in
      Note [TcLevel and untouchable type variables] in TcType.
 
   b) Default the kind of any meta-tyvars that are not mentioned in
@@ -2084,8 +2084,8 @@ Note [Promoting unification variables]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 When we float an equality out of an implication we must "promote" free
 unification variables of the equality, in order to maintain Invariant
-(MetaTvInv) from Note [TcLevel and untouchable type variables] in TcType.  for the
-leftover implication.
+(WantedInv) from Note [TcLevel and untouchable type variables] in
+TcType.  for the leftover implication.
 
 This is absolutely necessary. Consider the following example. We start
 with two implications and a class with a functional dependency.
@@ -2346,13 +2346,14 @@ The "bound variables of the implication" are
   3. The binders of all evidence bindings in `ic_binds`. Example
          forall a. (d :: t1 ~ t2)
             EvBinds { (co :: t1 ~# t2) = superclass-sel d }
-            => [W] co : (a ~# b |> co)
-     Here `co` is gotten by superclass selection from `d`.
+            => [W] co2 : (a ~# b |> co)
+     Here `co` is gotten by superclass selection from `d`, and the
+     wanted constraint co2 must not float.
 
-  4. And the evidence variable of any equality constraint whose type
-     mentions a bound variable.  Example:
+  4. And the evidence variable of any equality constraint (incl
+     Wanted ones) whose type mentions a bound variable.  Example:
         forall k. [W] co1 :: t1 ~# t2 |> co2
-            [W] co2 :: k ~# *
+                  [W] co2 :: k ~# *
      Here, since `k` is bound, so is `co2` and hence so is `co1`.
 
 Here (1,2,3) are handled by the "seed_skols" calculation, and
diff --git a/compiler/typecheck/TcType.hs b/compiler/typecheck/TcType.hs
index 441545ce24..de37aa862d 100644
--- a/compiler/typecheck/TcType.hs
+++ b/compiler/typecheck/TcType.hs
@@ -680,25 +680,36 @@ Note [TcLevel and untouchable type variables]
 
 * INVARIANTS.  In a tree of Implications,
 
-    (ImplicInv) The level number of an Implication is
+    (ImplicInv) The level number (ic_tclvl) of an Implication is
                 STRICTLY GREATER THAN that of its parent
 
-    (MetaTvInv) The level number of a unification variable is
-                LESS THAN OR EQUAL TO that of its parent
-                implication
+    (GivenInv) The level number of a unification variable appearing
+                 in the 'ic_given' of an implication I should be
+                 STRICTLY LESS THAN the ic_tclvl of I
+
+    (WantedInv) The level number of a unification variable appearing
+                in the 'ic_wanted' of an implication I should be
+                LESS THAN OR EQUAL TO the ic_tclvl of I
+                See Note [WantedInv]
 
 * A unification variable is *touchable* if its level number
   is EQUAL TO that of its immediate parent implication.
 
-* INVARIANT
-    (GivenInv)  The free variables of the ic_given of an
-                implication are all untouchable; ie their level
-                numbers are LESS THAN the ic_tclvl of the implication
+Note [WantedInv]
+~~~~~~~~~~~~~~~~
+Why is WantedInv important?  Consider this implication, where
+the constraint (C alpha[3]) disobeys WantedInv:
+
+   forall[2] a. blah => (C alpha[3])
+                        (forall[3] b. alpha[3] ~ b)
+
+We can unify alpha:=b in the inner implication, because 'alpha' is
+touchable; but then 'b' has excaped its scope into the outer implication.
 
 Note [Skolem escape prevention]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We only unify touchable unification variables.  Because of
-(MetaTvInv), there can be no occurrences of the variable further out,
+(WantedInv), there can be no occurrences of the variable further out,
 so the unification can't cause the skolems to escape. Example:
      data T = forall a. MkT a (a->Int)
      f x (MkT v f) = length [v,x]
@@ -770,7 +781,7 @@ sameDepthAs (TcLevel ctxt_tclvl) (TcLevel tv_tclvl)
                              --     So <= would be equivalent
 
 checkTcLevelInvariant :: TcLevel -> TcLevel -> Bool
--- Checks (MetaTvInv) from Note [TcLevel and untouchable type variables]
+-- Checks (WantedInv) from Note [TcLevel and untouchable type variables]
 checkTcLevelInvariant (TcLevel ctxt_tclvl) (TcLevel tv_tclvl)
   = ctxt_tclvl >= tv_tclvl
 
diff --git a/compiler/types/Coercion.hs b/compiler/types/Coercion.hs
index 3f83b09f67..cec56b189e 100644
--- a/compiler/types/Coercion.hs
+++ b/compiler/types/Coercion.hs
@@ -29,7 +29,7 @@ module Coercion (
         mkAxInstLHS, mkUnbranchedAxInstLHS,
         mkPiCo, mkPiCos, mkCoCast,
         mkSymCo, mkTransCo, mkTransAppCo,
-        mkNthCo, mkNthCoRole, mkLRCo,
+        mkNthCo, mkLRCo,
         mkInstCo, mkAppCo, mkAppCos, mkTyConAppCo, mkFunCo, mkFunCos,
         mkForAllCo, mkForAllCos, mkHomoForAllCos, mkHomoForAllCos_NoRefl,
         mkPhantomCo,
@@ -804,15 +804,6 @@ mkTransCo co1 (Refl {}) = co1
 mkTransCo (Refl {}) co2 = co2
 mkTransCo co1 co2       = TransCo co1 co2
 
--- the Role is the desired one. It is the caller's responsibility to make
--- sure this request is reasonable
-mkNthCoRole :: Role -> Int -> Coercion -> Coercion
-mkNthCoRole role n co
-  = downgradeRole role nth_role $ nth_co
-  where
-    nth_co = mkNthCo n co
-    nth_role = coercionRole nth_co
-
 mkNthCo :: Int -> Coercion -> Coercion
 mkNthCo 0 (Refl _ ty)
   | Just (tv, _) <- splitForAllTy_maybe ty
diff --git a/compiler/types/TyCoRep.hs b/compiler/types/TyCoRep.hs
index d9cc42b6f2..588963d012 100644
--- a/compiler/types/TyCoRep.hs
+++ b/compiler/types/TyCoRep.hs
@@ -1214,6 +1214,8 @@ instance Outputable UnivCoProvenance where
 -- | A coercion to be filled in by the type-checker. See Note [Coercion holes]
 data CoercionHole
   = CoercionHole { ch_co_var :: CoVar
+                       -- See Note [CoercionHoles and coercion free variables]
+
                  , ch_ref    :: IORef (Maybe Coercion)
                  }
 
@@ -1254,7 +1256,7 @@ During typechecking, constraint solving for type classes works by
 
 For equality constraints we use a different strategy.  See Note [The
 equality types story] in TysPrim for background on equality constraints.
-  - For boxed equality constraints, (t1 ~N t2) and (t1 ~R t2), it's just
+  - For /boxed/ equality constraints, (t1 ~N t2) and (t1 ~R t2), it's just
     like type classes above. (Indeed, boxed equality constraints *are* classes.)
   - But for /unboxed/ equality constraints (t1 ~R# t2) and (t1 ~N# t2)
     we use a different plan
@@ -1270,15 +1272,24 @@ For unboxed equalities:
 
 The main reason for all this is that there may be no good place to let-bind
 the evidence for unboxed equalities:
-  - We emit constraints for kind coercions, to be used
-    to cast a type's kind. These coercions then must be used in types. Because
-    they might appear in a top-level type, there is no place to bind these
-   (unlifted) coercions in the usual way.
+
+  - We emit constraints for kind coercions, to be used to cast a
+    type's kind. These coercions then must be used in types. Because
+    they might appear in a top-level type, there is no place to bind
+    these (unlifted) coercions in the usual way.
 
   - A coercion for (forall a. t1) ~ (forall a. t2) will look like
        forall a. (coercion for t1~t2)
-    But the coercion for (t1~t2) may mention 'a', and we don't have let-bindings
-    within coercions.  We could add them, but coercion holes are easier.
+    But the coercion for (t1~t2) may mention 'a', and we don't have
+    let-bindings within coercions.  We could add them, but coercion
+    holes are easier.
+
+  - Moreover, nothing is lost from the lack of let-bindings. For
+    dicionaries want to achieve sharing to avoid recomoputing the
+    dictionary.  But coercions are entirely erased, so there's little
+    benefit to sharing. Indeed, even if we had a let-binding, we
+    always inline types and coercions at every use site and drop the
+    binding.
 
 Other notes about HoleCo:
 
@@ -1289,14 +1300,26 @@ Other notes about HoleCo:
    type-checking vs forms that can appear in core proper, holes in Core will
    be ruled out.
 
- * The Unique carried with a coercion hole is used solely for debugging.
+ * See Note [CoercionHoles and coercion free variables]
+
+ * Coercion holes can be compared for equality like other coercions:
+   by looking at the types coerced.
+
+
+Note [CoercionHoles and coercion free variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Why does a CoercionHole contain a CoVar, as well as reference to
+fill in?  Because we want to treat that CoVar as a free variable of
+the coercion.  See Trac #14584, and Note [What prevents a
+constraint from floating] in TcSimplify, item (4):
+
+        forall k. [W] co1 :: t1 ~# t2 |> co2
+                  [W] co2 :: k ~# *
 
- * Coercion holes can be compared for equality only like other coercions:
-   only by looking at the types coerced.
+Here co2 is a CoercionHole. But we /must/ know that it is free in
+co1, because that's all that stops it floating outside the
+implication.
 
- * We don't use holes for other evidence because other evidence wants to
-   be /shared/. But coercions are entirely erased, so there's little
-   benefit to sharing.
 
 Note [ProofIrrelProv]
 ~~~~~~~~~~~~~~~~~~~~~
@@ -1461,6 +1484,7 @@ tyCoFVsOfCo (CoVarCo v) fv_cand in_scope acc
   = tyCoFVsOfCoVar v fv_cand in_scope acc
 tyCoFVsOfCo (HoleCo h) fv_cand in_scope acc
   = tyCoFVsOfCoVar (coHoleCoVar h) fv_cand in_scope acc
+    -- See Note [CoercionHoles and coercion free variables]
 tyCoFVsOfCo (AxiomInstCo _ _ cos) fv_cand in_scope acc = tyCoFVsOfCos cos fv_cand in_scope acc
 tyCoFVsOfCo (UnivCo p _ t1 t2) fv_cand in_scope acc
   = (tyCoFVsOfProv p `unionFV` tyCoFVsOfType t1
@@ -1525,6 +1549,7 @@ coVarsOfCo (ForAllCo tv kind_co co)
 coVarsOfCo (FunCo _ co1 co2)    = coVarsOfCo co1 `unionVarSet` coVarsOfCo co2
 coVarsOfCo (CoVarCo v)          = coVarsOfCoVar v
 coVarsOfCo (HoleCo h)           = coVarsOfCoVar (coHoleCoVar h)
+                                  -- See Note [CoercionHoles and coercion free variables]
 coVarsOfCo (AxiomInstCo _ _ as) = coVarsOfCos as
 coVarsOfCo (UnivCo p _ t1 t2)   = coVarsOfProv p `unionVarSet` coVarsOfTypes [t1, t2]
 coVarsOfCo (SymCo co)           = coVarsOfCo co
diff --git a/compiler/utils/StringBuffer.hs b/compiler/utils/StringBuffer.hs
index 39941e2f7a..a5fc4e7f12 100644
--- a/compiler/utils/StringBuffer.hs
+++ b/compiler/utils/StringBuffer.hs
@@ -323,5 +323,6 @@ parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
   = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
     go i x | i == len  = x
            | otherwise = case fst (utf8DecodeChar (ptr `plusPtr` (cur + i))) of
+               '_'  -> go (i + 1) x    -- skip "_" (#14473)
                char -> go (i + 1) (x * radix + toInteger (char_to_int char))
   in go 0 0
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
index 7a46db7665..a4520ed679 100644
--- a/compiler/utils/Util.hs
+++ b/compiler/utils/Util.hs
@@ -1142,12 +1142,18 @@ readRational__ r = do
 
      lexDecDigits = nonnull isDigit
 
-     lexDotDigits ('.':s) = return (span isDigit s)
+     lexDotDigits ('.':s) = return (span' isDigit s)
      lexDotDigits s       = return ("",s)
 
-     nonnull p s = do (cs@(_:_),t) <- return (span p s)
+     nonnull p s = do (cs@(_:_),t) <- return (span' p s)
                       return (cs,t)
 
+     span' _ xs@[]         =  (xs, xs)
+     span' p xs@(x:xs')
+               | x == '_'  = span' p xs'   -- skip "_" (#14473)
+               | p x       =  let (ys,zs) = span' p xs' in (x:ys,zs)
+               | otherwise =  ([],xs)
+
 readRational :: String -> Rational -- NB: *does* handle a leading "-"
 readRational top_s
   = case top_s of
@@ -1176,12 +1182,12 @@ readHexRational str =
 readHexRational__ :: String -> Maybe Rational
 readHexRational__ ('0' : x : rest)
   | x == 'X' || x == 'x' =
-  do let (front,rest2) = span isHexDigit rest
+  do let (front,rest2) = span' isHexDigit rest
      guard (not (null front))
      let frontNum = steps 16 0 front
      case rest2 of
        '.' : rest3 ->
-          do let (back,rest4) = span isHexDigit rest3
+          do let (back,rest4) = span' isHexDigit rest3
              guard (not (null back))
              let backNum = steps 16 frontNum back
                  exp1    = -4 * length back
@@ -1201,13 +1207,18 @@ readHexRational__ ('0' : x : rest)
   mk :: Integer -> Int -> Rational
   mk n e = fromInteger n * 2^^e
 
-  dec cs = case span isDigit cs of
+  dec cs = case span' isDigit cs of
              (ds,"") | not (null ds) -> Just (steps 10 0 ds)
              _ -> Nothing
 
   steps base n ds = foldl' (step base) n ds
   step  base n d  = base * n + fromIntegral (digitToInt d)
 
+  span' _ xs@[]         =  (xs, xs)
+  span' p xs@(x:xs')
+            | x == '_'  = span' p xs'   -- skip "_"  (#14473)
+            | p x       =  let (ys,zs) = span' p xs' in (x:ys,zs)
+            | otherwise =  ([],xs)
 
 readHexRational__ _ = Nothing
 
diff --git a/docs/users_guide/glasgow_exts.rst b/docs/users_guide/glasgow_exts.rst
index 34efbfd3e2..7d79222bfa 100644
--- a/docs/users_guide/glasgow_exts.rst
+++ b/docs/users_guide/glasgow_exts.rst
@@ -553,6 +553,93 @@ by one bit left (negative) or right (positive).  Here are some examples:
 
 
 
+.. _numeric-underscores:
+
+Numeric underscores
+-------------------
+
+.. ghc-flag:: -XNumericUnderscores
+    :shortdesc: Enable support for :ref:`numeric underscores <numeric-underscores>`.
+    :type: dynamic
+    :reverse: -XNoNumericUnderscores
+    :category:
+
+    :since: 8.6.1
+
+    Allow the use of underscores in numeric literals.
+
+GHC allows for numeric literals to be given in decimal, octal, hexadecimal,
+binary, or float notation.
+
+The language extension :ghc-flag:`-XNumericUnderscores` adds support for expressing
+underscores in numeric literals.
+For instance, the numeric literal ``1_000_000`` will be parsed into
+``1000000`` when :ghc-flag:`-XNumericUnderscores` is enabled.
+That is, underscores in numeric literals are ignored when
+:ghc-flag:`-XNumericUnderscores` is enabled.
+See also :ghc-ticket:`14473`.
+
+For example: ::
+
+    -- decimal
+    million    = 1_000_000
+    billion    = 1_000_000_000
+    lightspeed = 299_792_458
+    version    = 8_04_1
+    date       = 2017_12_31
+
+    -- hexadecimal
+    red_mask = 0xff_00_00
+    size1G   = 0x3fff_ffff
+
+    -- binary
+    bit8th   = 0b01_0000_0000
+    packbits = 0b1_11_01_0000_0_111
+    bigbits  = 0b1100_1011__1110_1111__0101_0011
+
+    -- float
+    pi       = 3.141_592_653_589_793
+    faraday  = 96_485.332_89
+    avogadro = 6.022_140_857e+23
+
+    -- function
+    isUnderMillion = (< 1_000_000)
+
+    clip64M x
+        | x > 0x3ff_ffff = 0x3ff_ffff
+        | otherwise = x
+
+    test8bit x = (0b01_0000_0000 .&. x) /= 0
+
+About validity: ::
+
+    x0 = 1_000_000   -- valid
+    x1 = 1__000000   -- valid
+    x2 = 1000000_    -- invalid
+    x3 = _1000000    -- invalid
+
+    e0 = 0.0001      -- valid
+    e1 = 0.000_1     -- valid
+    e2 = 0_.0001     -- invalid
+    e3 = _0.0001     -- invalid
+    e4 = 0._0001     -- invalid
+    e5 = 0.0001_     -- invalid
+
+    f0 = 1e+23       -- valid
+    f1 = 1_e+23      -- valid
+    f2 = 1__e+23     -- valid
+    f3 = 1e_+23      -- invalid
+
+    g0 = 1e+23       -- valid
+    g1 = 1e+_23      -- invalid
+    g2 = 1e+23_      -- invalid
+
+    h0 = 0xffff      -- valid
+    h1 = 0xff_ff     -- valid
+    h2 = 0x_ffff     -- valid
+    h3 = 0x__ffff    -- valid
+    h4 = _0xffff     -- invalid
+
 .. _pattern-guards:
 
 Pattern guards
@@ -14958,28 +15045,67 @@ HasCallStack
 ``GHC.Stack.HasCallStack`` is a lightweight method of obtaining a
 partial call-stack at any point in the program.
 
-A function can request its call-site with the ``HasCallStack`` constraint.
-For example, we can define ::
+A function can request its call-site with the ``HasCallStack`` constraint
+and access it as a Haskell value by using ``callStack``.
+
+One can then use functions from ``GHC.Stack`` to inspect or pretty
+print (as is done in ``f`` below) the call stack.
+
+   f :: HasCallStack => IO ()
+   f = putStrLn (prettyCallStack callStack)
+
+   g :: HasCallStack => IO ()
+   g = f
+
+Evaluating ``f`` directly shows a call stack with a single entry,
+while evaluating ``g``, which also requests its call-site, shows
+two entries, one for each computation "annotated" with
+``HasCallStack``.
+
+.. code-block:: none
+
+   ghci> f
+   CallStack (from HasCallStack):
+     f, called at <interactive>:19:1 in interactive:Ghci1
+   ghci> g
+   CallStack (from HasCallStack):
+     f, called at <interactive>:17:5 in main:Main
+     g, called at <interactive>:20:1 in interactive:Ghci2
+
+The ``error`` function from the Prelude supports printing the call stack that
+led to the error in addition to the usual error message:
+
+.. code-block:: none
+
+   ghci> error "bad"
+   *** Exception: bad
+   CallStack (from HasCallStack):
+     error, called at <interactive>:25:1 in interactive:Ghci5
 
-   errorWithCallStack :: HasCallStack => String -> a
+The call stack here consists of a single entry, pinpointing the source
+of the call to ``error``. However, by annotating several computations
+with ``HasCallStack``, figuring out the exact circumstances and sequences
+of calls that lead to a call to ``error`` becomes a lot easier, as demonstrated
+with the simple example below. ::
 
-as a variant of ``error`` that will get its call-site (as of GHC 8.0,
-``error`` already gets its call-site, but let's assume for the sake of
-demonstration that it does not). We can access the call-stack inside
-``errorWithCallStack`` with ``GHC.Stack.callStack``. ::
+   f :: HasCallStack => IO ()
+   f = error "bad bad bad"
 
-   errorWithCallStack :: HasCallStack => String -> a
-   errorWithCallStack msg = error (msg ++ "\n" ++ prettyCallStack callStack)
+   g :: HasCallStack => IO ()
+   g = f
 
-Thus, if we call ``errorWithCallStack`` we will get a formatted call-stack
-alongside our error message.
+   h :: HasCallStack => IO ()
+   h = g
 
 .. code-block:: none
 
-   ghci> errorWithCallStack "die"
-   *** Exception: die
+   ghci> h
+   *** Exception: bad bad bad
    CallStack (from HasCallStack):
-     errorWithCallStack, called at <interactive>:2:1 in interactive:Ghci1
+     error, called at call-stack.hs:4:5 in main:Main
+     f, called at call-stack.hs:7:5 in main:Main
+     g, called at call-stack.hs:10:5 in main:Main
+     h, called at <interactive>:28:1 in interactive:Ghci1
 
 The ``CallStack`` will only extend as far as the types allow it, for
 example ::
diff --git a/libraries/Cabal b/libraries/Cabal
-Subproject 3f20e1faf9bc86ecb154ccf3e8b913bff14b926
+Subproject d2cf3f13dad9f1172cf9371ea197d1204e9e17a
diff --git a/libraries/base/GHC/Stack/Types.hs b/libraries/base/GHC/Stack/Types.hs
index d9e755239c..b5858f2fa0 100644
--- a/libraries/base/GHC/Stack/Types.hs
+++ b/libraries/base/GHC/Stack/Types.hs
@@ -75,25 +75,28 @@ type HasCallStack = (?callStack :: CallStack)
 -- For example, we can define
 --
 -- @
--- errorWithCallStack :: HasCallStack => String -> a
+-- putStrLnWithCallStack :: HasCallStack => String -> IO ()
 -- @
 --
--- as a variant of @error@ that will get its call-site. We can access the
--- call-stack inside @errorWithCallStack@ with 'GHC.Stack.callStack'.
+-- as a variant of @putStrLn@ that will get its call-site and print it,
+-- along with the string given as argument. We can access the
+-- call-stack inside @putStrLnWithCallStack@ with 'GHC.Stack.callStack'.
 --
 -- @
--- errorWithCallStack :: HasCallStack => String -> a
--- errorWithCallStack msg = error (msg ++ "\\n" ++ prettyCallStack callStack)
+-- putStrLnWithCallStack :: HasCallStack => String -> IO ()
+-- putStrLnWithCallStack msg = do
+--   putStrLn msg
+--   putStrLn (prettyCallStack callStack)
 -- @
 --
--- Thus, if we call @errorWithCallStack@ we will get a formatted call-stack
--- alongside our error message.
+-- Thus, if we call @putStrLnWithCallStack@ we will get a formatted call-stack
+-- alongside our string.
 --
 --
--- >>> errorWithCallStack "die"
--- *** Exception: die
+-- >>> putStrLnWithCallStack "hello"
+-- hello
 -- CallStack (from HasCallStack):
---   errorWithCallStack, called at <interactive>:2:1 in interactive:Ghci1
+--   putStrLnWithCallStack, called at <interactive>:2:1 in interactive:Ghci1
 --
 --
 -- GHC solves 'HasCallStack' constraints in three steps:
diff --git a/libraries/base/base.cabal b/libraries/base/base.cabal
index 4252cdc025..dc529a8686 100644
--- a/libraries/base/base.cabal
+++ b/libraries/base/base.cabal
@@ -3,7 +3,7 @@ name:           base
 version:        4.11.0.0
 -- NOTE: Don't forget to update ./changelog.md
 
-license:        BSD3
+license:        BSD-3-Clause
 license-file:   LICENSE
 maintainer:     libraries@haskell.org
 bug-reports:    http://ghc.haskell.org/trac/ghc/newticket?component=libraries/base
diff --git a/libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs b/libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
index 3e8c2a0e15..2b06c851a8 100644
--- a/libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
+++ b/libraries/ghc-boot-th/GHC/LanguageExtensions/Type.hs
@@ -133,4 +133,5 @@ data Extension
    | StrictData
    | MonadFailDesugaring
    | EmptyDataDeriving
+   | NumericUnderscores
    deriving (Eq, Enum, Show, Generic)
diff --git a/libraries/ghc-prim/cbits/pdep.c b/libraries/ghc-prim/cbits/pdep.c
new file mode 100644
index 0000000000..8435ffe186
--- /dev/null
+++ b/libraries/ghc-prim/cbits/pdep.c
@@ -0,0 +1,48 @@
+#include "Rts.h"
+#include "MachDeps.h"
+
+extern StgWord64 hs_pdep64(StgWord64 src, StgWord64 mask);
+
+StgWord64
+hs_pdep64(StgWord64 src, StgWord64 mask)
+{
+  uint64_t result = 0;
+
+  while (1) {
+    // Mask out all but the lowest bit
+    const uint64_t lowest = (-mask & mask);
+
+    if (lowest == 0) {
+      break;
+    }
+
+    const uint64_t lsb = (uint64_t)((int64_t)(src << 63) >> 63);
+
+    result |= lsb & lowest;
+    mask &= ~lowest;
+    src >>= 1;
+  }
+
+  return result;
+}
+
+extern StgWord hs_pdep32(StgWord src, StgWord mask);
+StgWord
+hs_pdep32(StgWord src, StgWord mask)
+{
+  return hs_pdep64(src, mask);
+}
+
+extern StgWord hs_pdep16(StgWord src, StgWord mask);
+StgWord
+hs_pdep16(StgWord src, StgWord mask)
+{
+  return hs_pdep64(src, mask);
+}
+
+extern StgWord hs_pdep8(StgWord src, StgWord mask);
+StgWord
+hs_pdep8(StgWord src, StgWord mask)
+{
+  return hs_pdep64(src, mask);
+}
diff --git a/libraries/ghc-prim/cbits/pext.c b/libraries/ghc-prim/cbits/pext.c
new file mode 100644
index 0000000000..fe960b1342
--- /dev/null
+++ b/libraries/ghc-prim/cbits/pext.c
@@ -0,0 +1,44 @@
+#include "Rts.h"
+#include "MachDeps.h"
+
+extern StgWord64 hs_pext64(StgWord64 src, StgWord64 mask);
+
+StgWord64
+hs_pext64(StgWord64 src, StgWord64 mask)
+{
+  uint64_t result = 0;
+  int offset = 0;
+
+  for (int bit = 0; bit != sizeof(uint64_t) * 8; ++bit) {
+    const uint64_t src_bit = (src >> bit) & 1;
+    const uint64_t mask_bit = (mask >> bit) & 1;
+
+    if (mask_bit) {
+      result |= (uint64_t)(src_bit) << offset;
+      ++offset;
+    }
+  }
+
+  return result;
+}
+
+extern StgWord hs_pext32(StgWord src, StgWord mask);
+StgWord
+hs_pext32(StgWord src, StgWord mask)
+{
+  return hs_pext64(src, mask);
+}
+
+extern StgWord hs_pext16(StgWord src, StgWord mask);
+StgWord
+hs_pext16(StgWord src, StgWord mask)
+{
+  return hs_pext64(src, mask);
+}
+
+extern StgWord hs_pext8(StgWord src, StgWord mask);
+StgWord
+hs_pext8(StgWord src, StgWord mask)
+{
+  return hs_pext64(src, mask);
+}
diff --git a/libraries/ghc-prim/ghc-prim.cabal b/libraries/ghc-prim/ghc-prim.cabal
index e99686a10b..f395c9faac 100644
--- a/libraries/ghc-prim/ghc-prim.cabal
+++ b/libraries/ghc-prim/ghc-prim.cabal
@@ -2,7 +2,7 @@ cabal-version:  2.1
 name:           ghc-prim
 version:        0.5.2.0
 -- NOTE: Don't forget to update ./changelog.md
-license:        BSD3
+license:        BSD-3-Clause
 license-file:   LICENSE
 category:       GHC
 maintainer:     libraries@haskell.org
@@ -73,6 +73,8 @@ Library
         cbits/ctz.c
         cbits/debug.c
         cbits/longlong.c
+        cbits/pdep.c
+        cbits/pext.c
         cbits/popcnt.c
         cbits/word2float.c
 
diff --git a/libraries/transformers b/libraries/transformers
-Subproject 36311d39bc545261dab85d4a27af562db1868ed
+Subproject 33b3c8a71778ae37040088dfe022c648373777a
diff --git a/rts/Stats.c b/rts/Stats.c
index fa85878621..26bdac0ea5 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -16,7 +16,7 @@
 #include "Profiling.h"
 #include "GetTime.h"
 #include "sm/Storage.h"
-#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
+#include "sm/GC.h" // gc_alloc_block_sync, whitehole_gc_spin
 #include "sm/GCThread.h"
 #include "sm/BlockAlloc.h"
 
@@ -769,7 +769,8 @@ stat_exit (void)
                 uint32_t g;
 
                 statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
-                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+                statsPrintf("whitehole_gc_spin: %"FMT_Word64"\n"
+                            , whitehole_gc_spin);
                 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                     statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                 }
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 526f063336..738e3e460c 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -28,10 +28,6 @@
 #include "CNF.h"
 #include "Scav.h"
 
-#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
-StgWord64 whitehole_spin = 0;
-#endif
-
 #if defined(THREADED_RTS) && !defined(PARALLEL_GC)
 #define evacuate(p) evacuate1(p)
 #define evacuate_BLACKHOLE(p) evacuate_BLACKHOLE1(p)
@@ -197,8 +193,9 @@ spin:
         info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
         if (info == (W_)&stg_WHITEHOLE_info) {
 #if defined(PROF_SPIN)
-            whitehole_spin++;
+            whitehole_gc_spin++;
 #endif
+            busy_wait_nop();
             goto spin;
         }
     if (IS_FORWARDING_PTR(info)) {
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4dbc5e0aa6..c5ab7a8161 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -133,6 +133,10 @@ uint32_t n_gc_threads;
 // For stats:
 static long copied;        // *words* copied & scavenged during this GC
 
+#if defined(PROF_SPIN) && defined(THREADED_RTS)
+volatile StgWord64 whitehole_gc_spin = 0;
+#endif
+
 bool work_stealing;
 
 uint32_t static_flag = STATIC_FLAG_B;
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index c6b0c13a46..78f054931a 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -46,7 +46,7 @@ extern uint32_t mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS,
 #endif
 
 #if defined(PROF_SPIN) && defined(THREADED_RTS)
-extern StgWord64 whitehole_spin;
+extern volatile StgWord64 whitehole_gc_spin;
 #endif
 
 void gcWorkerThread (Capability *cap);
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index e801c340f2..c4dbdc26ca 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -197,11 +197,7 @@ initStorage (void)
 
 #if defined(THREADED_RTS)
   initSpinLock(&gc_alloc_block_sync);
-#if defined(PROF_SPIN)
-  whitehole_spin = 0;
 #endif
-#endif
-
   N = 0;
 
   for (n = 0; n < n_numa_nodes; n++) {
diff --git a/testsuite/tests/backpack/cabal/bkpcabal06/bkpcabal06.cabal b/testsuite/tests/backpack/cabal/bkpcabal06/bkpcabal06.cabal
index 7dfac20986..ff322a4e02 100644
--- a/testsuite/tests/backpack/cabal/bkpcabal06/bkpcabal06.cabal
+++ b/testsuite/tests/backpack/cabal/bkpcabal06/bkpcabal06.cabal
@@ -4,7 +4,7 @@ license:             BSD3
 author:              Edward Z. Yang
 maintainer:          ezyang@cs.stanford.edu
 build-type:          Simple
-cabal-version:       >=1.10
+cabal-version:       2.0
 
 library sig
   signatures: P
diff --git a/testsuite/tests/cabal/ghcpkg01.stdout b/testsuite/tests/cabal/ghcpkg01.stdout
index ef77814b4c..4523e2807b 100644
--- a/testsuite/tests/cabal/ghcpkg01.stdout
+++ b/testsuite/tests/cabal/ghcpkg01.stdout
@@ -8,23 +8,19 @@ key: testpkg-1.2.3.4-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: stable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package
 category: none
-author: simonmar@microsoft.com
 exposed: True
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-1.2.3.4-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
@@ -36,23 +32,19 @@ key: testpkg-1.2.3.4-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: stable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package
 category: none
-author: simonmar@microsoft.com
 exposed: True
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-1.2.3.4-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
@@ -70,23 +62,18 @@ key: testpkg-2.0-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: unstable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package (new version)
 category: none
-author: simonmar@microsoft.com
-exposed: False
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D C.E
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-2.0-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
@@ -98,23 +85,18 @@ key: testpkg-2.0-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: unstable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package (new version)
 category: none
-author: simonmar@microsoft.com
-exposed: False
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D C.E
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-2.0-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
@@ -126,23 +108,19 @@ key: testpkg-1.2.3.4-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: stable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package
 category: none
-author: simonmar@microsoft.com
 exposed: True
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-1.2.3.4-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
@@ -161,23 +139,18 @@ key: testpkg-1.2.3.4-XXX
 license: BSD3
 copyright: (c) The Univsersity of Glasgow 2004
 maintainer: glasgow-haskell-users@haskell.org
+author: simonmar@microsoft.com
 stability: stable
 homepage: http://www.haskell.org/ghc
 package-url: http://www.haskell.org/ghc
 description:
     A Test Package
 category: none
-author: simonmar@microsoft.com
-exposed: False
-indefinite: False
 exposed-modules:
     A
 hidden-modules: B C.D
-abi: 
-trusted: False
 import-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
 library-dirs: /usr/local/lib/testpkg "c:/Program Files/testpkg"
-data-dir: ""
 hs-libraries: testpkg-1.2.3.4-XXX
 include-dirs: /usr/local/include/testpkg "c:/Program Files/testpkg"
 pkgroot: 
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 214a9d5704..42d8a2f767 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -77,6 +77,8 @@ test('cgrun069', omit_ways(['ghci']), multi_compile_and_run,
 test('cgrun070', normal, compile_and_run, [''])
 test('cgrun071', normal, compile_and_run, [''])
 test('cgrun072', normal, compile_and_run, [''])
+test('cgrun075', normal, compile_and_run, [''])
+test('cgrun076', normal, compile_and_run, [''])
 
 test('T1852', normal, compile_and_run, [''])
 test('T1861', extra_run_opts('0'), compile_and_run, [''])
diff --git a/testsuite/tests/codeGen/should_run/cgrun075.hs b/testsuite/tests/codeGen/should_run/cgrun075.hs
new file mode 100644
index 0000000000..09e35b4d8a
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun075.hs
@@ -0,0 +1,115 @@
+{-# LANGUAGE BangPatterns, CPP, MagicHash #-}
+
+module Main ( main ) where
+
+import Data.Bits
+import GHC.Int
+import GHC.Prim
+import GHC.Word
+import Data.Int
+import Data.Word
+
+#include "MachDeps.h"
+
+main = putStr
+        (   test_pdep   ++ "\n"
+        ++  test_pdep8  ++ "\n"
+        ++  test_pdep16 ++ "\n"
+        ++  test_pdep32 ++ "\n"
+        ++  test_pdep64 ++ "\n"
+        ++  "\n"
+        )
+
+class Pdep a where
+  pdep :: a -> a -> a
+
+instance Pdep Word where
+  pdep (W#   src#) (W#   mask#) = W#   (pdep#   src# mask#)
+
+instance Pdep Word8 where
+  pdep (W8#  src#) (W8#  mask#) = W8#  (pdep8#  src# mask#)
+
+instance Pdep Word16 where
+  pdep (W16# src#) (W16# mask#) = W16# (pdep16# src# mask#)
+
+instance Pdep Word32 where
+  pdep (W32# src#) (W32# mask#) = W32# (pdep32# src# mask#)
+
+instance Pdep Word64 where
+  pdep (W64# src#) (W64# mask#) = W64# (pdep64# src# mask#)
+
+class SlowPdep a where
+  slowPdep :: a -> a -> a
+
+instance SlowPdep Word where
+  slowPdep s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPdep Word8 where
+  slowPdep s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPdep Word16 where
+  slowPdep s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPdep Word32 where
+  slowPdep s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPdep Word64 where
+  slowPdep s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+slowPdep64 :: Word64 -> Word64 -> Word64
+slowPdep64 = slowPdep64' 0
+
+slowPdep32 :: Word32 -> Word32 -> Word32
+slowPdep32 s m = fromIntegral (slowPdep64 (fromIntegral s) (fromIntegral m))
+
+lsb :: Word64 -> Word64
+lsb src = fromIntegral ((fromIntegral (src `shiftL` 63) :: Int64) `shiftR` 63)
+
+slowPdep64' :: Word64 -> Word64 -> Word64 -> Word64
+slowPdep64' result src mask = if lowest /= 0
+  then slowPdep64' newResult (src `shiftR` 1) (mask .&. complement lowest)
+  else result
+  where lowest    = (-mask) .&. mask
+        newResult = (result .|. ((lsb src) .&. lowest))
+
+test_pdep   = test (0 :: Word  ) pdep slowPdep
+test_pdep8  = test (0 :: Word8 ) pdep slowPdep
+test_pdep16 = test (0 :: Word16) pdep slowPdep
+test_pdep32 = test (0 :: Word32) pdep slowPdep
+test_pdep64 = test (0 :: Word64) pdep slowPdep
+
+mask n = (2 ^ n) - 1
+
+fst4 :: (a, b, c, d) -> a
+fst4 (a, _, _, _) = a
+
+runCase :: Eq a
+        => (a -> a -> a)
+        -> (a -> a -> a)
+        -> (a, a)
+        -> (Bool, a, a, (a, a))
+runCase fast slow (x, y) = (slow x y == fast x y, slow x y, fast x y, (x, y))
+
+test :: (Show a, Num a, Eq a) => a -> (a -> a -> a) -> (a -> a -> a) -> String
+test _ fast slow = case failing of
+    [] -> "OK"
+    ((_, e, a, i):xs) ->
+        "FAIL\n" ++ "   Input: " ++ show i ++ "\nExpected: " ++ show e ++
+        "\n  Actual: " ++ show a
+  where failing = dropWhile fst4 . map (runCase fast slow) $ cases
+        cases   = (,) <$> numbers <*> numbers
+        -- 10 random numbers
+#if SIZEOF_HSWORD == 4
+        numbers = [ 1480294021, 1626858410, 2316287658, 1246556957, 3806579062
+                  , 65945563  , 1521588071, 791321966 , 1355466914, 2284998160
+                  ]
+#elif SIZEOF_HSWORD == 8
+        numbers = [ 11004539497957619752, 5625461252166958202
+                  , 1799960778872209546 , 16979826074020750638
+                  , 12789915432197771481, 11680809699809094550
+                  , 13208678822802632247, 13794454868797172383
+                  , 13364728999716654549, 17516539991479925226
+                  ]
+#else
+# error Unexpected word size
+#endif
diff --git a/testsuite/tests/codeGen/should_run/cgrun075.stdout b/testsuite/tests/codeGen/should_run/cgrun075.stdout
new file mode 100644
index 0000000000..e22e2cd950
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun075.stdout
@@ -0,0 +1,6 @@
+OK
+OK
+OK
+OK
+OK
+
diff --git a/testsuite/tests/codeGen/should_run/cgrun076.hs b/testsuite/tests/codeGen/should_run/cgrun076.hs
new file mode 100644
index 0000000000..7fa42d74e0
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun076.hs
@@ -0,0 +1,115 @@
+{-# LANGUAGE BangPatterns, CPP, MagicHash #-}
+
+module Main ( main ) where
+
+import Data.Bits
+import GHC.Int
+import GHC.Prim
+import GHC.Word
+import Data.Int
+import Data.Word
+
+#include "MachDeps.h"
+
+main = putStr
+        (   test_pext   ++ "\n"
+        ++  test_pext8  ++ "\n"
+        ++  test_pext16 ++ "\n"
+        ++  test_pext32 ++ "\n"
+        ++  test_pext64 ++ "\n"
+        ++  "\n"
+        )
+
+class Pext a where
+  pext :: a -> a -> a
+
+instance Pext Word where
+  pext (W#   src#) (W#   mask#) = W#   (pext#   src# mask#)
+
+instance Pext Word8 where
+  pext (W8#  src#) (W8#  mask#) = W8#  (pext8#  src# mask#)
+
+instance Pext Word16 where
+  pext (W16# src#) (W16# mask#) = W16# (pext16# src# mask#)
+
+instance Pext Word32 where
+  pext (W32# src#) (W32# mask#) = W32# (pext32# src# mask#)
+
+instance Pext Word64 where
+  pext (W64# src#) (W64# mask#) = W64# (pext64# src# mask#)
+
+class SlowPext a where
+  slowPext :: a -> a -> a
+
+instance SlowPext Word where
+  slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPext Word8 where
+  slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPext Word16 where
+  slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPext Word32 where
+  slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+instance SlowPext Word64 where
+  slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+slowPext64 :: Word64 -> Word64 -> Word64
+slowPext64 = slowPext64' 0 0 0
+
+slowPext32 :: Word32 -> Word32 -> Word32
+slowPext32 s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
+
+slowPext64' :: Word64 -> Int -> Int -> Word64 -> Word64 -> Word64
+slowPext64' result offset index src mask = if index /= 64
+  then if maskBit /= 0
+          then slowPext64' nextResult (offset + 1) (index + 1) src mask
+          else slowPext64' result      offset      (index + 1) src mask
+  else result
+  where srcBit      = (src  `shiftR` index) .&. 1
+        maskBit     = (mask `shiftR` index) .&. 1
+        nextResult  = result .|. (srcBit `shiftL` offset)
+
+test_pext   = test (0 :: Word  ) pext slowPext
+test_pext8  = test (0 :: Word8 ) pext slowPext
+test_pext16 = test (0 :: Word16) pext slowPext
+test_pext32 = test (0 :: Word32) pext slowPext
+test_pext64 = test (0 :: Word64) pext slowPext
+
+mask n = (2 ^ n) - 1
+
+fst4 :: (a, b, c, d) -> a
+fst4 (a, _, _, _) = a
+
+runCase :: Eq a
+        => (a -> a -> a)
+        -> (a -> a -> a)
+        -> (a, a)
+        -> (Bool, a, a, (a, a))
+runCase fast slow (x, y) = (slow x y == fast x y, slow x y, fast x y, (x, y))
+
+test :: (Show a, Num a, Eq a) => a -> (a -> a -> a) -> (a -> a -> a) -> String
+test _ fast slow = case failing of
+    [] -> "OK"
+    ((_, e, a, i):xs) ->
+        "FAIL\n" ++ "   Input: " ++ show i ++ "\nExpected: " ++ show e ++
+        "\n  Actual: " ++ show a
+  where failing = dropWhile fst4 . map (runCase fast slow) $ cases
+        cases   = (,) <$> numbers <*> numbers
+        -- 10 random numbers
+#if SIZEOF_HSWORD == 4
+        numbers = [ 1480294021, 1626858410, 2316287658, 1246556957, 3806579062
+                  , 65945563  , 1521588071, 791321966 , 1355466914, 2284998160
+                  ]
+#elif SIZEOF_HSWORD == 8
+        numbers = [ 11004539497957619752, 5625461252166958202
+                  , 1799960778872209546 , 16979826074020750638
+                  , 12789915432197771481, 11680809699809094550
+                  , 13208678822802632247, 13794454868797172383
+                  , 13364728999716654549, 17516539991479925226
+                  ]
+#else
+# error Unexpected word size
+#endif
diff --git a/testsuite/tests/codeGen/should_run/cgrun076.stdout b/testsuite/tests/codeGen/should_run/cgrun076.stdout
new file mode 100644
index 0000000000..e22e2cd950
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun076.stdout
@@ -0,0 +1,6 @@
+OK
+OK
+OK
+OK
+OK
+
diff --git a/testsuite/tests/deriving/should_compile/drv-empty-data.stderr b/testsuite/tests/deriving/should_compile/drv-empty-data.stderr
index e131c1cf5b..5baf6a6c6e 100644
--- a/testsuite/tests/deriving/should_compile/drv-empty-data.stderr
+++ b/testsuite/tests/deriving/should_compile/drv-empty-data.stderr
@@ -7,7 +7,7 @@ Derived class instances:
     GHC.Read.readListPrec = GHC.Read.readListPrecDefault
   
   instance GHC.Show.Show (DrvEmptyData.Void a) where
-    GHC.Show.showsPrec z = case z of
+    GHC.Show.showsPrec _ z = case z of
   
   instance GHC.Classes.Ord (DrvEmptyData.Void a) where
     GHC.Classes.compare _ z = GHC.Types.EQ
diff --git a/testsuite/tests/driver/T4437.hs b/testsuite/tests/driver/T4437.hs
index 27f5e1aaba..6a46e52ad6 100644
--- a/testsuite/tests/driver/T4437.hs
+++ b/testsuite/tests/driver/T4437.hs
@@ -39,7 +39,8 @@ expectedGhcOnlyExtensions :: [String]
 expectedGhcOnlyExtensions = ["RelaxedLayout",
                              "AlternativeLayoutRule",
                              "AlternativeLayoutRuleTransitional",
-                             "EmptyDataDeriving"]
+                             "EmptyDataDeriving",
+                             "NumericUnderscores"]
 
 expectedCabalOnlyExtensions :: [String]
 expectedCabalOnlyExtensions = ["Generics",
diff --git a/testsuite/tests/parser/should_fail/NoNumericUnderscores0.hs b/testsuite/tests/parser/should_fail/NoNumericUnderscores0.hs
new file mode 100644
index 0000000000..5e6821124a
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NoNumericUnderscores0.hs
@@ -0,0 +1,12 @@
+{-# LANGUAGE NoNumericUnderscores #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for integer literal
+-- in NO NumericUnderscores extension.
+
+module NoNumericUnderscores0 where
+
+f :: Int -> ()
+f 1_000 = ()
+f _   = ()
diff --git a/testsuite/tests/parser/should_fail/NoNumericUnderscores0.stderr b/testsuite/tests/parser/should_fail/NoNumericUnderscores0.stderr
new file mode 100644
index 0000000000..af59581c14
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NoNumericUnderscores0.stderr
@@ -0,0 +1,3 @@
+
+NoNumericUnderscores0.hs:11:3: error:
+    Use NumericUnderscores to allow underscores in integer literals
diff --git a/testsuite/tests/parser/should_fail/NoNumericUnderscores1.hs b/testsuite/tests/parser/should_fail/NoNumericUnderscores1.hs
new file mode 100644
index 0000000000..017f20528b
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NoNumericUnderscores1.hs
@@ -0,0 +1,12 @@
+{-# LANGUAGE NoNumericUnderscores #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for floating literal
+-- in NO NumericUnderscores extension.
+
+module NoNumericUnderscores1 where
+
+f :: Float -> ()
+f 1_000.0_1 = ()
+f _   = ()
diff --git a/testsuite/tests/parser/should_fail/NoNumericUnderscores1.stderr b/testsuite/tests/parser/should_fail/NoNumericUnderscores1.stderr
new file mode 100644
index 0000000000..0dfbaa409e
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NoNumericUnderscores1.stderr
@@ -0,0 +1,3 @@
+
+NoNumericUnderscores1.hs:11:3: error:
+    Use NumericUnderscores to allow underscores in floating literals
diff --git a/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.hs b/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.hs
new file mode 100644
index 0000000000..1f04184365
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.hs
@@ -0,0 +1,13 @@
+{-# LANGUAGE NumericUnderscores #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for invalid case of NumericUnderscores.
+
+main :: IO ()
+main = do
+    print [
+            -- integer
+            1000000_,
+            _1000000
+          ]
diff --git a/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.stderr b/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.stderr
new file mode 100644
index 0000000000..8c872575a5
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NumericUnderscoresFail0.stderr
@@ -0,0 +1,4 @@
+NumericUnderscoresFail0.hs:9:5: error:
+NumericUnderscoresFail0.hs:11:13: error:
+NumericUnderscoresFail0.hs:11:20: error:
+NumericUnderscoresFail0.hs:12:13: error:
diff --git a/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.hs b/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.hs
new file mode 100644
index 0000000000..0a6a3051d6
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.hs
@@ -0,0 +1,20 @@
+{-# LANGUAGE NumericUnderscores #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for invalid case of NumericUnderscores.
+
+main :: IO ()
+main = do
+    print [
+            -- float
+            0_.0001,
+            _0.0001,
+            0.0001_,
+            0._0001,
+
+            -- float with exponent
+            1e_+23,
+            1e+23_,
+            1e+_23
+          ]
diff --git a/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.stderr b/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.stderr
new file mode 100644
index 0000000000..e1c91de091
--- /dev/null
+++ b/testsuite/tests/parser/should_fail/NumericUnderscoresFail1.stderr
@@ -0,0 +1,7 @@
+NumericUnderscoresFail1.hs:11:14: error:
+NumericUnderscoresFail1.hs:13:19: error:
+NumericUnderscoresFail1.hs:14:15: error:
+NumericUnderscoresFail1.hs:17:14: error: Variable not in scope: e_
+NumericUnderscoresFail1.hs:18:18: error:
+NumericUnderscoresFail1.hs:19:14: error: Variable not in scope: e
+NumericUnderscoresFail1.hs:19:16: error:
diff --git a/testsuite/tests/parser/should_fail/all.T b/testsuite/tests/parser/should_fail/all.T
index 483e5fe511..c16a988c2f 100644
--- a/testsuite/tests/parser/should_fail/all.T
+++ b/testsuite/tests/parser/should_fail/all.T
@@ -103,3 +103,10 @@ test('T8501b', normal, compile_fail, [''])
 test('T8501c', normal, compile_fail, [''])
 test('T12610', normal, compile_fail, [''])
 test('T14588', normal, compile_fail, [''])
+
+test('NoNumericUnderscores0', normal, compile_fail, [''])
+test('NoNumericUnderscores1', normal, compile_fail, [''])
+test('NumericUnderscoresFail0',
+     grep_errmsg(r'^NumericUnderscoresFail0.hs:'), compile_fail, [''])
+test('NumericUnderscoresFail1',
+     grep_errmsg(r'^NumericUnderscoresFail1.hs:'), compile_fail, [''])
diff --git a/testsuite/tests/parser/should_run/NumericUnderscores0.hs b/testsuite/tests/parser/should_run/NumericUnderscores0.hs
new file mode 100644
index 0000000000..7aefce95c6
--- /dev/null
+++ b/testsuite/tests/parser/should_run/NumericUnderscores0.hs
@@ -0,0 +1,101 @@
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE HexFloatLiterals #-}
+{-# LANGUAGE NegativeLiterals #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for boxed literals.
+
+main :: IO ()
+main = do
+    -- Each case corresponds to the definition of Lexer.x
+    --
+    -- Normal integral literals
+    -- decimal
+    print [ 1_000_000 == 1000000,
+            1__0 == 10,
+            299_792_458 == 299792458,
+            8_04_1 == 8041,
+            2017_12_31 == 20171231
+          ]
+
+    -- binary
+    print [ 0b01_0000_0000 == 0b0100000000,
+            0b1_11_01_0000_0_111 == 0b1110100000111,
+            0b1100_1011__1110_1111__0101_0011 ==
+            0b110010111110111101010011
+          ]
+
+    -- octal
+    print [ 0o1_000_000 == 0o1000000,
+            0O1__0 == 0O10
+          ]
+
+    -- hexadecimal
+    print [ 0x1_000_000 == 0x1000000,
+            0x1__0 == 0x10,
+            0xff_00_00 == 0xff0000,
+            0X3fff_ffff == 0x3fffffff
+          ]
+
+    -- negative decimal
+    print [ -1_0 == -10
+          ]
+
+    -- negative binary
+    print [ -0b1_0 == -0b10
+          ]
+
+    -- negative octal
+    print [ -0o1_0 == -0o10
+          ]
+
+    -- negative hexadecimal
+    print [ -0x1_0 == -0x10
+          ]
+
+    ---- Normal rational literals
+    -- float
+    print [ 3.141_592_653_589_793 == 3.141592653589793,
+            96_485.332_89 == 96485.33289,
+            6.022_140_857e+23 == 6.022140857e+23
+          ]
+
+    -- negative float
+    print [ -1_0.0_1 == -10.01,
+            -1_0e+2 == -10e+2,
+            -1_0.0_1e+3 == -10.01e+3
+          ]
+
+    -- hexadecimal float
+    print [ 0xF_F.1F == 0xFF.1F,
+            0xF_01p-8 == 0xF01p-8,
+            0x0.F_1p4 == 0x0.F1p4
+          ]
+
+    -- negative hexadecimal float
+    print [ -0xF_F.F == -0xFF.F,
+            -0xF_01p-1 == -0xF01p-1,
+            -0x0.F_1p1 == -0x0.F1p1
+          ]
+
+    -- Additional testcase
+    --
+    -- Validity
+    print [ 0.000_1 == 0.0001,
+            1_0.000_1 == 10.0001,
+            1e+23 == 1e+23,
+            1_e+23 == 1e+23,
+            1__e+23 == 1e+23,
+            1.0_e+23 == 1.0e+23,
+            1.0_e+2_3 == 1.0e+23,
+            1_e23 == 1e23,
+            1_e-23 == 1e-23,
+            1_0_e23 == 10e23,
+            1_0_e-23 == 10e-23,
+            0b_01 == 0b01,
+            0b__11 == 0b11,
+            0x_ff == 0xff,
+            0x__ff == 0xff
+          ]
diff --git a/testsuite/tests/parser/should_run/NumericUnderscores0.stdout b/testsuite/tests/parser/should_run/NumericUnderscores0.stdout
new file mode 100644
index 0000000000..76f19a8ad9
--- /dev/null
+++ b/testsuite/tests/parser/should_run/NumericUnderscores0.stdout
@@ -0,0 +1,13 @@
+[True,True,True,True,True]
+[True,True,True]
+[True,True]
+[True,True,True,True]
+[True]
+[True]
+[True]
+[True]
+[True,True,True]
+[True,True,True]
+[True,True,True]
+[True,True,True]
+[True,True,True,True,True,True,True,True,True,True,True,True,True,True,True]
diff --git a/testsuite/tests/parser/should_run/NumericUnderscores1.hs b/testsuite/tests/parser/should_run/NumericUnderscores1.hs
new file mode 100644
index 0000000000..b9d0dca725
--- /dev/null
+++ b/testsuite/tests/parser/should_run/NumericUnderscores1.hs
@@ -0,0 +1,88 @@
+{-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE NegativeLiterals #-}
+
+-- Test for NumericUnderscores extension.
+-- See Trac #14473
+-- This is a testcase for unboxed literals.
+
+import GHC.Types
+
+main :: IO ()
+main = do
+    -- Each case corresponds to the definition of Lexer.x
+    --
+    -- Unboxed ints and words
+    -- decimal int
+    print [ (I# 1_000_000#) == 1000000,
+            (I# 299_792_458#) == 299792458
+          ]
+
+    -- binary int
+    print [ (I# 0b01_0000_0000#) == 0b0100000000,
+            (I# 0b1_11_01_0000_0_111#) == 0b1110100000111
+          ]
+
+    -- octal int
+    print [ (I# 0o1_000_000#) == 0o1000000,
+            (I# 0O1__0#) == 0O10
+          ]
+
+    -- hexadecimal int
+    print [ (I# 0x1_000_000#) == 0x1000000,
+            (I# 0X3fff_ffff#) == 0x3fffffff
+          ]
+
+    -- negative decimal int
+    print [ (I# -1_000_000#) == -1000000
+          ]
+
+    -- negative binary int
+    print [ (I# -0b01_0000_0000#) == -0b0100000000
+          ]
+
+    -- negative octal int
+    print [ (I# -0o1_000_000#) == -0o1000000
+          ]
+
+    -- negative hexadecimal int
+    print [ (I# -0x1_000_000#) == -0x1000000
+          ]
+
+    -- decimal word
+    print [ (W# 1_000_000##) == 1000000,
+            (W# 299_792_458##) == 299792458
+          ]
+
+    -- binary word
+    print [ (W# 0b1_0##) == 0b10
+          ]
+
+    -- octal word
+    print [ (W# 0o1_0##) == 0o10
+          ]
+
+    -- hexadecimal word
+    print [ (W# 0x1_0##) == 0x10
+          ]
+
+    -- Unboxed floats and doubles
+    -- float
+    print [ (F# 3.141_592_653_589_793#) == 3.141592653589793,
+            (F# 3_14e-2#) == 314e-2,
+            (F# 96_485.332_89#) == 96485.33289,
+            (F# 6.022_140_857e+23#) == 6.022140857e+23,
+            (F# -3.141_592#) == -3.141592,
+            (F# -3_14e-2#) == -314e-2,
+            (F# -6.022_140e+23#) == -6.022140e+23
+          ]
+
+    -- double
+    print [ (D# 3_14e-2##) == 314e-2,
+            (D# 96_485.332_89##) == 96485.33289,
+            (D# 6.022_140_857e+23##) == 6.022140857e+23,
+            (D# -3.141_592##) == -3.141592,
+            (D# -3_14e-2##) == -314e-2,
+            (D# -6.022_140e+23##) == -6.022140e+23
+          ]
diff --git a/testsuite/tests/parser/should_run/NumericUnderscores1.stdout b/testsuite/tests/parser/should_run/NumericUnderscores1.stdout
new file mode 100644
index 0000000000..bddde5bccb
--- /dev/null
+++ b/testsuite/tests/parser/should_run/NumericUnderscores1.stdout
@@ -0,0 +1,14 @@
+[True,True]
+[True,True]
+[True,True]
+[True,True]
+[True]
+[True]
+[True]
+[True]
+[True,True]
+[True]
+[True]
+[True]
+[True,True,True,True,True,True,True]
+[True,True,True,True,True,True]
diff --git a/testsuite/tests/parser/should_run/all.T b/testsuite/tests/parser/should_run/all.T
index bcf0bc83f3..0c9e65fd14 100644
--- a/testsuite/tests/parser/should_run/all.T
+++ b/testsuite/tests/parser/should_run/all.T
@@ -12,3 +12,5 @@ test('BinaryLiterals2', [], compile_and_run, [''])
 test('T10807', normal, compile_and_run, [''])
 test('NegativeZero', normal, compile_and_run, [''])
 test('HexFloatLiterals', normal, compile_and_run, [''])
+test('NumericUnderscores0', normal, compile_and_run, [''])
+test('NumericUnderscores1', normal, compile_and_run, [''])
diff --git a/testsuite/tests/patsyn/should_fail/T14507.stderr b/testsuite/tests/patsyn/should_fail/T14507.stderr
index 2ed89cbac6..cec70dfcdb 100644
--- a/testsuite/tests/patsyn/should_fail/T14507.stderr
+++ b/testsuite/tests/patsyn/should_fail/T14507.stderr
@@ -2,7 +2,7 @@
 T14507.hs:18:9: error:
     • Iceland Jack!  Iceland Jack! Stop torturing me!
       Pattern-bound variable x :: TypeRep a
-        has a type that mentions pattern-bound coercion: co_a2CF
+        has a type that mentions pattern-bound coercion: co
       Hint: use -fprint-explicit-coercions to see the coercions
       Probable fix: add a pattern signature
     • In the declaration for pattern synonym ‘SO’
diff --git a/testsuite/tests/patsyn/should_fail/all.T b/testsuite/tests/patsyn/should_fail/all.T
index 2b3b85bf28..0f4c608169 100644
--- a/testsuite/tests/patsyn/should_fail/all.T
+++ b/testsuite/tests/patsyn/should_fail/all.T
@@ -41,4 +41,4 @@ test('T14114', normal, compile_fail, [''])
 test('T14380', normal, compile_fail, [''])
 test('T14498', normal, compile_fail, [''])
 test('T14552', normal, compile_fail, [''])
-test('T14507', normal, compile_fail, [''])
+test('T14507', normal, compile_fail, ['-dsuppress-uniques'])
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index bd038a2407..431b2889e8 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -1143,10 +1143,11 @@ test('T13379',
 
 test('MultiLayerModules',
      [ compiler_stats_num_field('bytes allocated',
-          [(wordsize(64), 6294813000, 10),
+          [(wordsize(64), 5619893176, 10),
           # initial:    12139116496
           # 2017-05-12: 6956533312   Revert "Use a deterministic map for imp_dep_mods"
           # 2017-05-31: 6294813000   Faster checkFamInstConsistency
+          # 2018-01-21: 5619893176   Allocate less in plus_mod_dep
           ]),
        pre_cmd('./genMultiLayerModules'),
        extra_files(['genMultiLayerModules']),
@@ -1166,6 +1167,18 @@ test('ManyConstructors',
      multimod_compile,
      ['ManyConstructors', '-v0'])
 
+test('ManyAlternatives',
+     [ compiler_stats_num_field('bytes allocated',
+          [(wordsize(64), 1398898072, 10),
+          # initial:    1756999240
+          # 2018-01-20: 1398898072  Use IntSet in Dataflow
+          ]),
+       pre_cmd('./genManyAlternatives'),
+       extra_files(['genManyAlternatives']),
+     ],
+     multimod_compile,
+     ['ManyAlternatives', '-v0'])
+
 test('T13701',
      [ compiler_stats_num_field('bytes allocated',
           [(platform('x86_64-apple-darwin'), 2217187888, 10),
diff --git a/testsuite/tests/perf/compiler/genManyAlternatives b/testsuite/tests/perf/compiler/genManyAlternatives
new file mode 100755
index 0000000000..1035425bd4
--- /dev/null
+++ b/testsuite/tests/perf/compiler/genManyAlternatives
@@ -0,0 +1,34 @@
+SIZE=1000
+MODULE=ManyAlternatives
+
+# Generates a module with a large number of alternatives that looks
+# like this:
+#
+#   module ManyAlternatives where
+#
+#   data A1000 = A0
+#     | A0001
+#     | A0002
+#     ...
+#     | A1000
+#
+#   f :: A -> Int
+#   f A0001 = 1990001
+#   f A0002 = 1990002
+#   ...
+#   f A1000 = 1991000
+#
+# The point of this test is to check if we don't regress on #14667 reintroducing
+# some code that's quadratic in the number of alternatives.
+
+echo "module $MODULE where" > $MODULE.hs
+echo >> $MODULE.hs
+echo "data A$SIZE = A0" >> $MODULE.hs
+for i in $(seq -w 1 $SIZE); do
+  echo "  | A$i" >> $MODULE.hs
+done
+echo >> $MODULE.hs
+echo "f :: A$SIZE -> Int" >> $MODULE.hs
+for i in $(seq -w 1 $SIZE); do
+  echo "f A$i = 199$i" >> $MODULE.hs
+done
diff --git a/testsuite/tests/plugins/Makefile b/testsuite/tests/plugins/Makefile
index 1ff8d40e8b..96443c9b32 100644
--- a/testsuite/tests/plugins/Makefile
+++ b/testsuite/tests/plugins/Makefile
@@ -52,3 +52,8 @@ T12567a:
 	"$(TEST_HC)" $(TEST_HC_OPTS) $(ghcPluginWayFlags) --make T12567a.hs -package-db simple-plugin/pkg.T12567a/local.package.conf -hide-all-plugin-packages -plugin-package simple-plugin 1>&2
 	"$(TEST_HC)" $(TEST_HC_OPTS) $(ghcPluginWayFlags) --make T12567a.hs -package-db simple-plugin/pkg.T12567a/local.package.conf -hide-all-plugin-packages -plugin-package simple-plugin 2>&1 | grep "T12567a.hs, T12567a.o" 1>&2
 	"$(TEST_HC)" $(TEST_HC_OPTS) $(ghcPluginWayFlags) --make T12567b.hs -package-db simple-plugin/pkg.T12567a/local.package.conf -hide-all-plugin-packages -plugin-package simple-plugin 1>&2
+
+.PHONY: T14335
+T14335:
+	"$(TEST_HC)" $(TEST_HC_OPTS) $(ghcPluginWayFlags) -fexternal-interpreter --make -v0 plugins01.hs -package-db simple-plugin/pkg.plugins01/local.package.conf -fplugin Simple.Plugin -fplugin-opt Simple.Plugin:Irrelevant_Option -hide-all-plugin-packages -plugin-package simple-plugin
+	./plugins01
diff --git a/testsuite/tests/plugins/all.T b/testsuite/tests/plugins/all.T
index 5f53531518..0e523f0d28 100644
--- a/testsuite/tests/plugins/all.T
+++ b/testsuite/tests/plugins/all.T
@@ -67,3 +67,9 @@ test('T12567a',
      [extra_files(['T12567b.hs', 'simple-plugin/']),
       pre_cmd('$MAKE -s --no-print-directory -C simple-plugin package.T12567a TOP={top}')],
      run_command, ['$MAKE -s --no-print-directory T12567a'])
+
+test('T14335',
+     [extra_files(['simple-plugin/', 'plugins01.hs']),
+      pre_cmd('$MAKE -s --no-print-directory -C simple-plugin package.plugins01 TOP={top}'),
+      expect_broken(14335)],
+     run_command, ['$MAKE -s --no-print-directory T14335'])
diff --git a/testsuite/tests/rebindable/T14670.hs b/testsuite/tests/rebindable/T14670.hs
new file mode 100644
index 0000000000..8a99c576ed
--- /dev/null
+++ b/testsuite/tests/rebindable/T14670.hs
@@ -0,0 +1,11 @@
+{-# LANGUAGE RebindableSyntax #-}
+
+module Lib where
+
+import Prelude (IO)
+
+pure = undefined
+
+foo :: IO ()
+foo = do
+  pure ()
diff --git a/testsuite/tests/rebindable/all.T b/testsuite/tests/rebindable/all.T
index 83bfa2f278..f796a38750 100644
--- a/testsuite/tests/rebindable/all.T
+++ b/testsuite/tests/rebindable/all.T
@@ -34,3 +34,4 @@ test('T10112', normal, compile, [''])
 test('T11216', normal, compile, [''])
 test('T11216A', normal, compile, [''])
 test('T12080', normal, compile, [''])
+test('T14670', expect_broken(14670), compile, [''])
diff --git a/utils/ghc-cabal/Main.hs b/utils/ghc-cabal/Main.hs
index f330ce06c3..3faf998ee7 100644
--- a/utils/ghc-cabal/Main.hs
+++ b/utils/ghc-cabal/Main.hs
@@ -6,7 +6,7 @@ import qualified Distribution.ModuleName as ModuleName
 import Distribution.PackageDescription
 import Distribution.PackageDescription.Check hiding (doesFileExist)
 import Distribution.PackageDescription.Configuration
-import Distribution.PackageDescription.Parse
+import Distribution.PackageDescription.Parsec
 import Distribution.Package
 import Distribution.Simple
 import Distribution.Simple.Configure
@@ -93,7 +93,7 @@ runDefaultMain :: IO ()
 runDefaultMain
  = do let verbosity = normal
       gpdFile <- defaultPackageDesc verbosity
-      gpd <- readPackageDescription verbosity gpdFile
+      gpd <- readGenericPackageDescription verbosity gpdFile
       case buildType (flattenPackageDescription gpd) of
           Configure -> defaultMainWithHooks autoconfUserHooks
           -- time has a "Custom" Setup.hs, but it's actually Configure
@@ -118,7 +118,7 @@ doCheck directory
  = withCurrentDirectory directory
  $ do let verbosity = normal
       gpdFile <- defaultPackageDesc verbosity
-      gpd <- readPackageDescription verbosity gpdFile
+      gpd <- readGenericPackageDescription verbosity gpdFile
       case filter isFailure $ checkPackage gpd Nothing of
           []   -> return ()
           errs -> mapM_ print errs >> exitWith (ExitFailure 1)
author	Joachim Breitner <mail@joachim-breitner.de>	2018-01-25 21:38:59 -0500
committer	Joachim Breitner <mail@joachim-breitner.de>	2018-01-25 21:38:59 -0500
commit	73ed13ed36e5a8e3fe29b9c349dfc292bed66a98 (patch)
tree	007bfa621a8d2cf4154ce22eba183d38819825bd
parent	dc6de0a3d6c7acc1b4147a66de8fd6d8b9c60e54 (diff)
parent	06366890ba77c20198d7fccc870083b0bbfb1b11 (diff)
download	haskell-73ed13ed36e5a8e3fe29b9c349dfc292bed66a98.tar.gz