1 files changed, 113 insertions, 129 deletions
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index be23f2405e..3d41444848 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -20,23 +20,23 @@ module GHC.Types.Literal
 
         -- ** Creating Literals
         , mkLitInt, mkLitIntWrap, mkLitIntWrapC, mkLitIntUnchecked
-        , mkLitWord, mkLitWordWrap, mkLitWordWrapC
-        , mkLitInt8, mkLitInt8Wrap
-        , mkLitWord8, mkLitWord8Wrap
-        , mkLitInt16, mkLitInt16Wrap
-        , mkLitWord16, mkLitWord16Wrap
-        , mkLitInt32, mkLitInt32Wrap
-        , mkLitWord32, mkLitWord32Wrap
-        , mkLitInt64, mkLitInt64Wrap
-        , mkLitWord64, mkLitWord64Wrap
+        , mkLitWord, mkLitWordWrap, mkLitWordWrapC, mkLitWordUnchecked
+        , mkLitInt8, mkLitInt8Wrap, mkLitInt8Unchecked
+        , mkLitWord8, mkLitWord8Wrap, mkLitWord8Unchecked
+        , mkLitInt16, mkLitInt16Wrap, mkLitInt16Unchecked
+        , mkLitWord16, mkLitWord16Wrap, mkLitWord16Unchecked
+        , mkLitInt32, mkLitInt32Wrap, mkLitInt32Unchecked
+        , mkLitWord32, mkLitWord32Wrap, mkLitWord32Unchecked
+        , mkLitInt64, mkLitInt64Wrap, mkLitInt64Unchecked
+        , mkLitWord64, mkLitWord64Wrap, mkLitWord64Unchecked
         , mkLitFloat, mkLitDouble
         , mkLitChar, mkLitString
         , mkLitInteger, mkLitNatural
         , mkLitNumber, mkLitNumberWrap
+        , mkLitRubbish
 
         -- ** Operations on Literals
         , literalType
-        , absentLiteralOf
         , pprLiteral
         , litNumIsSigned
         , litNumCheckRange
@@ -61,7 +61,6 @@ module GHC.Types.Literal
         , charToIntLit, intToCharLit
         , floatToIntLit, intToFloatLit, doubleToIntLit, intToDoubleLit
         , nullAddrLit, floatToDoubleLit, doubleToFloatLit
-        , rubbishLit, isRubbishLit
         ) where
 
 #include "HsVersions.h"
@@ -70,7 +69,6 @@ import GHC.Prelude
 
 import GHC.Builtin.Types.Prim
 import {-# SOURCE #-} GHC.Builtin.Types
-import GHC.Builtin.Names
 import GHC.Core.Type
 import GHC.Core.TyCon
 import GHC.Utils.Outputable
@@ -79,7 +77,6 @@ import GHC.Types.Basic
 import GHC.Utils.Binary
 import GHC.Settings.Constants
 import GHC.Platform
-import GHC.Types.Unique.FM
 import GHC.Utils.Misc
 import GHC.Utils.Panic
 
@@ -114,8 +111,7 @@ import Numeric ( fromRat )
 -- * The literal derived from the label mentioned in a \"foreign label\"
 --   declaration ('LitLabel')
 --
--- * A 'LitRubbish' to be used in place of values of 'UnliftedRep'
---   (i.e. 'MutVar#') when the value is never used.
+-- * A 'LitRubbish' to be used in place of values that are never used.
 --
 -- * A character
 -- * A string
@@ -138,10 +134,13 @@ data Literal
                                 -- that can be represented as a Literal. Create
                                 -- with 'nullAddrLit'
 
-  | LitRubbish Bool             -- ^ A nonsense value; always boxed, but
-                                --      True <=> lifted, False <=> unlifted
-                                -- Used when a binding is absent.
-                                -- See Note [Rubbish literals]
+  | LitRubbish [PrimRep]        -- ^ A nonsense value of the given
+                                -- representation. See Note [Rubbish values].
+                                --
+                                -- The @[PrimRep]@ of a 'Type' can be obtained
+                                -- from 'typeMonoPrimRep_maybe'. The field
+                                -- becomes empty or singleton post-unarisation,
+                                -- see Note [Post-unarisation invariants].
 
   | LitFloat   Rational         -- ^ @Float#@. Create with 'mkLitFloat'
   | LitDouble  Rational         -- ^ @Double#@. Create with 'mkLitDouble'
@@ -194,6 +193,12 @@ litNumIsSigned nt = case nt of
   LitNumWord32  -> False
   LitNumWord64  -> False
 
+instance Binary LitNumType where
+   put_ bh numTyp = putByte bh (fromIntegral (fromEnum numTyp))
+   get bh = do
+      h <- getByte bh
+      return (toEnum (fromIntegral h))
+
 {-
 Note [BigNum literals]
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -225,12 +230,6 @@ for more details.
 
 -}
 
-instance Binary LitNumType where
-   put_ bh numTyp = putByte bh (fromIntegral (fromEnum numTyp))
-   get bh = do
-      h <- getByte bh
-      return (toEnum (fromIntegral h))
-
 instance Binary Literal where
     put_ bh (LitChar aa)     = do putByte bh 0; put_ bh aa
     put_ bh (LitString ab)   = do putByte bh 1; put_ bh ab
@@ -272,9 +271,10 @@ instance Binary Literal where
                     nt <- get bh
                     i  <- get bh
                     return (LitNumber nt i)
-              _ -> do
+              7 -> do
                     b <- get bh
                     return (LitRubbish b)
+              _ -> pprPanic "Binary:Literal" (int (fromIntegral h))
 
 instance Outputable Literal where
     ppr = pprLiteral id
@@ -555,6 +555,12 @@ mkLitNatural :: Integer -> Literal
 mkLitNatural x = ASSERT2( inNaturalRange x,  integer x )
                     (LitNumber LitNumNatural x)
 
+-- | Create a rubbish literal of the given representation.
+-- The representation of a 'Type' can be obtained via 'typeMonoPrimRep_maybe'.
+-- See Note [Rubbish values].
+mkLitRubbish :: [PrimRep] -> Literal
+mkLitRubbish = LitRubbish
+
 inNaturalRange :: Integer -> Bool
 inNaturalRange x = x >= 0
 
@@ -694,14 +700,6 @@ doubleToFloatLit l             = pprPanic "doubleToFloatLit" (ppr l)
 nullAddrLit :: Literal
 nullAddrLit = LitNullAddr
 
--- | A rubbish literal; see Note [Rubbish literals]
-rubbishLit :: Bool -> Literal
-rubbishLit is_lifted = LitRubbish is_lifted
-
-isRubbishLit :: Literal -> Bool
-isRubbishLit (LitRubbish {}) = True
-isRubbishLit _               = False
-
 {-
         Predicates
         ~~~~~~~~~~
@@ -797,7 +795,8 @@ litIsLifted (LitNumber nt _) = case nt of
   LitNumWord16  -> False
   LitNumWord32  -> False
   LitNumWord64  -> False
-litIsLifted _                  = False
+litIsLifted _                        = False
+  -- Even RUBBISH[LiftedRep] is unlifted, as rubbish values are always evaluated.
 
 {-
         Types
@@ -825,40 +824,10 @@ literalType (LitNumber lt _)  = case lt of
    LitNumWord16  -> word16PrimTy
    LitNumWord32  -> word32PrimTy
    LitNumWord64  -> word64PrimTy
-literalType (LitRubbish is_lifted) = mkForAllTy a Inferred (mkTyVarTy a)
+literalType (LitRubbish preps) = mkForAllTy a Inferred (mkTyVarTy a)
   where
-    -- See Note [Rubbish literals]
-    a | is_lifted = alphaTyVar
-      | otherwise = alphaTyVarUnliftedRep
-
-absentLiteralOf :: TyCon -> Maybe Literal
--- Return a literal of the appropriate primitive
--- TyCon, to use as a placeholder when it doesn't matter
--- Rubbish literals are handled in GHC.Core.Opt.WorkWrap.Utils, because
---  1. Looking at the TyCon is not enough, we need the actual type
---  2. This would need to return a type application to a literal
-absentLiteralOf tc = lookupUFM absent_lits tc
-
--- We do not use TyConEnv here to avoid import cycles.
-absent_lits :: UniqFM TyCon Literal
-absent_lits = listToUFM_Directly
-                        -- Explicitly construct the mape from the known
-                        -- keys of these tyCons.
-                        [ (addrPrimTyConKey,    LitNullAddr)
-                        , (charPrimTyConKey,    LitChar 'x')
-                        , (intPrimTyConKey,     mkLitIntUnchecked 0)
-                        , (int8PrimTyConKey,    mkLitInt8Unchecked 0)
-                        , (int16PrimTyConKey,   mkLitInt16Unchecked 0)
-                        , (int32PrimTyConKey,   mkLitInt32Unchecked 0)
-                        , (int64PrimTyConKey,   mkLitInt64Unchecked 0)
-                        , (wordPrimTyConKey,    mkLitWordUnchecked 0)
-                        , (word8PrimTyConKey,   mkLitWord8Unchecked 0)
-                        , (word16PrimTyConKey,  mkLitWord16Unchecked 0)
-                        , (word32PrimTyConKey,  mkLitWord32Unchecked 0)
-                        , (word64PrimTyConKey,  mkLitWord64Unchecked 0)
-                        , (floatPrimTyConKey,   LitFloat 0)
-                        , (doublePrimTyConKey,  LitDouble 0)
-                        ]
+    -- See Note [Rubbish values]
+    a = head $ mkTemplateTyVars [tYPE (primRepsToRuntimeRep preps)]
 
 {-
         Comparison
@@ -910,9 +879,8 @@ pprLiteral add_par (LitLabel l mb fod) =
     where b = case mb of
               Nothing -> pprHsString l
               Just x  -> doubleQuotes (text (unpackFS l ++ '@':show x))
-pprLiteral _       (LitRubbish is_lifted)
-  = text "__RUBBISH"
-    <> parens (if is_lifted then text "lifted" else text "unlifted")
+pprLiteral _       (LitRubbish reps)
+  = text "RUBBISH" <> ppr reps
 
 pprIntegerVal :: (SDoc -> SDoc) -> Integer -> SDoc
 -- See Note [Printing of literals in Core].
@@ -954,61 +922,77 @@ LitFloat        -1.0#
 LitDouble       -1.0##
 LitInteger      -1                 (-1)
 LitLabel        "__label" ...      ("__label" ...)
-LitRubbish      "__RUBBISH"
-
-Note [Rubbish literals]
-~~~~~~~~~~~~~~~~~~~~~~~
-During worker/wrapper after demand analysis, where an argument
-is unused (absent) we do the following w/w split (supposing that
-y is absent):
-
-  f x y z = e
-===>
-  f x y z = $wf x z
-  $wf x z = let y = <absent value>
-            in e
-
-Usually the binding for y is ultimately optimised away, and
-even if not it should never be evaluated -- but that's the
-way the w/w split starts off.
-
-What is <absent value>?
-* For lifted values <absent value> can be a call to 'error'.
-* For primitive types like Int# or Word# we can use any random
-  value of that type.
-* But what about /unlifted/ but /boxed/ types like MutVar# or
-  Array#?  Or /lifted/ but /strict/ values, such as a field of
-  a strict data constructor.  For these we use LitRubbish.
-  See Note [Absent errors] in GHC.Core.Opt.WorkWrap.Utils.hs
-
-The literal (LitRubbish is_lifted)
-has type
-  LitRubbish :: forall (a :: TYPE LiftedRep). a     if is_lifted
-  LitRubbish :: forall (a :: TYPE UnliftedRep). a   otherwise
-
-So we might see a w/w split like
-  $wf x z = let y :: Array# Int = (LitRubbish False) @(Array# Int)
-            in e
-
-Here are the moving parts, but see also Note [Absent errors] in
-GHC.Core.Opt.WorkWrap.Utils
-
-* We define LitRubbish as a constructor in GHC.Types.Literal.Literal
-
-* It is given its polymorphic type by Literal.literalType
-
-* GHC.Core.Opt.WorkWrap.Utils.mk_absent_let introduces a LitRubbish for absent
-  arguments of boxed, unlifted type; or boxed, lifted arguments of strict data
-  constructors.
-
-* In CoreToSTG we convert (RubishLit @t) to just ().  STG is untyped, so this
-  will work OK for both lifted and unlifted (but boxed) values. The important
-  thing is that it is a heap pointer, which the garbage collector can follow if
-  it encounters it.
-
-  We considered maintaining LitRubbish in STG, and lowering it in the code
-  generators, but it seems simpler to do it once and for all in CoreToSTG.
-
-  In GHC.ByteCode.Asm we just lower it as a 0 literal, because it's all boxed to
-  the host GC anyway.
--}
+LitRubbish      "RUBBISH[...]"
+
+Note [Rubbish values]
+~~~~~~~~~~~~~~~~~~~~~
+Sometimes, we need to cough up a rubbish value of a certain type that is used
+in place of dead code we thus aim to eliminate. The value of a dead occurrence
+has no effect on the dynamic semantics of the program, so we can pick any value
+of the same representation.
+Exploiting the results of absence analysis in worker/wrapper is a scenario where
+we need such a rubbish value, see Note [Absent fillers] for examples.
+
+It's completely undefined what the *value* of a rubbish value is, e.g., we could
+pick @0#@ for @Int#@ or @42#@; it mustn't matter where it's inserted into a Core
+program. We embed these rubbish values in the 'LitRubbish' case of the 'Literal'
+data type. Here are the moving parts:
+
+  1. Source Haskell: No way to produce rubbish lits in source syntax. Purely
+     an IR feature.
+
+  2. Core: 'LitRubbish' carries a @[PrimRep]@ which represents the monomorphic
+     'RuntimeRep' of the type it is substituting for.
+     We have it that @RUBBISH[IntRep]@ has type @forall (a :: TYPE IntRep). a@,
+     and the type application @RUBBISH[IntRep] \@Int# :: Int#@ represents
+     a rubbish value of type @Int#@. Rubbish lits are completely opaque in Core.
+     In general, @RUBBISH[preps] :: forall (a :: TYPE rep). a@, where @rep@
+     is the 'RuntimeRep' corresponding to @preps :: [PrimRep]@
+     (via 'primRepsToRuntimeRep'). See 'literalType'.
+     Why not encode a 'RuntimeRep' via a @Type@? Thus
+     > data Literal = ... | LitRubbish Type | ...
+     Because
+       * We have to provide an Eq and Ord instance and @Type@ has none
+       * The encoded @Type@ might be polymorphic and we can only emit code for
+         monomorphic 'RuntimeRep's anyway.
+
+  3. STG: The type app in @RUBBISH[IntRep] \@Int# :: Int#@ is erased and we get
+     the (untyped) 'StgLit' @RUBBISH[IntRep] :: Int#@ in STG.
+     It's treated as mostly opaque, with the exception of the Unariser, where we
+     take apart a case scrutinisation on, or arg occurrence of, e.g.,
+     @RUBBISH[IntRep,DoubleRep]@ (which may stand in for @(# Int#, Double# #)@)
+     into its sub-parts @RUBBISH[IntRep]@ and @RUBBISH[DoubleRep]@, similar to
+     unboxed tuples. @RUBBISH[VoidRep]@ is erased.
+     See 'unariseRubbish_maybe' and also Note [Post-unarisation invariants].
+
+  4. Cmm: We translate 'LitRubbish' to their actual rubbish value in 'cgLit'.
+     The particulars are boring, and only matter when debugging illicit use of
+     a rubbish value; see Modes of failure below.
+
+  5. Bytecode: In GHC.ByteCode.Asm we just lower it as a 0 literal, because it's
+     all boxed to the host GC anyway.
+
+Why not lower LitRubbish in CoreToStg? Because it enables us to use LitRubbish
+when unarising unboxed sums in the future, and it allows rubbish values of e.g.
+VecRep, for which we can't cough up dummy values in STG.
+
+Modes of failure
+----------------
+Suppose there is a bug in GHC, and a rubbish value is used after all. That is
+undefined behavior, of course, but let us list a few examples for failure modes:
+
+ a) For a value of unboxed numeric type like @Int#@, we just use a silly
+    value like 42#. The error might propagate indefinitely, hence we better
+    pick a rather unique literal. Same for Word, Floats, Char and VecRep.
+ b) For AddrRep (like String lits), we emit a null pointer, resulting in a
+    definitive segfault when accessed.
+ c) For boxed values, unlifted or not, we use a pointer to a fixed closure,
+    like @()@, so that the GC has a pointer to follow.
+    If we use that pointer as an 'Array#', we will likely access fields of the
+    array that don't exist, and a seg-fault is likely, but not guaranteed.
+    If we use that pointer as @Either Int Bool@, we might try to access the
+    'Int' field of the 'Left' constructor (which has the same ConTag as '()'),
+    which doesn't exist. In the best case, we'll find an invalid pointer in its
+    position and get a seg-fault, in the worst case the error manifests only one
+    or two indirections later.
+ -}