codeGen: Teach unboxed sum rep logic about levity

Previously Unarise would happily project lifted and unlifted fields to lifted slots. This broke horribly in #19645, where a ByteArray# was passed in a lifted slot and consequently entered. The simplest way to fix this is what I've done here, distinguishing between lifted and unlifted slots in unarise. However, one can imagine more clever solutions, where we coerce the binder to the correct levity with respect to the sum's tag. I doubt that this would be worth the effort. Fixes #19645. (cherry picked from commit 721ea018712606b9feddf09c130552ed981b4900)
author: Ben Gamari <ben@smart-cactus.org> 2021-04-06 19:06:14 -0400
committer: Ben Gamari <ben@smart-cactus.org> 2021-05-04 19:15:54 -0400
commit: 52c2f2b9f00d08e8d1c034d2af8f75bae9891adf (patch)
tree: f4a9255a61e4f372ed46f55d8bc6e31cfa13b4bb
parent: 63bc827ed40e66cd305e2162dfbbd3746be7bf21 (diff)
download: haskell-52c2f2b9f00d08e8d1c034d2af8f75bae9891adf.tar.gz
4 files changed, 72 insertions, 28 deletions
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 1b66212127..f916fca9c6 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -116,11 +116,12 @@ primRepCmmType _      DoubleRep        = f64
 primRepCmmType _      (VecRep len rep) = vec len (primElemRepCmmType rep)
 
 slotCmmType :: DynFlags -> SlotTy -> CmmType
-slotCmmType dflags PtrSlot    = gcWord dflags
-slotCmmType dflags WordSlot   = bWord dflags
-slotCmmType _      Word64Slot = b64
-slotCmmType _      FloatSlot  = f32
-slotCmmType _      DoubleSlot = f64
+slotCmmType dflags PtrLiftedSlot    = gcWord dflags
+slotCmmType dflags PtrUnliftedSlot  = gcWord dflags
+slotCmmType dflags WordSlot         = bWord dflags
+slotCmmType _      Word64Slot       = b64
+slotCmmType _      FloatSlot        = f32
+slotCmmType _      DoubleSlot       = f64
 
 primElemRepCmmType :: PrimElemRep -> CmmType
 primElemRepCmmType Int8ElemRep   = b8
diff --git a/compiler/simplStg/RepType.hs b/compiler/simplStg/RepType.hs
index 588e2ff7be..b58db1cea5 100644
--- a/compiler/simplStg/RepType.hs
+++ b/compiler/simplStg/RepType.hs
@@ -273,7 +273,8 @@ layoutUbxSum sum_slots0 arg_slots0 =
 -- We have 3 kinds of slots:
 --
 --   - Pointer slot: Only shared between actual pointers to Haskell heap (i.e.
---     boxed objects)
+--     boxed objects). These come in two variants: Lifted and unlifted (see
+--     #19645).
 --
 --   - Word slots: Shared between IntRep, WordRep, Int64Rep, Word64Rep, AddrRep.
 --
@@ -283,7 +284,7 @@ layoutUbxSum sum_slots0 arg_slots0 =
 --
 -- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
 -- values, so that we can pack things more tightly.
-data SlotTy = PtrSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot
+data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot
   deriving (Eq, Ord)
     -- Constructor order is important! If slot A could fit into slot B
     -- then slot A must occur first.  E.g.  FloatSlot before DoubleSlot
@@ -292,11 +293,12 @@ data SlotTy = PtrSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot
     -- (would not be true on a 128-bit machine)
 
 instance Outputable SlotTy where
-  ppr PtrSlot    = text "PtrSlot"
-  ppr Word64Slot = text "Word64Slot"
-  ppr WordSlot   = text "WordSlot"
-  ppr DoubleSlot = text "DoubleSlot"
-  ppr FloatSlot  = text "FloatSlot"
+  ppr PtrLiftedSlot   = text "PtrLiftedSlot"
+  ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
+  ppr Word64Slot      = text "Word64Slot"
+  ppr WordSlot        = text "WordSlot"
+  ppr DoubleSlot      = text "DoubleSlot"
+  ppr FloatSlot       = text "FloatSlot"
 
 typeSlotTy :: UnaryType -> Maybe SlotTy
 typeSlotTy ty
@@ -307,8 +309,8 @@ typeSlotTy ty
 
 primRepSlot :: PrimRep -> SlotTy
 primRepSlot VoidRep     = pprPanic "primRepSlot" (text "No slot for VoidRep")
-primRepSlot LiftedRep   = PtrSlot
-primRepSlot UnliftedRep = PtrSlot
+primRepSlot LiftedRep   = PtrLiftedSlot
+primRepSlot UnliftedRep = PtrUnliftedSlot
 primRepSlot IntRep      = WordSlot
 primRepSlot Int8Rep     = WordSlot
 primRepSlot Int16Rep    = WordSlot
@@ -325,27 +327,29 @@ primRepSlot DoubleRep   = DoubleSlot
 primRepSlot VecRep{}    = pprPanic "primRepSlot" (text "No slot for VecRep")
 
 slotPrimRep :: SlotTy -> PrimRep
-slotPrimRep PtrSlot     = LiftedRep   -- choice between lifted & unlifted seems arbitrary
-slotPrimRep Word64Slot  = Word64Rep
-slotPrimRep WordSlot    = WordRep
-slotPrimRep DoubleSlot  = DoubleRep
-slotPrimRep FloatSlot   = FloatRep
+slotPrimRep PtrLiftedSlot   = LiftedRep
+slotPrimRep PtrUnliftedSlot = UnliftedRep
+slotPrimRep Word64Slot      = Word64Rep
+slotPrimRep WordSlot        = WordRep
+slotPrimRep DoubleSlot      = DoubleRep
+slotPrimRep FloatSlot       = FloatRep
 
 -- | Returns the bigger type if one fits into the other. (commutative)
+--
+-- Note that lifted and unlifted pointers are *not* in a fits-in relation for
+-- the reasons described in Note [Don't merge lifted and unlifted slots] in
+-- GHC.Stg.Unarise.
 fitsIn :: SlotTy -> SlotTy -> Maybe SlotTy
 fitsIn ty1 ty2
+  | ty1 == ty2
+  = Just ty1
   | isWordSlot ty1 && isWordSlot ty2
   = Just (max ty1 ty2)
   | isFloatSlot ty1 && isFloatSlot ty2
   = Just (max ty1 ty2)
-  | isPtrSlot ty1 && isPtrSlot ty2
-  = Just PtrSlot
   | otherwise
   = Nothing
   where
-    isPtrSlot PtrSlot = True
-    isPtrSlot _       = False
-
     isWordSlot Word64Slot = True
     isWordSlot WordSlot   = True
     isWordSlot _          = False
diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs
index 5c1d2b5c5d..393455888f 100644
--- a/compiler/simplStg/UnariseStg.hs
+++ b/compiler/simplStg/UnariseStg.hs
@@ -124,6 +124,44 @@ Another example using the same type: (# | (# 2#, 3# #) | #). 2# fits in Word#,
 
   (# 2#, rubbish, 2#, 3# #).
 
+
+Note [Don't merge lifted and unlifted slots]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When merging slots, one might be tempted to collapse lifted and unlifted
+pointers. However, as seen in #19645, this is wrong. Imagine that you have
+the program:
+
+  test :: (# Char | ByteArray# #) -> ByteArray#
+  test (# c | #) = doSomething c
+  test (# | ba #) = ba
+
+Collapsing the Char and ByteArray# slots would produce STG like:
+
+  test :: forall {t}. (# t | GHC.Prim.ByteArray# #) -> GHC.Prim.ByteArray#
+    = {} \r [ (tag :: Int#) (slot0 :: (Any :: Type)) ]
+          case tag of tag'
+            1# -> doSomething slot0
+            2# -> slot0;
+
+Note how `slot0` has a lifted type, despite being bound to an unlifted
+ByteArray# in the 2# alternative. This liftedness would cause the code generator to
+attempt to enter it upon returning. As unlifted objects do not have entry code,
+this causes a runtime crash.
+
+For this reason, Unarise treats unlifted and lifted things as distinct slot
+types, despite both being GC pointers. This approach is a slight pessimisation
+(since we need to pass more arguments) but appears to be the simplest way to
+avoid #19645. Other alternatives considered include:
+
+ a. Giving unlifted objects "trivial" entry code. However, we ultimately
+    concluded that the value of the "unlifted things are never entered" invariant
+    outweighed the simplicity of this approach.
+
+ b. Annotating occurrences with calling convention information instead of
+    relying on the binder's type. This seemed like a very complicated
+    way to fix what is ultimately a corner-case.
+
+
 Note [Types in StgConApp]
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 Suppose we have this unboxed sum term:
@@ -578,7 +616,8 @@ mkUbxSum dc ty_args args0
         = slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map
 
       slotRubbishArg :: SlotTy -> StgArg
-      slotRubbishArg PtrSlot    = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
+      slotRubbishArg PtrLiftedSlot    = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
+      slotRubbishArg PtrUnliftedSlot  = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
                          -- See Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore
       slotRubbishArg WordSlot   = StgLitArg (LitNumber LitNumWord 0 wordPrimTy)
       slotRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0 word64PrimTy)
diff --git a/testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs b/testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
index 0b6384b6ba..49f66b9168 100644
--- a/testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
+++ b/testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
@@ -61,19 +61,19 @@ layout_tests = sequence_
       assert_layout "layout1"
         [ ubxtup [ intTy, intPrimTy ]
         , ubxtup [ intPrimTy, intTy ] ]
-        [ WordSlot, PtrSlot, WordSlot ]
+        [ WordSlot, PtrLiftedSlot, WordSlot ]
 
     layout2 =
       assert_layout "layout2"
         [ ubxtup [ intTy ]
         , intTy ]
-        [ WordSlot, PtrSlot ]
+        [ WordSlot, PtrLiftedSlot ]
 
     layout3 =
       assert_layout "layout3"
         [ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
         , ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
-        [ WordSlot, PtrSlot, PtrSlot, WordSlot, WordSlot ]
+        [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
 
     layout4 =
       assert_layout "layout4"
author	Ben Gamari <ben@smart-cactus.org>	2021-04-06 19:06:14 -0400
committer	Ben Gamari <ben@smart-cactus.org>	2021-05-04 19:15:54 -0400
commit	52c2f2b9f00d08e8d1c034d2af8f75bae9891adf (patch)
tree	f4a9255a61e4f372ed46f55d8bc6e31cfa13b4bb
parent	63bc827ed40e66cd305e2162dfbbd3746be7bf21 (diff)
download	haskell-52c2f2b9f00d08e8d1c034d2af8f75bae9891adf.tar.gz