From b6dc079489950741808b12ee4bafbf60e837ffd5 Mon Sep 17 00:00:00 2001 From: Krzysztof Gogolewski Date: Mon, 20 Feb 2023 19:19:17 +0100 Subject: Fixes around unsafeCoerce# 1. `unsafeCoerce#` was documented in `GHC.Prim`. But since the overhaul in 74ad75e87317, `unsafeCoerce#` is no longer defined there. I've combined the documentation in `GHC.Prim` with the `Unsafe.Coerce` module. 2. The documentation of `unsafeCoerce#` stated that you should not cast a function to an algebraic type, even if you later cast it back before applying it. But ghci was doing that type of cast, as can be seen with 'ghci -ddump-ds' and typing 'x = not'. I've changed it to use Any following the documentation. --- compiler/GHC/Builtin/primops.txt.pp | 44 --------------------------- compiler/GHC/Tc/Module.hs | 26 ++++++++++------ libraries/base/Unsafe/Coerce.hs | 30 +++++++++++++++--- testsuite/tests/ghci/should_run/T16096.stdout | 24 ++++++++++----- testsuite/tests/ghci/should_run/T21052.stdout | 4 +-- 5 files changed, 59 insertions(+), 69 deletions(-) diff --git a/compiler/GHC/Builtin/primops.txt.pp b/compiler/GHC/Builtin/primops.txt.pp index e4283b2dda..e99e5dc3a3 100644 --- a/compiler/GHC/Builtin/primops.txt.pp +++ b/compiler/GHC/Builtin/primops.txt.pp @@ -3845,50 +3845,6 @@ pseudoop "seq" -- This fixity is only the one picked up by Haddock. If you -- change this, do update 'ghcPrimIface' in 'GHC.Iface.Load'. -pseudoop "unsafeCoerce#" - o -> p - { The function 'unsafeCoerce#' allows you to side-step the typechecker entirely. That - is, it allows you to coerce any type into any other type. If you use this function, - you had better get it right, otherwise segmentation faults await. It is generally - used when you want to write a program that you know is well-typed, but where Haskell's - type system is not expressive enough to prove that it is well typed. - - The following uses of 'unsafeCoerce#' are supposed to work (i.e. not lead to - spurious compile-time or run-time crashes): - - * Casting any lifted type to 'Any' - - * Casting 'Any' back to the real type - - * Casting an unboxed type to another unboxed type of the same size. - (Casting between floating-point and integral types does not work. - See the "GHC.Float" module for functions to do work.) - - * Casting between two types that have the same runtime representation. One case is when - the two types differ only in "phantom" type parameters, for example - @'Ptr' 'Int'@ to @'Ptr' 'Float'@, or @['Int']@ to @['Float']@ when the list is - known to be empty. Also, a @newtype@ of a type @T@ has the same representation - at runtime as @T@. - - Other uses of 'unsafeCoerce#' are undefined. In particular, you should not use - 'unsafeCoerce#' to cast a T to an algebraic data type D, unless T is also - an algebraic data type. For example, do not cast @'Int'->'Int'@ to 'Bool', even if - you later cast that 'Bool' back to @'Int'->'Int'@ before applying it. The reasons - have to do with GHC's internal representation details (for the cognoscenti, data values - can be entered but function closures cannot). If you want a safe type to cast things - to, use 'Any', which is not an algebraic data type. - - } - with can_fail = True - --- NB. It is tempting to think that casting a value to a type that it doesn't have is safe --- as long as you don't "do anything" with the value in its cast form, such as seq on it. This --- isn't the case: the compiler can insert seqs itself, and if these happen at the wrong type, --- Bad Things Might Happen. See bug #1616: in this case we cast a function of type (a,b) -> (a,b) --- to () -> () and back again. The strictness analyser saw that the function was strict, but --- the wrapper had type () -> (), and hence the wrapper de-constructed the (), the worker re-constructed --- a new (), with the result that the code ended up with "case () of (a,b) -> ...". - primop TraceEventOp "traceEvent#" GenPrimOp Addr# -> State# s -> State# s { Emits an event via the RTS tracing framework. The contents diff --git a/compiler/GHC/Tc/Module.hs b/compiler/GHC/Tc/Module.hs index f877e006b8..4b2c3969aa 100644 --- a/compiler/GHC/Tc/Module.hs +++ b/compiler/GHC/Tc/Module.hs @@ -103,7 +103,7 @@ import GHC.Iface.Env ( externaliseName ) import GHC.Iface.Make ( coAxiomToIfaceDecl ) import GHC.Iface.Load -import GHC.Builtin.Types ( unitTy, mkListTy ) +import GHC.Builtin.Types ( mkListTy, anyTypeOfKind ) import GHC.Builtin.Names import GHC.Builtin.Utils @@ -2172,8 +2172,8 @@ We don't bother with the tcl_th_bndrs environment either. -- | The returned [Id] is the list of new Ids bound by this statement. It can -- be used to extend the InteractiveContext via extendInteractiveContext. -- --- The returned TypecheckedHsExpr is of type IO [ () ], a list of the bound --- values, coerced to (). +-- The returned TypecheckedHsExpr is of type IO [ Any ], a list of the bound +-- values, coerced to Any. tcRnStmt :: HscEnv -> GhciLStmt GhcPs -> IO (Messages TcRnMessage, Maybe ([Id], LHsExpr GhcTc, FixityEnv)) tcRnStmt hsc_env rdr_stmt @@ -2467,13 +2467,16 @@ The reason for -fno-it is explained in #14336. `it` can lead to the repl leaking memory as it is repeatedly queried. -} +any_lifted :: Type +any_lifted = anyTypeOfKind liftedTypeKind + -- | Typecheck the statements given and then return the results of the --- statement in the form 'IO [()]'. +-- statement in the form 'IO [Any]'. tcGhciStmts :: [GhciLStmt GhcRn] -> TcM PlanResult tcGhciStmts stmts = do { ioTyCon <- tcLookupTyCon ioTyConName ; ret_id <- tcLookupId returnIOName -- return @ IO - ; let ret_ty = mkListTy unitTy + ; let ret_ty = mkListTy any_lifted io_ret_ty = mkTyConApp ioTyCon [ret_ty] tc_io_stmts = tcStmtsAndThen (HsDoStmt GhciStmtCtxt) tcDoStmt stmts (mkCheckExpType io_ret_ty) @@ -2496,28 +2499,31 @@ tcGhciStmts stmts ; traceTc "GHC.Tc.Module.tcGhciStmts: done" empty -- ret_expr is the expression - -- returnIO @[()] [unsafeCoerce# () x, .., unsafeCoerce# () z] + -- returnIO @[Any] [unsafeCoerce# @Any x, .., unsafeCoerce# @Any z] -- -- Despite the inconvenience of building the type applications etc, -- this *has* to be done in type-annotated post-typecheck form -- because we are going to return a list of *polymorphic* values - -- coerced to type (). If we built a *source* stmt + -- coerced to type Any. If we built a *source* stmt -- return [coerce x, ..., coerce z] -- then the type checker would instantiate x..z, and we wouldn't -- get their *polymorphic* values. (And we'd get ambiguity errs -- if they were overloaded, since they aren't applied to anything.) + -- + -- We use Any rather than a dummy type such as () because of + -- the rules of unsafeCoerce#; see Unsafe/Coerce.hs for the details. ; AnId unsafe_coerce_id <- tcLookupGlobal unsafeCoercePrimName -- We use unsafeCoerce# here because of (U11) in -- Note [Implementing unsafeCoerce] in base:Unsafe.Coerce ; let ret_expr = nlHsApp (nlHsTyApp ret_id [ret_ty]) $ - noLocA $ ExplicitList unitTy $ + noLocA $ ExplicitList any_lifted $ map mk_item ids mk_item id = unsafe_coerce_id `nlHsTyApp` [ getRuntimeRep (idType id) - , getRuntimeRep unitTy - , idType id, unitTy] + , getRuntimeRep any_lifted + , idType id, any_lifted] `nlHsApp` nlHsVar id stmts = tc_stmts ++ [noLocA (mkLastStmt ret_expr)] diff --git a/libraries/base/Unsafe/Coerce.hs b/libraries/base/Unsafe/Coerce.hs index 76c010a0bf..23820f0f48 100644 --- a/libraries/base/Unsafe/Coerce.hs +++ b/libraries/base/Unsafe/Coerce.hs @@ -244,11 +244,11 @@ unsafeEqualityProof = case unsafeEqualityProof @a @b of UnsafeRefl -> UnsafeRefl -- Why delay inlining to Phase 1? Because of the RULES for map/unsafeCoerce; -- see (U8) in Note [Implementing unsafeCoerce] --- | Coerce a value from one type to another, bypassing the type-checker. +-- | `unsafeCoerce` coerces a value from one type to another, bypassing the type-checker. -- -- There are several legitimate ways to use 'unsafeCoerce': -- --- 1. To coerce e.g. @Int@ to @HValue@, put it in a list of @HValue@, +-- 1. To coerce a lifted type such as @Int@ to @Any@, put it in a list of @Any@, -- and then later coerce it back to @Int@ before using it. -- -- 2. To produce e.g. @(a+b) :~: (b+a)@ from @unsafeCoerce Refl@. @@ -269,15 +269,35 @@ unsafeEqualityProof = case unsafeEqualityProof @a @b of UnsafeRefl -> UnsafeRefl -- are the same -- but the proof of that relies on the complex, trusted -- implementation of @Typeable@. -- --- 4. The "reflection trick", which takes advantage of the fact that in +-- 4. (superseded) The "reflection trick", which takes advantage of the fact that in -- @class C a where { op :: ty }@, we can safely coerce between @C a@ and @ty@ -- (which have different kinds!) because it's really just a newtype. -- Note: there is /no guarantee, at all/ that this behavior will be supported -- into perpetuity. +-- It is now preferred to use `withDict` in @GHC.Magic.Dict@, which +-- is type-safe. See Note [withDict] in GHC.Tc.Instance.Class for details. -- +-- 5. (superseded) Casting between two types which have exactly the same structure: +-- between a newtype of T and T, or between types which differ only +-- in "phantom" type parameters. +-- It is now preferred to use `coerce` from @Data.Coerce@, which +-- is type-safe. -- --- For safe zero-cost coercions you can instead use the 'Data.Coerce.coerce' function from --- "Data.Coerce". +-- Other uses of 'unsafeCoerce' are undefined. In particular, you should not use +-- 'unsafeCoerce' to cast a T to an algebraic data type D, unless T is also +-- an algebraic data type. For example, do not cast @'Int'->'Int'@ to 'Bool', even if +-- you later cast that 'Bool' back to @'Int'->'Int'@ before applying it. The reasons +-- have to do with GHC's internal representation details (for the cognoscenti, data values +-- can be entered but function closures cannot). If you want a safe type to cast things +-- to, use 'Any', which is not an algebraic data type. + +-- NB. It is tempting to think that casting a value to a type that it doesn't have is safe +-- as long as you don't "do anything" with the value in its cast form, such as seq on it. This +-- isn't the case: the compiler can insert seqs itself, and if these happen at the wrong type, +-- Bad Things Might Happen. See bug #1616: in this case we cast a function of type (a,b) -> (a,b) +-- to () -> () and back again. The strictness analyser saw that the function was strict, but +-- the wrapper had type () -> (), and hence the wrapper de-constructed the (), the worker re-constructed +-- a new (), with the result that the code ended up with "case () of (a,b) -> ...". unsafeCoerce :: forall (a :: Type) (b :: Type) . a -> b unsafeCoerce x = case unsafeEqualityProof @a @b of UnsafeRefl -> x diff --git a/testsuite/tests/ghci/should_run/T16096.stdout b/testsuite/tests/ghci/should_run/T16096.stdout index 2749f06586..d5c11d696b 100644 --- a/testsuite/tests/ghci/should_run/T16096.stdout +++ b/testsuite/tests/ghci/should_run/T16096.stdout @@ -13,12 +13,16 @@ letrec { x = GHC.Enum.enumFrom @GHC.Types.Int $dEnum (GHC.Types.I# 1#); } in x; } in GHC.Base.returnIO - @[()] + @[GHC.Types.Any] (GHC.Types.: - @() + @GHC.Types.Any (Unsafe.Coerce.unsafeCoerce# - @GHC.Types.LiftedRep @GHC.Types.LiftedRep @[GHC.Types.Int] @() x) - (GHC.Types.[] @())) + @GHC.Types.LiftedRep + @GHC.Types.LiftedRep + @[GHC.Types.Int] + @GHC.Types.Any + x) + (GHC.Types.[] @GHC.Types.Any)) @@ -36,11 +40,15 @@ letrec { x = GHC.Enum.enumFrom @GHC.Types.Int $dEnum (GHC.Types.I# 1#); } in x; } in GHC.Base.returnIO - @[()] + @[GHC.Types.Any] (GHC.Types.: - @() + @GHC.Types.Any (Unsafe.Coerce.unsafeCoerce# - @GHC.Types.LiftedRep @GHC.Types.LiftedRep @[GHC.Types.Int] @() x) - (GHC.Types.[] @())) + @GHC.Types.LiftedRep + @GHC.Types.LiftedRep + @[GHC.Types.Int] + @GHC.Types.Any + x) + (GHC.Types.[] @GHC.Types.Any)) diff --git a/testsuite/tests/ghci/should_run/T21052.stdout b/testsuite/tests/ghci/should_run/T21052.stdout index 3822a96b6e..cb1a1a16d4 100644 --- a/testsuite/tests/ghci/should_run/T21052.stdout +++ b/testsuite/tests/ghci/should_run/T21052.stdout @@ -1,10 +1,10 @@ ==================== CodeGenInput STG: ==================== -BCO_toplevel :: GHC.Types.IO [()] +BCO_toplevel :: GHC.Types.IO [GHC.Types.Any] [LclId] = {} \u [] let { - sat :: [()] + sat :: [GHC.Types.Any] [LclId] = :! [GHC.Tuple.Prim.() GHC.Types.[]]; } in GHC.Base.returnIO sat; -- cgit v1.2.1