Fix #15038

We introduce a new Id for unused pointer values in unboxed sums that is not CAFFY. Because the Id is not CAFFY it doesn't make non-CAFFY definitions CAFFY, fixing #15038. To make sure anything referenced by the new id will be retained we get a stable pointer to in on RTS startup. Test Plan: Passes validate Reviewers: simonmar, simonpj, hvr, bgamari, erikd Reviewed By: simonmar Subscribers: rwbarton, thomie, carter GHC Trac Issues: #15038 Differential Revision: https://phabricator.haskell.org/D4680
author: Ömer Sinan Ağacan <omeragacan@gmail.com> 2018-05-10 11:13:37 +0300
committer: Ömer Sinan Ağacan <omeragacan@gmail.com> 2018-05-10 11:53:09 +0300
commit: b2ff5dde399cd012218578945ada1d9ff68daa35 (patch)
tree: 730a83ea4b91622894412f7882d5b2f1d5c7c6f2
parent: 5fe6aaa3756cda654374ebfd883fa8f064ff64a4 (diff)
download: haskell-b2ff5dde399cd012218578945ada1d9ff68daa35.tar.gz
14 files changed, 518 insertions, 9 deletions
diff --git a/compiler/coreSyn/MkCore.hs b/compiler/coreSyn/MkCore.hs
index 8291c01807..aad6d14a90 100644
--- a/compiler/coreSyn/MkCore.hs
+++ b/compiler/coreSyn/MkCore.hs
@@ -46,7 +46,7 @@ module MkCore (
         rEC_CON_ERROR_ID, rUNTIME_ERROR_ID,
         nON_EXHAUSTIVE_GUARDS_ERROR_ID, nO_METHOD_BINDING_ERROR_ID,
         pAT_ERROR_ID, rEC_SEL_ERROR_ID, aBSENT_ERROR_ID,
-        tYPE_ERROR_ID,
+        tYPE_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID
     ) where
 
 #include "HsVersions.h"
@@ -708,9 +708,12 @@ recSelErrorName, runtimeErrorName, absentErrorName :: Name
 recConErrorName, patErrorName :: Name
 nonExhaustiveGuardsErrorName, noMethodBindingErrorName :: Name
 typeErrorName :: Name
+absentSumFieldErrorName :: Name
 
 recSelErrorName     = err_nm "recSelError"     recSelErrorIdKey     rEC_SEL_ERROR_ID
 absentErrorName     = err_nm "absentError"     absentErrorIdKey     aBSENT_ERROR_ID
+absentSumFieldErrorName = err_nm "absentSumFieldError"  absentSumFieldErrorIdKey
+                            aBSENT_SUM_FIELD_ERROR_ID
 runtimeErrorName    = err_nm "runtimeError"    runtimeErrorIdKey    rUNTIME_ERROR_ID
 recConErrorName     = err_nm "recConError"     recConErrorIdKey     rEC_CON_ERROR_ID
 patErrorName        = err_nm "patError"        patErrorIdKey        pAT_ERROR_ID
@@ -726,7 +729,7 @@ err_nm str uniq id = mkWiredInIdName cONTROL_EXCEPTION_BASE (fsLit str) uniq id
 
 rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID :: Id
 pAT_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID :: Id
-tYPE_ERROR_ID, aBSENT_ERROR_ID :: Id
+tYPE_ERROR_ID, aBSENT_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID :: Id
 rEC_SEL_ERROR_ID                = mkRuntimeErrorId recSelErrorName
 rUNTIME_ERROR_ID                = mkRuntimeErrorId runtimeErrorName
 rEC_CON_ERROR_ID                = mkRuntimeErrorId recConErrorName
@@ -735,6 +738,35 @@ nO_METHOD_BINDING_ERROR_ID      = mkRuntimeErrorId noMethodBindingErrorName
 nON_EXHAUSTIVE_GUARDS_ERROR_ID  = mkRuntimeErrorId nonExhaustiveGuardsErrorName
 tYPE_ERROR_ID                   = mkRuntimeErrorId typeErrorName
 
+-- Note [aBSENT_SUM_FIELD_ERROR_ID]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Absent argument error for unused unboxed sum fields are different than absent
+-- error used in dummy worker functions (see `mkAbsentErrorApp`):
+--
+-- - `absentSumFieldError` can't take arguments because it's used in unarise for
+--   unused pointer fields in unboxed sums, and applying an argument would
+--   require allocating a thunk.
+--
+-- - `absentSumFieldError` can't be CAFFY because that would mean making some
+--   non-CAFFY definitions that use unboxed sums CAFFY in unarise.
+--
+--   To make `absentSumFieldError` non-CAFFY we get a stable pointer to it in
+--   RtsStartup.c and mark it as non-CAFFY here.
+--
+-- Getting this wrong causes hard-to-debug runtime issues, see #15038.
+--
+-- TODO: Remove stable pointer hack after fixing #9718.
+--       However, we should still be careful about not making things CAFFY just
+--       because they use unboxed sums. Unboxed objects are supposed to be
+--       efficient, and none of the other unboxed literals make things CAFFY.
+
+aBSENT_SUM_FIELD_ERROR_ID
+  = mkVanillaGlobalWithInfo absentSumFieldErrorName
+      (mkSpecForAllTys [alphaTyVar] (mkTyVarTy alphaTyVar)) -- forall a . a
+      (vanillaIdInfo `setStrictnessInfo` mkClosedStrictSig [] exnRes
+                     `setArityInfo` 0
+                     `setCafInfo` NoCafRefs) -- #15038
+
 mkRuntimeErrorId :: Name -> Id
 -- Error function
 --   with type:  forall (r:RuntimeRep) (a:TYPE r). Addr# -> a
diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs
index 3f419ebeac..fd324cb43e 100644
--- a/compiler/prelude/PrelNames.hs
+++ b/compiler/prelude/PrelNames.hs
@@ -880,9 +880,6 @@ dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str)
 Many of these Names are not really "built in", but some parts of the
 compiler (notably the deriving mechanism) need to mention their names,
 and it's convenient to write them all down in one place.
-
---MetaHaskell Extension  add the constrs and the lower case case
--- guys as well (perhaps) e.g. see  trueDataConName     below
 -}
 
 wildCardName :: Name
@@ -2084,7 +2081,8 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
     realWorldPrimIdKey, recConErrorIdKey,
     unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
     unpackCStringFoldrIdKey, unpackCStringIdKey,
-    typeErrorIdKey, divIntIdKey, modIntIdKey :: Unique
+    typeErrorIdKey, divIntIdKey, modIntIdKey,
+    absentSumFieldErrorIdKey :: Unique
 
 wildCardKey                   = mkPreludeMiscIdUnique  0  -- See Note [WildCard binders]
 absentErrorIdKey              = mkPreludeMiscIdUnique  1
@@ -2110,6 +2108,7 @@ voidPrimIdKey                 = mkPreludeMiscIdUnique 21
 typeErrorIdKey                = mkPreludeMiscIdUnique 22
 divIntIdKey                   = mkPreludeMiscIdUnique 23
 modIntIdKey                   = mkPreludeMiscIdUnique 24
+absentSumFieldErrorIdKey      = mkPreludeMiscIdUnique 9
 
 unsafeCoerceIdKey, concatIdKey, filterIdKey, zipIdKey, bindIOIdKey,
     returnIOIdKey, newStablePtrIdKey,
diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs
index 57dd699f70..bcf699b369 100644
--- a/compiler/simplStg/UnariseStg.hs
+++ b/compiler/simplStg/UnariseStg.hs
@@ -204,7 +204,7 @@ import DataCon
 import FastString (FastString, mkFastString)
 import Id
 import Literal (Literal (..), literalType)
-import MkCore (aBSENT_ERROR_ID)
+import MkCore (aBSENT_SUM_FIELD_ERROR_ID)
 import MkId (voidPrimId, voidArgId)
 import MonadUtils (mapAccumLM)
 import Outputable
@@ -577,7 +577,8 @@ mkUbxSum dc ty_args args0
         = slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map
 
       slotRubbishArg :: SlotTy -> StgArg
-      slotRubbishArg PtrSlot    = StgVarArg aBSENT_ERROR_ID
+      slotRubbishArg PtrSlot    = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
+                         -- See Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore
       slotRubbishArg WordSlot   = StgLitArg (MachWord 0)
       slotRubbishArg Word64Slot = StgLitArg (MachWord64 0)
       slotRubbishArg FloatSlot  = StgLitArg (MachFloat 0)
diff --git a/libraries/base/Control/Exception/Base.hs b/libraries/base/Control/Exception/Base.hs
index e675e0c017..4b5d94ece7 100644
--- a/libraries/base/Control/Exception/Base.hs
+++ b/libraries/base/Control/Exception/Base.hs
@@ -95,7 +95,7 @@ module Control.Exception.Base (
         -- * Calls for GHC runtime
         recSelError, recConError, runtimeError,
         nonExhaustiveGuardsError, patError, noMethodBindingError,
-        absentError, typeError,
+        absentError, absentSumFieldError, typeError,
         nonTermination, nestedAtomically,
   ) where
 
@@ -398,3 +398,7 @@ nonTermination = toException NonTermination
 -- GHC's RTS calls this
 nestedAtomically :: SomeException
 nestedAtomically = toException NestedAtomically
+
+-- Introduced by unarise for unused unboxed sum fields
+absentSumFieldError :: a
+absentSumFieldError = absentError " in unboxed sum."#
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 74b6b08227..6e5bf03bd6 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -45,6 +45,7 @@ PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactPinned_closure);
 PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactMutable_closure);
 PRELUDE_CLOSURE(base_ControlziExceptionziBase_nonTermination_closure);
 PRELUDE_CLOSURE(base_ControlziExceptionziBase_nestedAtomically_closure);
+PRELUDE_CLOSURE(base_ControlziExceptionziBase_absentSumFieldError_closure);
 PRELUDE_CLOSURE(base_GHCziEventziThread_blockedOnBadFD_closure);
 
 PRELUDE_CLOSURE(base_GHCziConcziSync_runSparks_closure);
@@ -99,6 +100,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info);
 #define nonTermination_closure    DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure)
 #define nestedAtomically_closure  DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nestedAtomically_closure)
 #define blockedOnBadFD_closure    DLL_IMPORT_DATA_REF(base_GHCziEventziThread_blockedOnBadFD_closure)
+#define absentSumFieldError_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_absentSumFieldError_closure)
 
 #define Czh_con_info              DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Czh_con_info)
 #define Izh_con_info              DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Izh_con_info)
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index e4ca6b906d..7eb98a8ba0 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -247,6 +247,10 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
     /* Add some GC roots for things in the base package that the RTS
      * knows about.  We don't know whether these turn out to be CAFs
      * or refer to CAFs, but we have to assume that they might.
+     *
+     * Because these stable pointers will retain any CAF references in
+     * these closures `Id`s of these can be safely marked as non-CAFFY
+     * in the compiler.
      */
     getStablePtr((StgPtr)runIO_closure);
     getStablePtr((StgPtr)runNonIO_closure);
@@ -265,6 +269,9 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
     getStablePtr((StgPtr)cannotCompactPinned_closure);
     getStablePtr((StgPtr)cannotCompactMutable_closure);
     getStablePtr((StgPtr)nestedAtomically_closure);
+    getStablePtr((StgPtr)absentSumFieldError_closure);
+        // `Id` for this closure is marked as non-CAFFY,
+        // see Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore.
 
     getStablePtr((StgPtr)runSparks_closure);
     getStablePtr((StgPtr)ensureIOManagerIsRunning_closure);
diff --git a/testsuite/tests/codeGen/should_run/T15038/Makefile b/testsuite/tests/codeGen/should_run/T15038/Makefile
new file mode 100644
index 0000000000..508494975b
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/Makefile
@@ -0,0 +1,9 @@
+TOP=../../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+.PHONY: T15038
+T15038:
+	'$(TEST_HC)' $(TEST_HC_OPTS) $(shell find . -iname '*.hs') -O2 -o Main \
+		-package containers -package ghc-prim -package primitive
+	./Main
diff --git a/testsuite/tests/codeGen/should_run/T15038/all.T b/testsuite/tests/codeGen/should_run/T15038/all.T
new file mode 100644
index 0000000000..6b284784ae
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/all.T
@@ -0,0 +1,4 @@
+test('T15038',
+     [reqlib('containers'), reqlib('ghc-prim'), reqlib('primitive')],
+     run_command,
+     ['$MAKE -s --no-print-directory T15038'])
diff --git a/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs
new file mode 100644
index 0000000000..a138615b2d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE DeriveFunctor #-}
+
+module Data.Trie.Naive
+  ( Trie
+  , singleton
+  , singletonString
+  , lookup
+  , parser
+  , fromList
+  , fromListAppend
+  , fromStringList
+  ) where
+
+import Prelude hiding (lookup)
+
+import Data.Semigroup (Semigroup)
+import Data.Word (Word8)
+import Data.Map (Map)
+import Data.Bifunctor (second)
+import Packed.Bytes (Bytes)
+import qualified Data.Char
+import qualified GHC.OldList as L
+import qualified Packed.Bytes.Parser as P
+import qualified Packed.Bytes as B
+import qualified Data.Semigroup as SG
+import qualified Data.Map.Strict as M
+
+data Trie a = Trie (Maybe a) (Map Word8 (Trie a))
+  deriving (Functor)
+
+instance Semigroup a => Semigroup (Trie a) where
+  (<>) = append
+
+instance Semigroup a => Monoid (Trie a) where
+  mempty = Trie Nothing M.empty
+  mappend = (SG.<>)
+
+append :: Semigroup a => Trie a -> Trie a -> Trie a
+append (Trie v1 m1) (Trie v2 m2) = Trie
+  (SG.getOption (SG.Option v1 SG.<> SG.Option v2))
+  (M.unionWith append m1 m2)
+
+singleton :: Bytes -> a -> Trie a
+singleton k v = B.foldr (\b r -> Trie Nothing (M.singleton b r)) (Trie (Just v) M.empty) k
+
+singletonString :: String -> a -> Trie a
+singletonString k v = L.foldr (\c r -> Trie Nothing (M.singleton (c2w c) r)) (Trie (Just v) M.empty) k
+
+lookup :: Bytes -> Trie a -> Maybe a
+lookup k t0 = case B.foldr lookupStep (Just t0) k of
+  Nothing -> Nothing
+  Just (Trie v _) -> v
+
+lookupStep :: Word8 -> Maybe (Trie a) -> Maybe (Trie a)
+lookupStep w Nothing = Nothing
+lookupStep w (Just (Trie _ m)) = M.lookup w m
+
+parser :: Trie (P.Parser a) -> P.Parser a
+parser (Trie mp m) = case mp of
+  Just p -> p
+  Nothing -> do
+    w <- P.any
+    case M.lookup w m of
+      Nothing -> P.failure
+      Just t -> parser t
+
+fromList :: [(Bytes,a)] -> Trie a
+fromList = fmap SG.getFirst . fromListAppend . map (second SG.First)
+
+fromListAppend :: Semigroup a => [(Bytes,a)] -> Trie a
+fromListAppend = foldMap (uncurry singleton)
+
+fromStringList :: [(String,a)] -> Trie a
+fromStringList = fmap SG.getFirst . fromStringListAppend . map (second SG.First)
+
+fromStringListAppend :: Semigroup a => [(String,a)] -> Trie a
+fromStringListAppend = foldMap (uncurry singletonString)
+
+c2w :: Char -> Word8
+c2w = fromIntegral . Data.Char.ord
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs
new file mode 100644
index 0000000000..224e03f75d
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs
@@ -0,0 +1,80 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{-# OPTIONS_GHC
+ -fno-warn-unsafe
+ -fno-warn-implicit-prelude
+ -fno-warn-missing-import-lists
+ -O2
+#-}
+
+module Packed.Bytes
+  ( Bytes(..)
+  , pack
+  , unpack
+  , length
+    -- * Folds
+  , foldr
+    -- * Unsliced Byte Arrays
+  , fromByteArray
+  ) where
+
+import Prelude hiding (take,length,replicate,drop,null,concat,foldr)
+
+import Data.Primitive (ByteArray(..))
+import Data.Word (Word8)
+import Control.Monad.ST (runST, ST)
+import qualified Data.Primitive as PM
+import qualified GHC.OldList as L
+
+data Bytes = Bytes
+  {-# UNPACK #-} !ByteArray -- payload
+  {-# UNPACK #-} !Int -- offset
+  {-# UNPACK #-} !Int -- length
+
+instance Show Bytes where
+  show x = "pack " ++ show (unpack x)
+
+pack :: [Word8] -> Bytes
+pack bs = let arr = packByteArray bs in Bytes arr 0 (lengthByteArray arr)
+
+unpack :: Bytes -> [Word8]
+unpack (Bytes arr off len) = go off
+  where
+  go :: Int -> [Word8]
+  go !ix = if ix < len + off
+    then PM.indexByteArray arr ix : go (ix + 1)
+    else []
+
+fromByteArray :: ByteArray -> Bytes
+fromByteArray ba = Bytes ba 0 (lengthByteArray ba)
+
+length :: Bytes -> Int
+length (Bytes _ _ len) = len
+
+foldr :: (Word8 -> a -> a) -> a -> Bytes -> a
+foldr f a0 (Bytes arr off0 len) = go off0 where
+  !end = off0 + len
+  go !ix = if ix < end
+    then f (PM.indexByteArray arr ix) (go (ix + 1))
+    else a0
+
+packByteArray :: [Word8] -> ByteArray
+packByteArray ws0 = runST $ do
+  marr <- PM.newByteArray (L.length ws0)
+  let go [] !_ = return ()
+      go (w : ws) !ix = PM.writeByteArray marr ix w >> go ws (ix + 1)
+  go ws0 0
+  PM.unsafeFreezeByteArray marr
+
+unpackByteArray :: ByteArray -> [Word8]
+unpackByteArray arr = go 0 where
+  go :: Int -> [Word8]
+  go !ix = if ix < lengthByteArray arr
+    then PM.indexByteArray arr ix : go (ix + 1)
+    else []
+
+lengthByteArray :: ByteArray -> Int
+lengthByteArray = PM.sizeofByteArray
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs
new file mode 100644
index 0000000000..3f9c42ad52
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs
@@ -0,0 +1,165 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeInType #-}
+{-# LANGUAGE UnboxedSums #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+{-# OPTIONS_GHC
+ -Weverything
+ -fno-warn-unsafe
+ -fno-warn-implicit-prelude
+ -fno-warn-missing-import-lists
+ -fno-warn-noncanonical-monoid-instances
+ -O2
+#-}
+
+module Packed.Bytes.Parser
+  ( Parser(..)
+  , Result(..)
+  , Leftovers(..)
+  , parseStreamST
+  , any
+  , failure
+  ) where
+
+import Control.Applicative
+import Data.Primitive (ByteArray(..))
+import GHC.Int (Int(I#))
+import GHC.ST (ST(..),runST)
+import GHC.Types (TYPE)
+import GHC.Word (Word8(W8#))
+import Packed.Bytes (Bytes(..))
+import Packed.Bytes.Stream.ST (ByteStream(..))
+import Prelude hiding (any,replicate)
+
+import qualified Data.Primitive as PM
+import qualified Control.Monad
+
+import GHC.Exts (Int#,ByteArray#,Word#,State#,(+#),(-#),(>#),indexWord8Array#)
+
+type Bytes# = (# ByteArray#, Int#, Int# #)
+type Maybe# (a :: TYPE r) = (# (# #) | a #)
+type Leftovers# s = (# Bytes# , ByteStream s #)
+type Result# s a = (# Maybe# (Leftovers# s), Maybe# a #)
+
+data Result s a = Result
+  { resultLeftovers :: !(Maybe (Leftovers s))
+  , resultValue :: !(Maybe a)
+  }
+
+data Leftovers s = Leftovers
+  { leftoversChunk :: {-# UNPACK #-} !Bytes
+    -- ^ The last chunk pulled from the stream
+  , leftoversStream :: ByteStream s
+    -- ^ The remaining stream
+  }
+
+data PureResult a = PureResult
+  { pureResultLeftovers :: {-# UNPACK #-} !Bytes
+  , pureResultValue :: !(Maybe a)
+  } deriving (Show)
+
+emptyByteArray :: ByteArray
+emptyByteArray = runST (PM.newByteArray 0 >>= PM.unsafeFreezeByteArray)
+
+parseStreamST :: ByteStream s -> Parser a -> ST s (Result s a)
+parseStreamST stream (Parser f) = ST $ \s0 ->
+  case f (# | (# (# unboxByteArray emptyByteArray, 0#, 0# #), stream #) #) s0 of
+    (# s1, r #) -> (# s1, boxResult r #)
+
+boxResult :: Result# s a -> Result s a
+boxResult (# leftovers, val #) = case val of
+  (# (# #) | #) -> Result (boxLeftovers leftovers) Nothing
+  (# | a #) -> Result (boxLeftovers leftovers) (Just a)
+
+boxLeftovers :: Maybe# (Leftovers# s) -> Maybe (Leftovers s)
+boxLeftovers (# (# #) | #) = Nothing
+boxLeftovers (# | (# theBytes, stream #) #) = Just (Leftovers (boxBytes theBytes) stream)
+
+instance Functor Parser where
+  fmap = mapParser
+
+-- Remember to write liftA2 by hand at some point.
+instance Applicative Parser where
+  pure = pureParser
+  (<*>) = Control.Monad.ap
+
+instance Monad Parser where
+  return = pure
+  (>>=) = bindLifted
+
+newtype Parser a = Parser
+  { getParser :: forall s.
+       Maybe# (Leftovers# s)
+    -> State# s
+    -> (# State# s, Result# s a #)
+  }
+
+nextNonEmpty :: ByteStream s -> State# s -> (# State# s, Maybe# (Leftovers# s) #)
+nextNonEmpty (ByteStream f) s0 = case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, (# (# #) | #) #)
+    (# | (# theBytes@(# _,_,len #), stream #) #) -> case len of
+      0# -> nextNonEmpty stream s1
+      _ -> (# s1, (# | (# theBytes, stream #) #) #)
+
+withNonEmpty :: forall s b.
+     Maybe# (Leftovers# s)
+  -> State# s
+  -> (State# s -> (# State# s, Result# s b #))
+  -> (Word# -> Bytes# -> ByteStream s -> State# s -> (# State# s, Result# s b #))
+     -- The first argument is a Word8, not a full machine word.
+     -- The second argument is the complete,non-empty chunk
+     -- with the head byte still intact.
+  -> (# State# s, Result# s b #)
+withNonEmpty (# (# #) | #) s0 g _ = g s0
+withNonEmpty (# | (# bytes0@(# arr0,off0,len0 #), stream0 #) #) s0 g f = case len0 ># 0# of
+  1# -> f (indexWord8Array# arr0 off0) bytes0 stream0 s0
+  _ -> case nextNonEmpty stream0 s0 of
+    (# s1, r #) -> case r of
+      (# (# #) | #) -> g s1
+      (# | (# bytes1@(# arr1, off1, _ #), stream1 #) #) ->
+        f (indexWord8Array# arr1 off1) bytes1 stream1 s1
+
+-- | Consume the next byte from the input.
+any :: Parser Word8
+any = Parser go where
+  go :: Maybe# (Leftovers# s) -> State# s -> (# State# s, Result# s Word8 #)
+  go m s0 = withNonEmpty m s0
+    (\s -> (# s, (# (# (# #) | #), (# (# #) | #) #) #))
+    (\theByte theBytes stream s ->
+      (# s, (# (# | (# unsafeDrop# 1# theBytes, stream #) #), (# | W8# theByte #) #) #)
+    )
+
+-- TODO: improve this
+mapParser :: (a -> b) -> Parser a -> Parser b
+mapParser f p = bindLifted p (pureParser . f)
+
+pureParser :: a -> Parser a
+pureParser a = Parser $ \leftovers0 s0 ->
+  (# s0, (# leftovers0, (# | a #) #) #)
+
+bindLifted :: Parser a -> (a -> Parser b) -> Parser b
+bindLifted (Parser f) g = Parser $ \leftovers0 s0 -> case f leftovers0 s0 of
+  (# s1, (# leftovers1, val #) #) -> case val of
+    (# (# #) | #) -> (# s1, (# leftovers1, (# (# #) | #) #) #)
+    (# | x #) -> case g x of
+      Parser k -> k leftovers1 s1
+
+-- This assumes that the Bytes is longer than the index. It also does
+-- not eliminate zero-length references to byte arrays.
+unsafeDrop# :: Int# -> Bytes# -> Bytes#
+unsafeDrop# i (# arr, off, len #) = (# arr, off +# i, len -# i #)
+
+unboxByteArray :: ByteArray -> ByteArray#
+unboxByteArray (ByteArray arr) = arr
+
+boxBytes :: Bytes# -> Bytes
+boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
+
+failure :: Parser a
+failure = Parser (\m s -> (# s, (# m, (# (# #) | #) #) #))
diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs
new file mode 100644
index 0000000000..ffba9c2596
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs
@@ -0,0 +1,61 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnboxedSums #-}
+
+{-# OPTIONS_GHC -O2 #-}
+
+module Packed.Bytes.Stream.ST
+  ( ByteStream(..)
+  , empty
+  , unpack
+  , fromBytes
+  ) where
+
+import Data.Primitive (Array,ByteArray(..))
+import Data.Semigroup (Semigroup)
+import Data.Word (Word8)
+import GHC.Exts (RealWorld,State#,Int#,ByteArray#)
+import GHC.Int (Int(I#))
+import GHC.ST (ST(..))
+import Packed.Bytes (Bytes(..))
+import System.IO (Handle)
+import qualified Data.Primitive as PM
+import qualified Data.Semigroup as SG
+import qualified Packed.Bytes as B
+
+type Bytes# = (# ByteArray#, Int#, Int# #)
+
+newtype ByteStream s = ByteStream
+  (State# s -> (# State# s, (# (# #) | (# Bytes# , ByteStream s #) #) #) )
+
+fromBytes :: Bytes -> ByteStream s
+fromBytes b = ByteStream
+  (\s0 -> (# s0, (# | (# unboxBytes b, empty #) #) #))
+
+nextChunk :: ByteStream s -> ST s (Maybe (Bytes,ByteStream s))
+nextChunk (ByteStream f) = ST $ \s0 -> case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, Nothing #)
+    (# | (# theBytes, theStream #) #) -> (# s1, Just (boxBytes theBytes, theStream) #)
+
+empty :: ByteStream s
+empty = ByteStream (\s -> (# s, (# (# #) | #) #) )
+
+boxBytes :: Bytes# -> Bytes
+boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c)
+
+unboxBytes :: Bytes -> Bytes#
+unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a,b,c #)
+
+unpack :: ByteStream s -> ST s [Word8]
+unpack stream = ST (unpackInternal stream)
+
+unpackInternal :: ByteStream s -> State# s -> (# State# s, [Word8] #)
+unpackInternal (ByteStream f) s0 = case f s0 of
+  (# s1, r #) -> case r of
+    (# (# #) | #) -> (# s1, [] #)
+    (# | (# bytes, stream #) #) -> case unpackInternal stream s1 of
+      (# s2, ws #) -> (# s2, B.unpack (boxBytes bytes) ++ ws #)
diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Main.hs b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs
new file mode 100644
index 0000000000..56acd042db
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs
@@ -0,0 +1,4 @@
+import qualified Parser as Parser
+
+main :: IO ()
+main = print (iterate Parser.byteParserBadOnce 5 !! 100000)
diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs
new file mode 100644
index 0000000000..70f9f3336b
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs
@@ -0,0 +1,61 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeInType #-}
+{-# LANGUAGE UnboxedSums #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+{-# OPTIONS_GHC -Wall #-}
+
+module Parser
+  ( byteParserBadOnce
+  ) where
+
+import Control.Monad.ST (runST)
+import Data.Word (Word8)
+import Packed.Bytes (Bytes)
+import Packed.Bytes.Parser (Parser)
+import Packed.Bytes.Stream.ST (ByteStream)
+import qualified Data.Char
+import qualified Packed.Bytes as B
+import qualified Packed.Bytes.Parser as P
+import qualified Packed.Bytes.Stream.ST as Stream
+
+-- from common directory
+import qualified Data.Trie.Naive as Naive
+
+snmptrapdNaive :: Naive.Trie (Parser Word)
+snmptrapdNaive = Naive.fromStringList
+  [ ("STRING: ", P.any >>= \_ -> return 5)
+  ]
+
+runExampleParser :: Parser a -> (forall s. ByteStream s) -> (Maybe a, Maybe String)
+runExampleParser parser stream = runST $ do
+  P.Result mleftovers r <- P.parseStreamST stream parser
+  mextra <- case mleftovers of
+    Nothing -> return Nothing
+    Just (P.Leftovers chunk remainingStream) -> do
+      bs <- Stream.unpack remainingStream
+      return (Just (map word8ToChar (B.unpack chunk ++ bs)))
+  return (r,mextra)
+
+byteParserBadOnce :: Int -> Int
+byteParserBadOnce x = do
+  let sample = ("STRING: _6_ " ++ show x)
+      stream = Stream.fromBytes (s2b sample)
+      expected = 6
+      (r,mextra) = runExampleParser (Naive.parser snmptrapdNaive) stream
+      a1 = if Nothing == mextra then 1 else 0
+      a2 = if Just expected == r then 1 else 0
+   in a1 + (a2 + x)
+
+s2b :: String -> Bytes
+s2b = B.pack . map charToWord8
+
+charToWord8 :: Char -> Word8
+charToWord8 = fromIntegral . Data.Char.ord
+
+word8ToChar :: Word8 -> Char
+word8ToChar = Data.Char.chr . fromIntegral
author	Ömer Sinan Ağacan <omeragacan@gmail.com>	2018-05-10 11:13:37 +0300
committer	Ömer Sinan Ağacan <omeragacan@gmail.com>	2018-05-10 11:53:09 +0300
commit	b2ff5dde399cd012218578945ada1d9ff68daa35 (patch)
tree	730a83ea4b91622894412f7882d5b2f1d5c7c6f2
parent	5fe6aaa3756cda654374ebfd883fa8f064ff64a4 (diff)
download	haskell-b2ff5dde399cd012218578945ada1d9ff68daa35.tar.gz