diff options
author | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2018-05-10 11:13:37 +0300 |
---|---|---|
committer | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2018-05-10 11:53:09 +0300 |
commit | b2ff5dde399cd012218578945ada1d9ff68daa35 (patch) | |
tree | 730a83ea4b91622894412f7882d5b2f1d5c7c6f2 | |
parent | 5fe6aaa3756cda654374ebfd883fa8f064ff64a4 (diff) | |
download | haskell-b2ff5dde399cd012218578945ada1d9ff68daa35.tar.gz |
Fix #15038
We introduce a new Id for unused pointer values in unboxed sums that is
not CAFFY. Because the Id is not CAFFY it doesn't make non-CAFFY
definitions CAFFY, fixing #15038.
To make sure anything referenced by the new id will be retained we get a
stable pointer to in on RTS startup.
Test Plan: Passes validate
Reviewers: simonmar, simonpj, hvr, bgamari, erikd
Reviewed By: simonmar
Subscribers: rwbarton, thomie, carter
GHC Trac Issues: #15038
Differential Revision: https://phabricator.haskell.org/D4680
-rw-r--r-- | compiler/coreSyn/MkCore.hs | 36 | ||||
-rw-r--r-- | compiler/prelude/PrelNames.hs | 7 | ||||
-rw-r--r-- | compiler/simplStg/UnariseStg.hs | 5 | ||||
-rw-r--r-- | libraries/base/Control/Exception/Base.hs | 6 | ||||
-rw-r--r-- | rts/Prelude.h | 2 | ||||
-rw-r--r-- | rts/RtsStartup.c | 7 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/Makefile | 9 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/all.T | 4 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs | 80 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs | 80 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs | 165 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs | 61 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/test/Main.hs | 4 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T15038/test/Parser.hs | 61 |
14 files changed, 518 insertions, 9 deletions
diff --git a/compiler/coreSyn/MkCore.hs b/compiler/coreSyn/MkCore.hs index 8291c01807..aad6d14a90 100644 --- a/compiler/coreSyn/MkCore.hs +++ b/compiler/coreSyn/MkCore.hs @@ -46,7 +46,7 @@ module MkCore ( rEC_CON_ERROR_ID, rUNTIME_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, pAT_ERROR_ID, rEC_SEL_ERROR_ID, aBSENT_ERROR_ID, - tYPE_ERROR_ID, + tYPE_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID ) where #include "HsVersions.h" @@ -708,9 +708,12 @@ recSelErrorName, runtimeErrorName, absentErrorName :: Name recConErrorName, patErrorName :: Name nonExhaustiveGuardsErrorName, noMethodBindingErrorName :: Name typeErrorName :: Name +absentSumFieldErrorName :: Name recSelErrorName = err_nm "recSelError" recSelErrorIdKey rEC_SEL_ERROR_ID absentErrorName = err_nm "absentError" absentErrorIdKey aBSENT_ERROR_ID +absentSumFieldErrorName = err_nm "absentSumFieldError" absentSumFieldErrorIdKey + aBSENT_SUM_FIELD_ERROR_ID runtimeErrorName = err_nm "runtimeError" runtimeErrorIdKey rUNTIME_ERROR_ID recConErrorName = err_nm "recConError" recConErrorIdKey rEC_CON_ERROR_ID patErrorName = err_nm "patError" patErrorIdKey pAT_ERROR_ID @@ -726,7 +729,7 @@ err_nm str uniq id = mkWiredInIdName cONTROL_EXCEPTION_BASE (fsLit str) uniq id rEC_SEL_ERROR_ID, rUNTIME_ERROR_ID, rEC_CON_ERROR_ID :: Id pAT_ERROR_ID, nO_METHOD_BINDING_ERROR_ID, nON_EXHAUSTIVE_GUARDS_ERROR_ID :: Id -tYPE_ERROR_ID, aBSENT_ERROR_ID :: Id +tYPE_ERROR_ID, aBSENT_ERROR_ID, aBSENT_SUM_FIELD_ERROR_ID :: Id rEC_SEL_ERROR_ID = mkRuntimeErrorId recSelErrorName rUNTIME_ERROR_ID = mkRuntimeErrorId runtimeErrorName rEC_CON_ERROR_ID = mkRuntimeErrorId recConErrorName @@ -735,6 +738,35 @@ nO_METHOD_BINDING_ERROR_ID = mkRuntimeErrorId noMethodBindingErrorName nON_EXHAUSTIVE_GUARDS_ERROR_ID = mkRuntimeErrorId nonExhaustiveGuardsErrorName tYPE_ERROR_ID = mkRuntimeErrorId typeErrorName +-- Note [aBSENT_SUM_FIELD_ERROR_ID] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Absent argument error for unused unboxed sum fields are different than absent +-- error used in dummy worker functions (see `mkAbsentErrorApp`): +-- +-- - `absentSumFieldError` can't take arguments because it's used in unarise for +-- unused pointer fields in unboxed sums, and applying an argument would +-- require allocating a thunk. +-- +-- - `absentSumFieldError` can't be CAFFY because that would mean making some +-- non-CAFFY definitions that use unboxed sums CAFFY in unarise. +-- +-- To make `absentSumFieldError` non-CAFFY we get a stable pointer to it in +-- RtsStartup.c and mark it as non-CAFFY here. +-- +-- Getting this wrong causes hard-to-debug runtime issues, see #15038. +-- +-- TODO: Remove stable pointer hack after fixing #9718. +-- However, we should still be careful about not making things CAFFY just +-- because they use unboxed sums. Unboxed objects are supposed to be +-- efficient, and none of the other unboxed literals make things CAFFY. + +aBSENT_SUM_FIELD_ERROR_ID + = mkVanillaGlobalWithInfo absentSumFieldErrorName + (mkSpecForAllTys [alphaTyVar] (mkTyVarTy alphaTyVar)) -- forall a . a + (vanillaIdInfo `setStrictnessInfo` mkClosedStrictSig [] exnRes + `setArityInfo` 0 + `setCafInfo` NoCafRefs) -- #15038 + mkRuntimeErrorId :: Name -> Id -- Error function -- with type: forall (r:RuntimeRep) (a:TYPE r). Addr# -> a diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs index 3f419ebeac..fd324cb43e 100644 --- a/compiler/prelude/PrelNames.hs +++ b/compiler/prelude/PrelNames.hs @@ -880,9 +880,6 @@ dataQual_RDR mod str = mkOrig mod (mkOccNameFS dataName str) Many of these Names are not really "built in", but some parts of the compiler (notably the deriving mechanism) need to mention their names, and it's convenient to write them all down in one place. - ---MetaHaskell Extension add the constrs and the lower case case --- guys as well (perhaps) e.g. see trueDataConName below -} wildCardName :: Name @@ -2084,7 +2081,8 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey, realWorldPrimIdKey, recConErrorIdKey, unpackCStringUtf8IdKey, unpackCStringAppendIdKey, unpackCStringFoldrIdKey, unpackCStringIdKey, - typeErrorIdKey, divIntIdKey, modIntIdKey :: Unique + typeErrorIdKey, divIntIdKey, modIntIdKey, + absentSumFieldErrorIdKey :: Unique wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders] absentErrorIdKey = mkPreludeMiscIdUnique 1 @@ -2110,6 +2108,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21 typeErrorIdKey = mkPreludeMiscIdUnique 22 divIntIdKey = mkPreludeMiscIdUnique 23 modIntIdKey = mkPreludeMiscIdUnique 24 +absentSumFieldErrorIdKey = mkPreludeMiscIdUnique 9 unsafeCoerceIdKey, concatIdKey, filterIdKey, zipIdKey, bindIOIdKey, returnIOIdKey, newStablePtrIdKey, diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs index 57dd699f70..bcf699b369 100644 --- a/compiler/simplStg/UnariseStg.hs +++ b/compiler/simplStg/UnariseStg.hs @@ -204,7 +204,7 @@ import DataCon import FastString (FastString, mkFastString) import Id import Literal (Literal (..), literalType) -import MkCore (aBSENT_ERROR_ID) +import MkCore (aBSENT_SUM_FIELD_ERROR_ID) import MkId (voidPrimId, voidArgId) import MonadUtils (mapAccumLM) import Outputable @@ -577,7 +577,8 @@ mkUbxSum dc ty_args args0 = slotRubbishArg slot : mkTupArgs (arg_idx + 1) slots_left arg_map slotRubbishArg :: SlotTy -> StgArg - slotRubbishArg PtrSlot = StgVarArg aBSENT_ERROR_ID + slotRubbishArg PtrSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID + -- See Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore slotRubbishArg WordSlot = StgLitArg (MachWord 0) slotRubbishArg Word64Slot = StgLitArg (MachWord64 0) slotRubbishArg FloatSlot = StgLitArg (MachFloat 0) diff --git a/libraries/base/Control/Exception/Base.hs b/libraries/base/Control/Exception/Base.hs index e675e0c017..4b5d94ece7 100644 --- a/libraries/base/Control/Exception/Base.hs +++ b/libraries/base/Control/Exception/Base.hs @@ -95,7 +95,7 @@ module Control.Exception.Base ( -- * Calls for GHC runtime recSelError, recConError, runtimeError, nonExhaustiveGuardsError, patError, noMethodBindingError, - absentError, typeError, + absentError, absentSumFieldError, typeError, nonTermination, nestedAtomically, ) where @@ -398,3 +398,7 @@ nonTermination = toException NonTermination -- GHC's RTS calls this nestedAtomically :: SomeException nestedAtomically = toException NestedAtomically + +-- Introduced by unarise for unused unboxed sum fields +absentSumFieldError :: a +absentSumFieldError = absentError " in unboxed sum."# diff --git a/rts/Prelude.h b/rts/Prelude.h index 74b6b08227..6e5bf03bd6 100644 --- a/rts/Prelude.h +++ b/rts/Prelude.h @@ -45,6 +45,7 @@ PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactPinned_closure); PRELUDE_CLOSURE(base_GHCziIOziException_cannotCompactMutable_closure); PRELUDE_CLOSURE(base_ControlziExceptionziBase_nonTermination_closure); PRELUDE_CLOSURE(base_ControlziExceptionziBase_nestedAtomically_closure); +PRELUDE_CLOSURE(base_ControlziExceptionziBase_absentSumFieldError_closure); PRELUDE_CLOSURE(base_GHCziEventziThread_blockedOnBadFD_closure); PRELUDE_CLOSURE(base_GHCziConcziSync_runSparks_closure); @@ -99,6 +100,7 @@ PRELUDE_INFO(base_GHCziStable_StablePtr_con_info); #define nonTermination_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nonTermination_closure) #define nestedAtomically_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_nestedAtomically_closure) #define blockedOnBadFD_closure DLL_IMPORT_DATA_REF(base_GHCziEventziThread_blockedOnBadFD_closure) +#define absentSumFieldError_closure DLL_IMPORT_DATA_REF(base_ControlziExceptionziBase_absentSumFieldError_closure) #define Czh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Czh_con_info) #define Izh_con_info DLL_IMPORT_DATA_REF(ghczmprim_GHCziTypes_Izh_con_info) diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c index e4ca6b906d..7eb98a8ba0 100644 --- a/rts/RtsStartup.c +++ b/rts/RtsStartup.c @@ -247,6 +247,10 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) /* Add some GC roots for things in the base package that the RTS * knows about. We don't know whether these turn out to be CAFs * or refer to CAFs, but we have to assume that they might. + * + * Because these stable pointers will retain any CAF references in + * these closures `Id`s of these can be safely marked as non-CAFFY + * in the compiler. */ getStablePtr((StgPtr)runIO_closure); getStablePtr((StgPtr)runNonIO_closure); @@ -265,6 +269,9 @@ hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config) getStablePtr((StgPtr)cannotCompactPinned_closure); getStablePtr((StgPtr)cannotCompactMutable_closure); getStablePtr((StgPtr)nestedAtomically_closure); + getStablePtr((StgPtr)absentSumFieldError_closure); + // `Id` for this closure is marked as non-CAFFY, + // see Note [aBSENT_SUM_FIELD_ERROR_ID] in MkCore. getStablePtr((StgPtr)runSparks_closure); getStablePtr((StgPtr)ensureIOManagerIsRunning_closure); diff --git a/testsuite/tests/codeGen/should_run/T15038/Makefile b/testsuite/tests/codeGen/should_run/T15038/Makefile new file mode 100644 index 0000000000..508494975b --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/Makefile @@ -0,0 +1,9 @@ +TOP=../../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk + +.PHONY: T15038 +T15038: + '$(TEST_HC)' $(TEST_HC_OPTS) $(shell find . -iname '*.hs') -O2 -o Main \ + -package containers -package ghc-prim -package primitive + ./Main diff --git a/testsuite/tests/codeGen/should_run/T15038/all.T b/testsuite/tests/codeGen/should_run/T15038/all.T new file mode 100644 index 0000000000..6b284784ae --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/all.T @@ -0,0 +1,4 @@ +test('T15038', + [reqlib('containers'), reqlib('ghc-prim'), reqlib('primitive')], + run_command, + ['$MAKE -s --no-print-directory T15038']) diff --git a/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs new file mode 100644 index 0000000000..a138615b2d --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/common/Data/Trie/Naive.hs @@ -0,0 +1,80 @@ +{-# LANGUAGE DeriveFunctor #-} + +module Data.Trie.Naive + ( Trie + , singleton + , singletonString + , lookup + , parser + , fromList + , fromListAppend + , fromStringList + ) where + +import Prelude hiding (lookup) + +import Data.Semigroup (Semigroup) +import Data.Word (Word8) +import Data.Map (Map) +import Data.Bifunctor (second) +import Packed.Bytes (Bytes) +import qualified Data.Char +import qualified GHC.OldList as L +import qualified Packed.Bytes.Parser as P +import qualified Packed.Bytes as B +import qualified Data.Semigroup as SG +import qualified Data.Map.Strict as M + +data Trie a = Trie (Maybe a) (Map Word8 (Trie a)) + deriving (Functor) + +instance Semigroup a => Semigroup (Trie a) where + (<>) = append + +instance Semigroup a => Monoid (Trie a) where + mempty = Trie Nothing M.empty + mappend = (SG.<>) + +append :: Semigroup a => Trie a -> Trie a -> Trie a +append (Trie v1 m1) (Trie v2 m2) = Trie + (SG.getOption (SG.Option v1 SG.<> SG.Option v2)) + (M.unionWith append m1 m2) + +singleton :: Bytes -> a -> Trie a +singleton k v = B.foldr (\b r -> Trie Nothing (M.singleton b r)) (Trie (Just v) M.empty) k + +singletonString :: String -> a -> Trie a +singletonString k v = L.foldr (\c r -> Trie Nothing (M.singleton (c2w c) r)) (Trie (Just v) M.empty) k + +lookup :: Bytes -> Trie a -> Maybe a +lookup k t0 = case B.foldr lookupStep (Just t0) k of + Nothing -> Nothing + Just (Trie v _) -> v + +lookupStep :: Word8 -> Maybe (Trie a) -> Maybe (Trie a) +lookupStep w Nothing = Nothing +lookupStep w (Just (Trie _ m)) = M.lookup w m + +parser :: Trie (P.Parser a) -> P.Parser a +parser (Trie mp m) = case mp of + Just p -> p + Nothing -> do + w <- P.any + case M.lookup w m of + Nothing -> P.failure + Just t -> parser t + +fromList :: [(Bytes,a)] -> Trie a +fromList = fmap SG.getFirst . fromListAppend . map (second SG.First) + +fromListAppend :: Semigroup a => [(Bytes,a)] -> Trie a +fromListAppend = foldMap (uncurry singleton) + +fromStringList :: [(String,a)] -> Trie a +fromStringList = fmap SG.getFirst . fromStringListAppend . map (second SG.First) + +fromStringListAppend :: Semigroup a => [(String,a)] -> Trie a +fromStringListAppend = foldMap (uncurry singletonString) + +c2w :: Char -> Word8 +c2w = fromIntegral . Data.Char.ord diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs new file mode 100644 index 0000000000..224e03f75d --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes.hs @@ -0,0 +1,80 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE ScopedTypeVariables #-} + +{-# OPTIONS_GHC + -fno-warn-unsafe + -fno-warn-implicit-prelude + -fno-warn-missing-import-lists + -O2 +#-} + +module Packed.Bytes + ( Bytes(..) + , pack + , unpack + , length + -- * Folds + , foldr + -- * Unsliced Byte Arrays + , fromByteArray + ) where + +import Prelude hiding (take,length,replicate,drop,null,concat,foldr) + +import Data.Primitive (ByteArray(..)) +import Data.Word (Word8) +import Control.Monad.ST (runST, ST) +import qualified Data.Primitive as PM +import qualified GHC.OldList as L + +data Bytes = Bytes + {-# UNPACK #-} !ByteArray -- payload + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- length + +instance Show Bytes where + show x = "pack " ++ show (unpack x) + +pack :: [Word8] -> Bytes +pack bs = let arr = packByteArray bs in Bytes arr 0 (lengthByteArray arr) + +unpack :: Bytes -> [Word8] +unpack (Bytes arr off len) = go off + where + go :: Int -> [Word8] + go !ix = if ix < len + off + then PM.indexByteArray arr ix : go (ix + 1) + else [] + +fromByteArray :: ByteArray -> Bytes +fromByteArray ba = Bytes ba 0 (lengthByteArray ba) + +length :: Bytes -> Int +length (Bytes _ _ len) = len + +foldr :: (Word8 -> a -> a) -> a -> Bytes -> a +foldr f a0 (Bytes arr off0 len) = go off0 where + !end = off0 + len + go !ix = if ix < end + then f (PM.indexByteArray arr ix) (go (ix + 1)) + else a0 + +packByteArray :: [Word8] -> ByteArray +packByteArray ws0 = runST $ do + marr <- PM.newByteArray (L.length ws0) + let go [] !_ = return () + go (w : ws) !ix = PM.writeByteArray marr ix w >> go ws (ix + 1) + go ws0 0 + PM.unsafeFreezeByteArray marr + +unpackByteArray :: ByteArray -> [Word8] +unpackByteArray arr = go 0 where + go :: Int -> [Word8] + go !ix = if ix < lengthByteArray arr + then PM.indexByteArray arr ix : go (ix + 1) + else [] + +lengthByteArray :: ByteArray -> Int +lengthByteArray = PM.sizeofByteArray diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs new file mode 100644 index 0000000000..3f9c42ad52 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Parser.hs @@ -0,0 +1,165 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeInType #-} +{-# LANGUAGE UnboxedSums #-} +{-# LANGUAGE UnboxedTuples #-} + +{-# OPTIONS_GHC + -Weverything + -fno-warn-unsafe + -fno-warn-implicit-prelude + -fno-warn-missing-import-lists + -fno-warn-noncanonical-monoid-instances + -O2 +#-} + +module Packed.Bytes.Parser + ( Parser(..) + , Result(..) + , Leftovers(..) + , parseStreamST + , any + , failure + ) where + +import Control.Applicative +import Data.Primitive (ByteArray(..)) +import GHC.Int (Int(I#)) +import GHC.ST (ST(..),runST) +import GHC.Types (TYPE) +import GHC.Word (Word8(W8#)) +import Packed.Bytes (Bytes(..)) +import Packed.Bytes.Stream.ST (ByteStream(..)) +import Prelude hiding (any,replicate) + +import qualified Data.Primitive as PM +import qualified Control.Monad + +import GHC.Exts (Int#,ByteArray#,Word#,State#,(+#),(-#),(>#),indexWord8Array#) + +type Bytes# = (# ByteArray#, Int#, Int# #) +type Maybe# (a :: TYPE r) = (# (# #) | a #) +type Leftovers# s = (# Bytes# , ByteStream s #) +type Result# s a = (# Maybe# (Leftovers# s), Maybe# a #) + +data Result s a = Result + { resultLeftovers :: !(Maybe (Leftovers s)) + , resultValue :: !(Maybe a) + } + +data Leftovers s = Leftovers + { leftoversChunk :: {-# UNPACK #-} !Bytes + -- ^ The last chunk pulled from the stream + , leftoversStream :: ByteStream s + -- ^ The remaining stream + } + +data PureResult a = PureResult + { pureResultLeftovers :: {-# UNPACK #-} !Bytes + , pureResultValue :: !(Maybe a) + } deriving (Show) + +emptyByteArray :: ByteArray +emptyByteArray = runST (PM.newByteArray 0 >>= PM.unsafeFreezeByteArray) + +parseStreamST :: ByteStream s -> Parser a -> ST s (Result s a) +parseStreamST stream (Parser f) = ST $ \s0 -> + case f (# | (# (# unboxByteArray emptyByteArray, 0#, 0# #), stream #) #) s0 of + (# s1, r #) -> (# s1, boxResult r #) + +boxResult :: Result# s a -> Result s a +boxResult (# leftovers, val #) = case val of + (# (# #) | #) -> Result (boxLeftovers leftovers) Nothing + (# | a #) -> Result (boxLeftovers leftovers) (Just a) + +boxLeftovers :: Maybe# (Leftovers# s) -> Maybe (Leftovers s) +boxLeftovers (# (# #) | #) = Nothing +boxLeftovers (# | (# theBytes, stream #) #) = Just (Leftovers (boxBytes theBytes) stream) + +instance Functor Parser where + fmap = mapParser + +-- Remember to write liftA2 by hand at some point. +instance Applicative Parser where + pure = pureParser + (<*>) = Control.Monad.ap + +instance Monad Parser where + return = pure + (>>=) = bindLifted + +newtype Parser a = Parser + { getParser :: forall s. + Maybe# (Leftovers# s) + -> State# s + -> (# State# s, Result# s a #) + } + +nextNonEmpty :: ByteStream s -> State# s -> (# State# s, Maybe# (Leftovers# s) #) +nextNonEmpty (ByteStream f) s0 = case f s0 of + (# s1, r #) -> case r of + (# (# #) | #) -> (# s1, (# (# #) | #) #) + (# | (# theBytes@(# _,_,len #), stream #) #) -> case len of + 0# -> nextNonEmpty stream s1 + _ -> (# s1, (# | (# theBytes, stream #) #) #) + +withNonEmpty :: forall s b. + Maybe# (Leftovers# s) + -> State# s + -> (State# s -> (# State# s, Result# s b #)) + -> (Word# -> Bytes# -> ByteStream s -> State# s -> (# State# s, Result# s b #)) + -- The first argument is a Word8, not a full machine word. + -- The second argument is the complete,non-empty chunk + -- with the head byte still intact. + -> (# State# s, Result# s b #) +withNonEmpty (# (# #) | #) s0 g _ = g s0 +withNonEmpty (# | (# bytes0@(# arr0,off0,len0 #), stream0 #) #) s0 g f = case len0 ># 0# of + 1# -> f (indexWord8Array# arr0 off0) bytes0 stream0 s0 + _ -> case nextNonEmpty stream0 s0 of + (# s1, r #) -> case r of + (# (# #) | #) -> g s1 + (# | (# bytes1@(# arr1, off1, _ #), stream1 #) #) -> + f (indexWord8Array# arr1 off1) bytes1 stream1 s1 + +-- | Consume the next byte from the input. +any :: Parser Word8 +any = Parser go where + go :: Maybe# (Leftovers# s) -> State# s -> (# State# s, Result# s Word8 #) + go m s0 = withNonEmpty m s0 + (\s -> (# s, (# (# (# #) | #), (# (# #) | #) #) #)) + (\theByte theBytes stream s -> + (# s, (# (# | (# unsafeDrop# 1# theBytes, stream #) #), (# | W8# theByte #) #) #) + ) + +-- TODO: improve this +mapParser :: (a -> b) -> Parser a -> Parser b +mapParser f p = bindLifted p (pureParser . f) + +pureParser :: a -> Parser a +pureParser a = Parser $ \leftovers0 s0 -> + (# s0, (# leftovers0, (# | a #) #) #) + +bindLifted :: Parser a -> (a -> Parser b) -> Parser b +bindLifted (Parser f) g = Parser $ \leftovers0 s0 -> case f leftovers0 s0 of + (# s1, (# leftovers1, val #) #) -> case val of + (# (# #) | #) -> (# s1, (# leftovers1, (# (# #) | #) #) #) + (# | x #) -> case g x of + Parser k -> k leftovers1 s1 + +-- This assumes that the Bytes is longer than the index. It also does +-- not eliminate zero-length references to byte arrays. +unsafeDrop# :: Int# -> Bytes# -> Bytes# +unsafeDrop# i (# arr, off, len #) = (# arr, off +# i, len -# i #) + +unboxByteArray :: ByteArray -> ByteArray# +unboxByteArray (ByteArray arr) = arr + +boxBytes :: Bytes# -> Bytes +boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c) + +failure :: Parser a +failure = Parser (\m s -> (# s, (# m, (# (# #) | #) #) #)) diff --git a/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs new file mode 100644 index 0000000000..ffba9c2596 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/src/Packed/Bytes/Stream/ST.hs @@ -0,0 +1,61 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnboxedSums #-} + +{-# OPTIONS_GHC -O2 #-} + +module Packed.Bytes.Stream.ST + ( ByteStream(..) + , empty + , unpack + , fromBytes + ) where + +import Data.Primitive (Array,ByteArray(..)) +import Data.Semigroup (Semigroup) +import Data.Word (Word8) +import GHC.Exts (RealWorld,State#,Int#,ByteArray#) +import GHC.Int (Int(I#)) +import GHC.ST (ST(..)) +import Packed.Bytes (Bytes(..)) +import System.IO (Handle) +import qualified Data.Primitive as PM +import qualified Data.Semigroup as SG +import qualified Packed.Bytes as B + +type Bytes# = (# ByteArray#, Int#, Int# #) + +newtype ByteStream s = ByteStream + (State# s -> (# State# s, (# (# #) | (# Bytes# , ByteStream s #) #) #) ) + +fromBytes :: Bytes -> ByteStream s +fromBytes b = ByteStream + (\s0 -> (# s0, (# | (# unboxBytes b, empty #) #) #)) + +nextChunk :: ByteStream s -> ST s (Maybe (Bytes,ByteStream s)) +nextChunk (ByteStream f) = ST $ \s0 -> case f s0 of + (# s1, r #) -> case r of + (# (# #) | #) -> (# s1, Nothing #) + (# | (# theBytes, theStream #) #) -> (# s1, Just (boxBytes theBytes, theStream) #) + +empty :: ByteStream s +empty = ByteStream (\s -> (# s, (# (# #) | #) #) ) + +boxBytes :: Bytes# -> Bytes +boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c) + +unboxBytes :: Bytes -> Bytes# +unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a,b,c #) + +unpack :: ByteStream s -> ST s [Word8] +unpack stream = ST (unpackInternal stream) + +unpackInternal :: ByteStream s -> State# s -> (# State# s, [Word8] #) +unpackInternal (ByteStream f) s0 = case f s0 of + (# s1, r #) -> case r of + (# (# #) | #) -> (# s1, [] #) + (# | (# bytes, stream #) #) -> case unpackInternal stream s1 of + (# s2, ws #) -> (# s2, B.unpack (boxBytes bytes) ++ ws #) diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Main.hs b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs new file mode 100644 index 0000000000..56acd042db --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/test/Main.hs @@ -0,0 +1,4 @@ +import qualified Parser as Parser + +main :: IO () +main = print (iterate Parser.byteParserBadOnce 5 !! 100000) diff --git a/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs new file mode 100644 index 0000000000..70f9f3336b --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T15038/test/Parser.hs @@ -0,0 +1,61 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeInType #-} +{-# LANGUAGE UnboxedSums #-} +{-# LANGUAGE UnboxedTuples #-} + +{-# OPTIONS_GHC -Wall #-} + +module Parser + ( byteParserBadOnce + ) where + +import Control.Monad.ST (runST) +import Data.Word (Word8) +import Packed.Bytes (Bytes) +import Packed.Bytes.Parser (Parser) +import Packed.Bytes.Stream.ST (ByteStream) +import qualified Data.Char +import qualified Packed.Bytes as B +import qualified Packed.Bytes.Parser as P +import qualified Packed.Bytes.Stream.ST as Stream + +-- from common directory +import qualified Data.Trie.Naive as Naive + +snmptrapdNaive :: Naive.Trie (Parser Word) +snmptrapdNaive = Naive.fromStringList + [ ("STRING: ", P.any >>= \_ -> return 5) + ] + +runExampleParser :: Parser a -> (forall s. ByteStream s) -> (Maybe a, Maybe String) +runExampleParser parser stream = runST $ do + P.Result mleftovers r <- P.parseStreamST stream parser + mextra <- case mleftovers of + Nothing -> return Nothing + Just (P.Leftovers chunk remainingStream) -> do + bs <- Stream.unpack remainingStream + return (Just (map word8ToChar (B.unpack chunk ++ bs))) + return (r,mextra) + +byteParserBadOnce :: Int -> Int +byteParserBadOnce x = do + let sample = ("STRING: _6_ " ++ show x) + stream = Stream.fromBytes (s2b sample) + expected = 6 + (r,mextra) = runExampleParser (Naive.parser snmptrapdNaive) stream + a1 = if Nothing == mextra then 1 else 0 + a2 = if Just expected == r then 1 else 0 + in a1 + (a2 + x) + +s2b :: String -> Bytes +s2b = B.pack . map charToWord8 + +charToWord8 :: Char -> Word8 +charToWord8 = fromIntegral . Data.Char.ord + +word8ToChar :: Word8 -> Char +word8ToChar = Data.Char.chr . fromIntegral |