diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2019-01-18 12:30:31 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-03-08 14:05:10 -0500 |
commit | 224a6b864c6aa0d851fcbf79469e5702b1116dbc (patch) | |
tree | 888b79e9f177c988d06365d0a218c41878225467 | |
parent | 5be7ad7861c8d39f60b7101fd8d8e816ff50353a (diff) | |
download | haskell-224a6b864c6aa0d851fcbf79469e5702b1116dbc.tar.gz |
TH: support raw bytes literals (#14741)
GHC represents String literals as ByteString internally for efficiency
reasons. However, until now it wasn't possible to efficiently create
large string literals with TH (e.g. to embed a file in a binary, cf #14741):
TH code had to unpack the bytes into a [Word8] that GHC then had to re-pack
into a ByteString.
This patch adds the possibility to efficiently create a "string" literal
from raw bytes. We get the following compile times for different sizes
of TH created literals:
|| Size || Before || After || Gain ||
|| 30K || 2.307s || 2.299 || 0% ||
|| 3M || 3.073s || 2.400s || 21% ||
|| 30M || 8.517s || 3.390s || 60% ||
Ticket #14741 can be fixed if the original code uses this new TH feature.
-rw-r--r-- | compiler/basicTypes/Literal.hs | 14 | ||||
-rw-r--r-- | compiler/hsSyn/Convert.hs | 8 | ||||
-rw-r--r-- | libraries/ghci/GHCi/TH/Binary.hs | 8 | ||||
-rw-r--r-- | libraries/template-haskell/Language/Haskell/TH/Lib.hs | 18 | ||||
-rw-r--r-- | libraries/template-haskell/Language/Haskell/TH/Lib/Internal.hs | 2 | ||||
-rw-r--r-- | libraries/template-haskell/Language/Haskell/TH/Ppr.hs | 1 | ||||
-rw-r--r-- | libraries/template-haskell/Language/Haskell/TH/Syntax.hs | 20 | ||||
-rw-r--r-- | libraries/template-haskell/changelog.md | 3 | ||||
-rw-r--r-- | testsuite/tests/th/T14741.hs | 28 | ||||
-rw-r--r-- | testsuite/tests/th/T14741.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/th/all.T | 1 |
11 files changed, 103 insertions, 1 deletions
diff --git a/compiler/basicTypes/Literal.hs b/compiler/basicTypes/Literal.hs index bfc3783d2b..8dd6708eda 100644 --- a/compiler/basicTypes/Literal.hs +++ b/compiler/basicTypes/Literal.hs @@ -188,6 +188,20 @@ Note [Natural literals] ~~~~~~~~~~~~~~~~~~~~~~~ Similar to Integer literals. +Note [String literals] +~~~~~~~~~~~~~~~~~~~~~~ + +String literals are UTF-8 encoded and stored into ByteStrings in the following +ASTs: Haskell, Core, Stg, Cmm. TH can also emit ByteString based string literals +with the BytesPrimL constructor (see #14741). + +It wasn't true before as [Word8] was used in Cmm AST and in TH which was quite +bad for performance with large strings (see #16198 and #14741). + +To include string literals into output objects, the assembler code generator has +to embed the UTF-8 encoded binary blob. See Note [Embedding large binary blobs] +for more details. + -} instance Binary LitNumType where diff --git a/compiler/hsSyn/Convert.hs b/compiler/hsSyn/Convert.hs index 364bcb06c2..7113905bd9 100644 --- a/compiler/hsSyn/Convert.hs +++ b/compiler/hsSyn/Convert.hs @@ -45,6 +45,9 @@ import Control.Monad( unless, liftM, ap ) import Data.Maybe( catMaybes, isNothing ) import Language.Haskell.TH as TH hiding (sigP) import Language.Haskell.TH.Syntax as TH +import Foreign.ForeignPtr +import Foreign.Ptr +import System.IO.Unsafe ------------------------------------------------------------------- -- The external interface @@ -1189,6 +1192,11 @@ cvtLit (StringL s) = do { let { s' = mkFastString s } cvtLit (StringPrimL s) = do { let { s' = BS.pack s } ; force s' ; return $ HsStringPrim NoSourceText s' } +cvtLit (BytesPrimL (Bytes fptr off sz)) = do + let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr -> + BS.packCStringLen (ptr `plusPtr` fromIntegral off, fromIntegral sz) + force bs + return $ HsStringPrim NoSourceText bs cvtLit _ = panic "Convert.cvtLit: Unexpected literal" -- cvtLit should not be called on IntegerL, RationalL -- That precondition is established right here in diff --git a/libraries/ghci/GHCi/TH/Binary.hs b/libraries/ghci/GHCi/TH/Binary.hs index 22a2847660..6f7aaca3e2 100644 --- a/libraries/ghci/GHCi/TH/Binary.hs +++ b/libraries/ghci/GHCi/TH/Binary.hs @@ -10,6 +10,7 @@ module GHCi.TH.Binary () where import Prelude -- See note [Why do we import Prelude here?] import Data.Binary import qualified Data.ByteString as B +import qualified Data.ByteString.Internal as B import GHC.Serialized import qualified Language.Haskell.TH as TH import qualified Language.Haskell.TH.Syntax as TH @@ -72,3 +73,10 @@ instance Binary TH.PatSynArgs instance Binary Serialized where put (Serialized tyrep wds) = put tyrep >> put (B.pack wds) get = Serialized <$> get <*> (B.unpack <$> get) + +instance Binary TH.Bytes where + put (TH.Bytes ptr off sz) = put bs + where bs = B.PS ptr (fromIntegral off) (fromIntegral sz) + get = do + B.PS ptr off sz <- get + return (TH.Bytes ptr (fromIntegral off) (fromIntegral sz)) diff --git a/libraries/template-haskell/Language/Haskell/TH/Lib.hs b/libraries/template-haskell/Language/Haskell/TH/Lib.hs index 69a40428b8..0a9e11b936 100644 --- a/libraries/template-haskell/Language/Haskell/TH/Lib.hs +++ b/libraries/template-haskell/Language/Haskell/TH/Lib.hs @@ -26,7 +26,7 @@ module Language.Haskell.TH.Lib ( -- ** Constructors lifted to 'Q' -- *** Literals intPrimL, wordPrimL, floatPrimL, doublePrimL, integerL, rationalL, - charL, stringL, stringPrimL, charPrimL, + charL, stringL, stringPrimL, charPrimL, bytesPrimL, mkBytes, -- *** Patterns litP, varP, tupP, unboxedTupP, unboxedSumP, conP, uInfixP, parensP, infixP, tildeP, bangP, asP, wildP, recP, @@ -157,6 +157,8 @@ import Language.Haskell.TH.Lib.Internal hiding import Language.Haskell.TH.Syntax import Control.Monad (liftM2) +import Foreign.ForeignPtr +import Data.Word import Prelude -- All definitions below represent the "old" API, since their definitions are @@ -303,3 +305,17 @@ standaloneDerivWithStrategyD mds ctxt ty = do ctxt' <- ctxt ty' <- ty return $ StandaloneDerivD mds ctxt' ty' + +------------------------------------------------------------------------------- +-- * Bytes literals + +-- | Create a Bytes datatype representing raw bytes to be embedded into the +-- program/library binary. +-- +-- @since 2.16.0.0 +mkBytes + :: ForeignPtr Word8 -- ^ Pointer to the data + -> Word -- ^ Offset from the pointer + -> Word -- ^ Number of bytes + -> Bytes +mkBytes = Bytes diff --git a/libraries/template-haskell/Language/Haskell/TH/Lib/Internal.hs b/libraries/template-haskell/Language/Haskell/TH/Lib/Internal.hs index 14ef0a02a8..b08b31c4fe 100644 --- a/libraries/template-haskell/Language/Haskell/TH/Lib/Internal.hs +++ b/libraries/template-haskell/Language/Haskell/TH/Lib/Internal.hs @@ -86,6 +86,8 @@ stringL :: String -> Lit stringL = StringL stringPrimL :: [Word8] -> Lit stringPrimL = StringPrimL +bytesPrimL :: Bytes -> Lit +bytesPrimL = BytesPrimL rationalL :: Rational -> Lit rationalL = RationalL diff --git a/libraries/template-haskell/Language/Haskell/TH/Ppr.hs b/libraries/template-haskell/Language/Haskell/TH/Ppr.hs index fa00c8c537..bc9efe6e3d 100644 --- a/libraries/template-haskell/Language/Haskell/TH/Ppr.hs +++ b/libraries/template-haskell/Language/Haskell/TH/Ppr.hs @@ -268,6 +268,7 @@ pprLit _ (CharL c) = text (show c) pprLit _ (CharPrimL c) = text (show c) <> char '#' pprLit _ (StringL s) = pprString s pprLit _ (StringPrimL s) = pprString (bytesToString s) <> char '#' +pprLit _ (BytesPrimL {}) = pprString "<binary data>" pprLit i (RationalL rat) = parensIf (i > noPrec) $ integer (numerator rat) <+> char '/' <+> integer (denominator rat) diff --git a/libraries/template-haskell/Language/Haskell/TH/Syntax.hs b/libraries/template-haskell/Language/Haskell/TH/Syntax.hs index 22c6cd1def..690d63807c 100644 --- a/libraries/template-haskell/Language/Haskell/TH/Syntax.hs +++ b/libraries/template-haskell/Language/Haskell/TH/Syntax.hs @@ -44,6 +44,7 @@ import GHC.ForeignSrcLang.Type import Language.Haskell.TH.LanguageExtensions import Numeric.Natural import Prelude +import Foreign.ForeignPtr import qualified Control.Monad.Fail as Fail @@ -1619,6 +1620,7 @@ data Lit = CharL Char | FloatPrimL Rational | DoublePrimL Rational | StringPrimL [Word8] -- ^ A primitive C-style string, type Addr# + | BytesPrimL Bytes -- ^ Some raw bytes, type Addr#: | CharPrimL Char deriving( Show, Eq, Ord, Data, Generic ) @@ -1626,6 +1628,24 @@ data Lit = CharL Char -- but that could complicate the -- supposedly-simple TH.Syntax literal type +-- | Raw bytes embedded into the binary. +-- +-- Avoid using Bytes constructor directly as it is likely to change in the +-- future. Use helpers such as `mkBytes` in Language.Haskell.TH.Lib instead. +data Bytes = Bytes + { bytesPtr :: ForeignPtr Word8 -- ^ Pointer to the data + , bytesOffset :: Word -- ^ Offset from the pointer + , bytesSize :: Word -- ^ Number of bytes + -- Maybe someday: + -- , bytesAlignement :: Word -- ^ Alignement constraint + -- , bytesReadOnly :: Bool -- ^ Shall we embed into a read-only + -- -- section or not + -- , bytesInitialized :: Bool -- ^ False: only use `bytesSize` to allocate + -- -- an uninitialized region + } + deriving (Eq,Ord,Data,Generic,Show) + + -- | Pattern in Haskell given in @{}@ data Pat = LitP Lit -- ^ @{ 5 or \'c\' }@ diff --git a/libraries/template-haskell/changelog.md b/libraries/template-haskell/changelog.md index a64795b5b9..9928df9ba9 100644 --- a/libraries/template-haskell/changelog.md +++ b/libraries/template-haskell/changelog.md @@ -8,6 +8,9 @@ * Add a `ForallVisT` constructor to `Type` to represent visible, dependent quantification. + * Introduce support for `Bytes` literals (raw bytes embedded into the output + binary) + ## 2.15.0.0 *TBA* * In `Language.Haskell.TH.Syntax`, `DataInstD`, `NewTypeInstD`, `TySynEqn`, diff --git a/testsuite/tests/th/T14741.hs b/testsuite/tests/th/T14741.hs new file mode 100644 index 0000000000..3e27bb4996 --- /dev/null +++ b/testsuite/tests/th/T14741.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE MagicHash #-} + +import Language.Haskell.TH.Lib +import Data.Word +import Foreign.ForeignPtr +import Foreign.Marshal.Array +import GHC.Exts +import System.Mem +import Control.Monad.IO.Class +import GHC.CString + +ptr :: Ptr () +ptr = Ptr $(do + -- create a buffer containing the "Hello World!" string + let xs = [72,101,108,108,111,32,87,111,114,108,100,33] :: [Word8] + fp <- liftIO $ mallocForeignPtrArray 25 + liftIO $ withForeignPtr fp $ \p -> do + pokeArray p xs + -- create a "Bytes" literal with an offset and size to only include "World" + let bys = mkBytes fp 6 5 + liftIO performGC -- check that the GC doesn't release our buffer too early + litE (bytesPrimL bys)) + +main :: IO () +main = do + let s = case ptr of Ptr addr -> unpackNBytes# addr 5# + putStrLn s diff --git a/testsuite/tests/th/T14741.stdout b/testsuite/tests/th/T14741.stdout new file mode 100644 index 0000000000..216e97ce08 --- /dev/null +++ b/testsuite/tests/th/T14741.stdout @@ -0,0 +1 @@ +World diff --git a/testsuite/tests/th/all.T b/testsuite/tests/th/all.T index 70070a4687..c9f2065c7a 100644 --- a/testsuite/tests/th/all.T +++ b/testsuite/tests/th/all.T @@ -472,3 +472,4 @@ test('T16183', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques']) test('T16195', normal, multimod_compile, ['T16195.hs', '-v0']) test('T16293b', normal, compile, ['']) test('T16326_TH', normal, compile, ['-v0 -ddump-splices -dsuppress-uniques']) +test('T14741', normal, compile_and_run, ['']) |