diff options
author | Andrew Martin <andrew.thaddeus@gmail.com> | 2019-11-13 11:20:05 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-05-23 13:37:01 -0400 |
commit | 49301ad6226d9a83d110bee8c419615dd94f5ded (patch) | |
tree | 907c00e2c81d1f2025ad569cedf2bc39833bcb07 /compiler/GHC | |
parent | d830bbc9921bcc59164a0a18f0e0874ae4ce226e (diff) | |
download | haskell-49301ad6226d9a83d110bee8c419615dd94f5ded.tar.gz |
Implement cstringLength# and FinalPtr
This function and its accompanying rule resolve issue #5218.
A future PR to the bytestring library will make the internal
Data.ByteString.Internal.unsafePackAddress compute string length
with cstringLength#. This will improve the status quo because it is
eligible for constant folding.
Additionally, introduce a new data constructor to ForeignPtrContents
named FinalPtr. This additional data constructor, when used in the
IsString instance for ByteString, leads to more Core-to-Core
optimization opportunities, fewer runtime allocations, and smaller
binaries.
Also, this commit re-exports all the functions from GHC.CString
(including cstringLength#) in GHC.Exts. It also adds a new test
driver. This test driver is used to perform substring matches on Core
that is dumped after all the simplifier passes. In this commit, it is
used to check that constant folding of cstringLength# works.
Diffstat (limited to 'compiler/GHC')
-rw-r--r-- | compiler/GHC/Builtin/Names.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/ConstantFold.hs | 27 | ||||
-rw-r--r-- | compiler/GHC/Data/FastString.hs | 5 | ||||
-rw-r--r-- | compiler/GHC/Hs/Lit.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/ThToHs.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/Types/Literal.hs | 2 |
6 files changed, 39 insertions, 8 deletions
diff --git a/compiler/GHC/Builtin/Names.hs b/compiler/GHC/Builtin/Names.hs index 14cfc22cc1..f7275e4698 100644 --- a/compiler/GHC/Builtin/Names.hs +++ b/compiler/GHC/Builtin/Names.hs @@ -349,6 +349,7 @@ basicKnownKeyNames -- Strings and lists unpackCStringName, unpackCStringFoldrName, unpackCStringUtf8Name, + cstringLengthName, -- Overloaded lists isListClassName, @@ -1014,10 +1015,11 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey -- Base strings Strings unpackCStringName, unpackCStringFoldrName, - unpackCStringUtf8Name, eqStringName :: Name + unpackCStringUtf8Name, eqStringName, cstringLengthName :: Name unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey +cstringLengthName = varQual gHC_CSTRING (fsLit "cstringLength#") cstringLengthIdKey eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey -- The 'inline' function @@ -2097,7 +2099,7 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey, unpackCStringUtf8IdKey, unpackCStringAppendIdKey, unpackCStringFoldrIdKey, unpackCStringIdKey, typeErrorIdKey, divIntIdKey, modIntIdKey, - absentSumFieldErrorIdKey :: Unique + absentSumFieldErrorIdKey, cstringLengthIdKey :: Unique wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders] absentErrorIdKey = mkPreludeMiscIdUnique 1 @@ -2124,6 +2126,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21 typeErrorIdKey = mkPreludeMiscIdUnique 22 divIntIdKey = mkPreludeMiscIdUnique 23 modIntIdKey = mkPreludeMiscIdUnique 24 +cstringLengthIdKey = mkPreludeMiscIdUnique 25 concatIdKey, filterIdKey, zipIdKey, bindIOIdKey, returnIOIdKey, newStablePtrIdKey, diff --git a/compiler/GHC/Core/Opt/ConstantFold.hs b/compiler/GHC/Core/Opt/ConstantFold.hs index 7c18f27003..65c9ed3896 100644 --- a/compiler/GHC/Core/Opt/ConstantFold.hs +++ b/compiler/GHC/Core/Opt/ConstantFold.hs @@ -66,6 +66,7 @@ import qualified Data.ByteString as BS import Data.Int import Data.Ratio import Data.Word +import Data.Maybe (fromMaybe) {- Note [Constant folding] @@ -1257,6 +1258,8 @@ builtinRules ru_nargs = 4, ru_try = match_append_lit }, BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName, ru_nargs = 2, ru_try = match_eq_string }, + BuiltinRule { ru_name = fsLit "CStringLength", ru_fn = cstringLengthName, + ru_nargs = 1, ru_try = match_cstring_length }, BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName, ru_nargs = 2, ru_try = \_ _ _ -> match_inline }, BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId, @@ -1477,6 +1480,30 @@ match_eq_string _ id_unf _ match_eq_string _ _ _ _ = Nothing +----------------------------------------------------------------------- +-- Illustration of this rule: +-- +-- cstringLength# "foobar"# --> 6 +-- cstringLength# "fizz\NULzz"# --> 4 +-- +-- Nota bene: Addr# literals are suffixed by a NUL byte when they are +-- compiled to read-only data sections. That's why cstringLength# is +-- well defined on Addr# literals that do not explicitly have an embedded +-- NUL byte. +-- +-- See GHC issue #5218, MR 2165, and bytestring PR 191. This is particularly +-- helpful when using OverloadedStrings to create a ByteString since the +-- function computing the length of such ByteStrings can often be constant +-- folded. +match_cstring_length :: RuleFun +match_cstring_length env id_unf _ [lit1] + | Just (LitString str) <- exprIsLiteral_maybe id_unf lit1 + -- If elemIndex returns Just, it has the index of the first embedded NUL + -- in the string. If no NUL bytes are present (the common case) then use + -- full length of the byte string. + = let len = fromMaybe (BS.length str) (BS.elemIndex 0 str) + in Just (Lit (mkLitInt (roPlatform env) (fromIntegral len))) +match_cstring_length _ _ _ _ = Nothing --------------------------------------------------- -- The rule is this: diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs index 82f38601f5..a8ffaff619 100644 --- a/compiler/GHC/Data/FastString.hs +++ b/compiler/GHC/Data/FastString.hs @@ -128,8 +128,9 @@ import Foreign import GHC.Conc.Sync (sharedCAF) #endif -import GHC.Base ( unpackCString#, unpackNBytes# ) - +#if __GLASGOW_HASKELL__ < 811 +import GHC.Base (unpackCString#,unpackNBytes#) +#endif -- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' bytesFS :: FastString -> ByteString diff --git a/compiler/GHC/Hs/Lit.hs b/compiler/GHC/Hs/Lit.hs index 75e5c1d315..78155289d0 100644 --- a/compiler/GHC/Hs/Lit.hs +++ b/compiler/GHC/Hs/Lit.hs @@ -53,7 +53,7 @@ data HsLit x -- ^ Unboxed character | HsString (XHsString x) {- SourceText -} FastString -- ^ String - | HsStringPrim (XHsStringPrim x) {- SourceText -} ByteString + | HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString -- ^ Packed bytes | HsInt (XHsInt x) IntegralLit -- ^ Genuinely an Int; arises from diff --git a/compiler/GHC/ThToHs.hs b/compiler/GHC/ThToHs.hs index 453106eaec..359f8d0606 100644 --- a/compiler/GHC/ThToHs.hs +++ b/compiler/GHC/ThToHs.hs @@ -6,6 +6,7 @@ This module converts Template Haskell syntax into Hs syntax -} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -1232,8 +1233,7 @@ cvtLit (CharPrimL c) = do { force c; return $ HsCharPrim NoSourceText c } cvtLit (StringL s) = do { let { s' = mkFastString s } ; force s' ; return $ HsString (quotedSourceText s) s' } -cvtLit (StringPrimL s) = do { let { s' = BS.pack s } - ; force s' +cvtLit (StringPrimL s) = do { let { !s' = BS.pack s } ; return $ HsStringPrim NoSourceText s' } cvtLit (BytesPrimL (Bytes fptr off sz)) = do let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr -> diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs index c31f6349db..c57cc2bb97 100644 --- a/compiler/GHC/Types/Literal.hs +++ b/compiler/GHC/Types/Literal.hs @@ -114,7 +114,7 @@ data Literal -- See Note [Types of LitNumbers] below for the -- Type field. - | LitString ByteString -- ^ A string-literal: stored and emitted + | LitString !ByteString -- ^ A string-literal: stored and emitted -- UTF-8 encoded, we'll arrange to decode it -- at runtime. Also emitted with a @\'\\0\'@ -- terminator. Create with 'mkLitString' |