summaryrefslogtreecommitdiff
path: root/compiler/GHC
diff options
context:
space:
mode:
authorAndrew Martin <andrew.thaddeus@gmail.com>2019-11-13 11:20:05 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-05-23 13:37:01 -0400
commit49301ad6226d9a83d110bee8c419615dd94f5ded (patch)
tree907c00e2c81d1f2025ad569cedf2bc39833bcb07 /compiler/GHC
parentd830bbc9921bcc59164a0a18f0e0874ae4ce226e (diff)
downloadhaskell-49301ad6226d9a83d110bee8c419615dd94f5ded.tar.gz
Implement cstringLength# and FinalPtr
This function and its accompanying rule resolve issue #5218. A future PR to the bytestring library will make the internal Data.ByteString.Internal.unsafePackAddress compute string length with cstringLength#. This will improve the status quo because it is eligible for constant folding. Additionally, introduce a new data constructor to ForeignPtrContents named FinalPtr. This additional data constructor, when used in the IsString instance for ByteString, leads to more Core-to-Core optimization opportunities, fewer runtime allocations, and smaller binaries. Also, this commit re-exports all the functions from GHC.CString (including cstringLength#) in GHC.Exts. It also adds a new test driver. This test driver is used to perform substring matches on Core that is dumped after all the simplifier passes. In this commit, it is used to check that constant folding of cstringLength# works.
Diffstat (limited to 'compiler/GHC')
-rw-r--r--compiler/GHC/Builtin/Names.hs7
-rw-r--r--compiler/GHC/Core/Opt/ConstantFold.hs27
-rw-r--r--compiler/GHC/Data/FastString.hs5
-rw-r--r--compiler/GHC/Hs/Lit.hs2
-rw-r--r--compiler/GHC/ThToHs.hs4
-rw-r--r--compiler/GHC/Types/Literal.hs2
6 files changed, 39 insertions, 8 deletions
diff --git a/compiler/GHC/Builtin/Names.hs b/compiler/GHC/Builtin/Names.hs
index 14cfc22cc1..f7275e4698 100644
--- a/compiler/GHC/Builtin/Names.hs
+++ b/compiler/GHC/Builtin/Names.hs
@@ -349,6 +349,7 @@ basicKnownKeyNames
-- Strings and lists
unpackCStringName,
unpackCStringFoldrName, unpackCStringUtf8Name,
+ cstringLengthName,
-- Overloaded lists
isListClassName,
@@ -1014,10 +1015,11 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
-- Base strings Strings
unpackCStringName, unpackCStringFoldrName,
- unpackCStringUtf8Name, eqStringName :: Name
+ unpackCStringUtf8Name, eqStringName, cstringLengthName :: Name
unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
+cstringLengthName = varQual gHC_CSTRING (fsLit "cstringLength#") cstringLengthIdKey
eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey
-- The 'inline' function
@@ -2097,7 +2099,7 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
unpackCStringFoldrIdKey, unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey,
- absentSumFieldErrorIdKey :: Unique
+ absentSumFieldErrorIdKey, cstringLengthIdKey :: Unique
wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders]
absentErrorIdKey = mkPreludeMiscIdUnique 1
@@ -2124,6 +2126,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21
typeErrorIdKey = mkPreludeMiscIdUnique 22
divIntIdKey = mkPreludeMiscIdUnique 23
modIntIdKey = mkPreludeMiscIdUnique 24
+cstringLengthIdKey = mkPreludeMiscIdUnique 25
concatIdKey, filterIdKey, zipIdKey,
bindIOIdKey, returnIOIdKey, newStablePtrIdKey,
diff --git a/compiler/GHC/Core/Opt/ConstantFold.hs b/compiler/GHC/Core/Opt/ConstantFold.hs
index 7c18f27003..65c9ed3896 100644
--- a/compiler/GHC/Core/Opt/ConstantFold.hs
+++ b/compiler/GHC/Core/Opt/ConstantFold.hs
@@ -66,6 +66,7 @@ import qualified Data.ByteString as BS
import Data.Int
import Data.Ratio
import Data.Word
+import Data.Maybe (fromMaybe)
{-
Note [Constant folding]
@@ -1257,6 +1258,8 @@ builtinRules
ru_nargs = 4, ru_try = match_append_lit },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
ru_nargs = 2, ru_try = match_eq_string },
+ BuiltinRule { ru_name = fsLit "CStringLength", ru_fn = cstringLengthName,
+ ru_nargs = 1, ru_try = match_cstring_length },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
@@ -1477,6 +1480,30 @@ match_eq_string _ id_unf _
match_eq_string _ _ _ _ = Nothing
+-----------------------------------------------------------------------
+-- Illustration of this rule:
+--
+-- cstringLength# "foobar"# --> 6
+-- cstringLength# "fizz\NULzz"# --> 4
+--
+-- Nota bene: Addr# literals are suffixed by a NUL byte when they are
+-- compiled to read-only data sections. That's why cstringLength# is
+-- well defined on Addr# literals that do not explicitly have an embedded
+-- NUL byte.
+--
+-- See GHC issue #5218, MR 2165, and bytestring PR 191. This is particularly
+-- helpful when using OverloadedStrings to create a ByteString since the
+-- function computing the length of such ByteStrings can often be constant
+-- folded.
+match_cstring_length :: RuleFun
+match_cstring_length env id_unf _ [lit1]
+ | Just (LitString str) <- exprIsLiteral_maybe id_unf lit1
+ -- If elemIndex returns Just, it has the index of the first embedded NUL
+ -- in the string. If no NUL bytes are present (the common case) then use
+ -- full length of the byte string.
+ = let len = fromMaybe (BS.length str) (BS.elemIndex 0 str)
+ in Just (Lit (mkLitInt (roPlatform env) (fromIntegral len)))
+match_cstring_length _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 82f38601f5..a8ffaff619 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -128,8 +128,9 @@ import Foreign
import GHC.Conc.Sync (sharedCAF)
#endif
-import GHC.Base ( unpackCString#, unpackNBytes# )
-
+#if __GLASGOW_HASKELL__ < 811
+import GHC.Base (unpackCString#,unpackNBytes#)
+#endif
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> ByteString
diff --git a/compiler/GHC/Hs/Lit.hs b/compiler/GHC/Hs/Lit.hs
index 75e5c1d315..78155289d0 100644
--- a/compiler/GHC/Hs/Lit.hs
+++ b/compiler/GHC/Hs/Lit.hs
@@ -53,7 +53,7 @@ data HsLit x
-- ^ Unboxed character
| HsString (XHsString x) {- SourceText -} FastString
-- ^ String
- | HsStringPrim (XHsStringPrim x) {- SourceText -} ByteString
+ | HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
| HsInt (XHsInt x) IntegralLit
-- ^ Genuinely an Int; arises from
diff --git a/compiler/GHC/ThToHs.hs b/compiler/GHC/ThToHs.hs
index 453106eaec..359f8d0606 100644
--- a/compiler/GHC/ThToHs.hs
+++ b/compiler/GHC/ThToHs.hs
@@ -6,6 +6,7 @@
This module converts Template Haskell syntax into Hs syntax
-}
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
@@ -1232,8 +1233,7 @@ cvtLit (CharPrimL c) = do { force c; return $ HsCharPrim NoSourceText c }
cvtLit (StringL s) = do { let { s' = mkFastString s }
; force s'
; return $ HsString (quotedSourceText s) s' }
-cvtLit (StringPrimL s) = do { let { s' = BS.pack s }
- ; force s'
+cvtLit (StringPrimL s) = do { let { !s' = BS.pack s }
; return $ HsStringPrim NoSourceText s' }
cvtLit (BytesPrimL (Bytes fptr off sz)) = do
let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr ->
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index c31f6349db..c57cc2bb97 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -114,7 +114,7 @@ data Literal
-- See Note [Types of LitNumbers] below for the
-- Type field.
- | LitString ByteString -- ^ A string-literal: stored and emitted
+ | LitString !ByteString -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @\'\\0\'@
-- terminator. Create with 'mkLitString'