summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Martin <andrew.thaddeus@gmail.com>2019-11-13 11:20:05 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-05-23 13:37:01 -0400
commit49301ad6226d9a83d110bee8c419615dd94f5ded (patch)
tree907c00e2c81d1f2025ad569cedf2bc39833bcb07
parentd830bbc9921bcc59164a0a18f0e0874ae4ce226e (diff)
downloadhaskell-49301ad6226d9a83d110bee8c419615dd94f5ded.tar.gz
Implement cstringLength# and FinalPtr
This function and its accompanying rule resolve issue #5218. A future PR to the bytestring library will make the internal Data.ByteString.Internal.unsafePackAddress compute string length with cstringLength#. This will improve the status quo because it is eligible for constant folding. Additionally, introduce a new data constructor to ForeignPtrContents named FinalPtr. This additional data constructor, when used in the IsString instance for ByteString, leads to more Core-to-Core optimization opportunities, fewer runtime allocations, and smaller binaries. Also, this commit re-exports all the functions from GHC.CString (including cstringLength#) in GHC.Exts. It also adds a new test driver. This test driver is used to perform substring matches on Core that is dumped after all the simplifier passes. In this commit, it is used to check that constant folding of cstringLength# works.
-rw-r--r--compiler/GHC/Builtin/Names.hs7
-rw-r--r--compiler/GHC/Core/Opt/ConstantFold.hs27
-rw-r--r--compiler/GHC/Data/FastString.hs5
-rw-r--r--compiler/GHC/Hs/Lit.hs2
-rw-r--r--compiler/GHC/ThToHs.hs4
-rw-r--r--compiler/GHC/Types/Literal.hs2
-rw-r--r--docs/users_guide/8.12.1-notes.rst12
-rwxr-xr-xlibraries/base/GHC/Exts.hs8
-rw-r--r--libraries/base/GHC/ForeignPtr.hs190
-rw-r--r--libraries/ghc-prim/GHC/CString.hs19
-rw-r--r--libraries/ghc-prim/changelog.md8
-rw-r--r--testsuite/.gitignore1
-rw-r--r--testsuite/driver/testlib.py20
-rw-r--r--testsuite/tests/primops/should_gen_core/CStringLength_core.hs11
-rw-r--r--testsuite/tests/primops/should_gen_core/CStringLength_core.substr-simpl1
-rw-r--r--testsuite/tests/primops/should_gen_core/all.T1
-rw-r--r--testsuite/tests/primops/should_run/CStringLength.hs33
-rw-r--r--testsuite/tests/primops/should_run/CStringLength.stdout4
-rw-r--r--testsuite/tests/primops/should_run/all.T1
19 files changed, 328 insertions, 28 deletions
diff --git a/compiler/GHC/Builtin/Names.hs b/compiler/GHC/Builtin/Names.hs
index 14cfc22cc1..f7275e4698 100644
--- a/compiler/GHC/Builtin/Names.hs
+++ b/compiler/GHC/Builtin/Names.hs
@@ -349,6 +349,7 @@ basicKnownKeyNames
-- Strings and lists
unpackCStringName,
unpackCStringFoldrName, unpackCStringUtf8Name,
+ cstringLengthName,
-- Overloaded lists
isListClassName,
@@ -1014,10 +1015,11 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
-- Base strings Strings
unpackCStringName, unpackCStringFoldrName,
- unpackCStringUtf8Name, eqStringName :: Name
+ unpackCStringUtf8Name, eqStringName, cstringLengthName :: Name
unpackCStringName = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
unpackCStringFoldrName = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
unpackCStringUtf8Name = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
+cstringLengthName = varQual gHC_CSTRING (fsLit "cstringLength#") cstringLengthIdKey
eqStringName = varQual gHC_BASE (fsLit "eqString") eqStringIdKey
-- The 'inline' function
@@ -2097,7 +2099,7 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
unpackCStringFoldrIdKey, unpackCStringIdKey,
typeErrorIdKey, divIntIdKey, modIntIdKey,
- absentSumFieldErrorIdKey :: Unique
+ absentSumFieldErrorIdKey, cstringLengthIdKey :: Unique
wildCardKey = mkPreludeMiscIdUnique 0 -- See Note [WildCard binders]
absentErrorIdKey = mkPreludeMiscIdUnique 1
@@ -2124,6 +2126,7 @@ voidPrimIdKey = mkPreludeMiscIdUnique 21
typeErrorIdKey = mkPreludeMiscIdUnique 22
divIntIdKey = mkPreludeMiscIdUnique 23
modIntIdKey = mkPreludeMiscIdUnique 24
+cstringLengthIdKey = mkPreludeMiscIdUnique 25
concatIdKey, filterIdKey, zipIdKey,
bindIOIdKey, returnIOIdKey, newStablePtrIdKey,
diff --git a/compiler/GHC/Core/Opt/ConstantFold.hs b/compiler/GHC/Core/Opt/ConstantFold.hs
index 7c18f27003..65c9ed3896 100644
--- a/compiler/GHC/Core/Opt/ConstantFold.hs
+++ b/compiler/GHC/Core/Opt/ConstantFold.hs
@@ -66,6 +66,7 @@ import qualified Data.ByteString as BS
import Data.Int
import Data.Ratio
import Data.Word
+import Data.Maybe (fromMaybe)
{-
Note [Constant folding]
@@ -1257,6 +1258,8 @@ builtinRules
ru_nargs = 4, ru_try = match_append_lit },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
ru_nargs = 2, ru_try = match_eq_string },
+ BuiltinRule { ru_name = fsLit "CStringLength", ru_fn = cstringLengthName,
+ ru_nargs = 1, ru_try = match_cstring_length },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
@@ -1477,6 +1480,30 @@ match_eq_string _ id_unf _
match_eq_string _ _ _ _ = Nothing
+-----------------------------------------------------------------------
+-- Illustration of this rule:
+--
+-- cstringLength# "foobar"# --> 6
+-- cstringLength# "fizz\NULzz"# --> 4
+--
+-- Nota bene: Addr# literals are suffixed by a NUL byte when they are
+-- compiled to read-only data sections. That's why cstringLength# is
+-- well defined on Addr# literals that do not explicitly have an embedded
+-- NUL byte.
+--
+-- See GHC issue #5218, MR 2165, and bytestring PR 191. This is particularly
+-- helpful when using OverloadedStrings to create a ByteString since the
+-- function computing the length of such ByteStrings can often be constant
+-- folded.
+match_cstring_length :: RuleFun
+match_cstring_length env id_unf _ [lit1]
+ | Just (LitString str) <- exprIsLiteral_maybe id_unf lit1
+ -- If elemIndex returns Just, it has the index of the first embedded NUL
+ -- in the string. If no NUL bytes are present (the common case) then use
+ -- full length of the byte string.
+ = let len = fromMaybe (BS.length str) (BS.elemIndex 0 str)
+ in Just (Lit (mkLitInt (roPlatform env) (fromIntegral len)))
+match_cstring_length _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs
index 82f38601f5..a8ffaff619 100644
--- a/compiler/GHC/Data/FastString.hs
+++ b/compiler/GHC/Data/FastString.hs
@@ -128,8 +128,9 @@ import Foreign
import GHC.Conc.Sync (sharedCAF)
#endif
-import GHC.Base ( unpackCString#, unpackNBytes# )
-
+#if __GLASGOW_HASKELL__ < 811
+import GHC.Base (unpackCString#,unpackNBytes#)
+#endif
-- | Gives the UTF-8 encoded bytes corresponding to a 'FastString'
bytesFS :: FastString -> ByteString
diff --git a/compiler/GHC/Hs/Lit.hs b/compiler/GHC/Hs/Lit.hs
index 75e5c1d315..78155289d0 100644
--- a/compiler/GHC/Hs/Lit.hs
+++ b/compiler/GHC/Hs/Lit.hs
@@ -53,7 +53,7 @@ data HsLit x
-- ^ Unboxed character
| HsString (XHsString x) {- SourceText -} FastString
-- ^ String
- | HsStringPrim (XHsStringPrim x) {- SourceText -} ByteString
+ | HsStringPrim (XHsStringPrim x) {- SourceText -} !ByteString
-- ^ Packed bytes
| HsInt (XHsInt x) IntegralLit
-- ^ Genuinely an Int; arises from
diff --git a/compiler/GHC/ThToHs.hs b/compiler/GHC/ThToHs.hs
index 453106eaec..359f8d0606 100644
--- a/compiler/GHC/ThToHs.hs
+++ b/compiler/GHC/ThToHs.hs
@@ -6,6 +6,7 @@
This module converts Template Haskell syntax into Hs syntax
-}
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
@@ -1232,8 +1233,7 @@ cvtLit (CharPrimL c) = do { force c; return $ HsCharPrim NoSourceText c }
cvtLit (StringL s) = do { let { s' = mkFastString s }
; force s'
; return $ HsString (quotedSourceText s) s' }
-cvtLit (StringPrimL s) = do { let { s' = BS.pack s }
- ; force s'
+cvtLit (StringPrimL s) = do { let { !s' = BS.pack s }
; return $ HsStringPrim NoSourceText s' }
cvtLit (BytesPrimL (Bytes fptr off sz)) = do
let bs = unsafePerformIO $ withForeignPtr fptr $ \ptr ->
diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs
index c31f6349db..c57cc2bb97 100644
--- a/compiler/GHC/Types/Literal.hs
+++ b/compiler/GHC/Types/Literal.hs
@@ -114,7 +114,7 @@ data Literal
-- See Note [Types of LitNumbers] below for the
-- Type field.
- | LitString ByteString -- ^ A string-literal: stored and emitted
+ | LitString !ByteString -- ^ A string-literal: stored and emitted
-- UTF-8 encoded, we'll arrange to decode it
-- at runtime. Also emitted with a @\'\\0\'@
-- terminator. Create with 'mkLitString'
diff --git a/docs/users_guide/8.12.1-notes.rst b/docs/users_guide/8.12.1-notes.rst
index b98b1f283b..dc666f8064 100644
--- a/docs/users_guide/8.12.1-notes.rst
+++ b/docs/users_guide/8.12.1-notes.rst
@@ -144,6 +144,9 @@ Arrow notation
``ghc-prim`` library
~~~~~~~~~~~~~~~~~~~~
+- Add a known-key ``cstringLength#`` to ``GHC.CString`` that is eligible
+ for constant folding by a built-in rule.
+
``ghc`` library
~~~~~~~~~~~~~~~
@@ -181,6 +184,15 @@ Arrow notation
``base`` library
~~~~~~~~~~~~~~~~
+- ``ForeignPtrContents`` has a new nullary data constructor ``FinalPtr``.
+ ``FinalPtr`` is intended for turning a primitive string literal into a
+ ``ForeignPtr``. Unlike ``PlainForeignPtr``, ``FinalPtr`` does not have
+ a finalizer. Replacing ``PlainForeignPtr`` that has ``NoFinalizers`` with
+ ``FinalPtr`` reduces allocations, reduces the size of compiled binaries,
+ and unlocks important Core-to-Core optimizations. ``FinalPtr`` will be used
+ in an upcoming ``bytestring`` release to improve the performance of
+ ``ByteString`` literals created with ``OverloadedStrings``.
+
Build system
~~~~~~~~~~~~
diff --git a/libraries/base/GHC/Exts.hs b/libraries/base/GHC/Exts.hs
index 894ffad509..8f878b813b 100755
--- a/libraries/base/GHC/Exts.hs
+++ b/libraries/base/GHC/Exts.hs
@@ -54,6 +54,14 @@ module GHC.Exts
-- * Overloaded string literals
IsString(..),
+ -- * CString
+ unpackCString#,
+ unpackAppendCString#,
+ unpackFoldrCString#,
+ unpackCStringUtf8#,
+ unpackNBytes#,
+ cstringLength#,
+
-- * Debugging
breakpoint, breakpointCond,
diff --git a/libraries/base/GHC/ForeignPtr.hs b/libraries/base/GHC/ForeignPtr.hs
index 92aef540d1..9ba6a2b017 100644
--- a/libraries/base/GHC/ForeignPtr.hs
+++ b/libraries/base/GHC/ForeignPtr.hs
@@ -23,11 +23,13 @@
module GHC.ForeignPtr
(
+ -- * Types
ForeignPtr(..),
ForeignPtrContents(..),
Finalizers(..),
FinalizerPtr,
FinalizerEnvPtr,
+ -- * Create
newForeignPtr_,
mallocForeignPtr,
mallocPlainForeignPtr,
@@ -35,15 +37,20 @@ module GHC.ForeignPtr
mallocPlainForeignPtrBytes,
mallocForeignPtrAlignedBytes,
mallocPlainForeignPtrAlignedBytes,
+ newConcForeignPtr,
+ -- * Add Finalizers
addForeignPtrFinalizer,
addForeignPtrFinalizerEnv,
- touchForeignPtr,
+ addForeignPtrConcFinalizer,
+ -- * Conversion
unsafeForeignPtrToPtr,
castForeignPtr,
plusForeignPtr,
- newConcForeignPtr,
- addForeignPtrConcFinalizer,
+ -- * Finalization
+ touchForeignPtr,
finalizeForeignPtr
+ -- * Commentary
+ -- $commentary
) where
import Foreign.Storable
@@ -86,15 +93,121 @@ data ForeignPtr a = ForeignPtr Addr# ForeignPtrContents
-- object, because that ensures that whatever the finalizer is
-- attached to is kept alive.
+-- | Functions called when a 'ForeignPtr' is finalized. Note that
+-- C finalizers and Haskell finalizers cannot be mixed.
data Finalizers
= NoFinalizers
+ -- ^ No finalizer. If there is no intent to add a finalizer at
+ -- any point in the future, consider 'FinalPtr' or 'PlainPtr' instead
+ -- since these perform fewer allocations.
| CFinalizers (Weak# ())
+ -- ^ Finalizers are all C functions.
| HaskellFinalizers [IO ()]
+ -- ^ Finalizers are all Haskell functions.
+-- | Controls finalization of a 'ForeignPtr', that is, what should happen
+-- if the 'ForeignPtr' becomes unreachable. Visually, these data constructors
+-- are appropriate in these scenarios:
+--
+-- > Memory backing pointer is
+-- > GC-Managed Unmanaged
+-- > Finalizer functions are: +------------+-----------------+
+-- > Allowed | MallocPtr | PlainForeignPtr |
+-- > +------------+-----------------+
+-- > Prohibited | PlainPtr | FinalPtr |
+-- > +------------+-----------------+
data ForeignPtrContents
= PlainForeignPtr !(IORef Finalizers)
- | MallocPtr (MutableByteArray# RealWorld) !(IORef Finalizers)
- | PlainPtr (MutableByteArray# RealWorld)
+ -- ^ The pointer refers to unmanaged memory that was allocated by
+ -- a foreign function (typically using @malloc@). The finalizer
+ -- frequently calls the C function @free@ or some variant of it.
+ | FinalPtr
+ -- ^ The pointer refers to unmanaged memory that should not be freed when
+ -- the 'ForeignPtr' becomes unreachable. Functions that add finalizers
+ -- to a 'ForeignPtr' throw exceptions when the 'ForeignPtr' is backed by
+ -- 'PlainPtr'Most commonly, this is used with @Addr#@ literals.
+ -- See Note [Why FinalPtr].
+ --
+ -- @since 4.15
+ | MallocPtr (MutableByteArray# RealWorld) !(IORef Finalizers)
+ -- ^ The pointer refers to a byte array.
+ -- The 'MutableByteArray#' field means that the 'MutableByteArray#' is
+ -- reachable (by GC) whenever the 'ForeignPtr' is reachable. When the
+ -- 'ForeignPtr' becomes unreachable, the runtime\'s normal GC recovers
+ -- the memory backing it. Here, the finalizer function intended to be used
+ -- to @free()@ any ancilliary *unmanaged* memory pointed to by the
+ -- 'MutableByteArray#'. See the @zlib@ library for an example of this use.
+ --
+ -- 1. Invariant: The 'Addr#' in the parent 'ForeignPtr' is an interior
+ -- pointer into this 'MutableByteArray#'.
+ -- 2. Invariant: The 'MutableByteArray#' is pinned, so the 'Addr#' does not
+ -- get invalidated by the GC moving the byte array.
+ -- 3. Invariant: A 'MutableByteArray#' must not be associated with more than
+ -- one set of finalizers. For example, this is sound:
+ --
+ -- > incrGood :: ForeignPtr Word8 -> ForeignPtr Word8
+ -- > incrGood (ForeignPtr p (MallocPtr m f)) = ForeignPtr (plusPtr p 1) (MallocPtr m f)
+ --
+ -- But this is unsound:
+ --
+ -- > incrBad :: ForeignPtr Word8 -> IO (ForeignPtr Word8)
+ -- > incrBad (ForeignPtr p (MallocPtr m _)) = do
+ -- > f <- newIORef NoFinalizers
+ -- > pure (ForeignPtr p (MallocPtr m f))
+ | PlainPtr (MutableByteArray# RealWorld)
+ -- ^ The pointer refers to a byte array. Finalization is not
+ -- supported. This optimizes @MallocPtr@ by avoiding the allocation
+ -- of a @MutVar#@ when it is known that no one will add finalizers to
+ -- the @ForeignPtr@. Functions that add finalizers to a 'ForeignPtr'
+ -- throw exceptions when the 'ForeignPtr' is backed by 'PlainPtr'.
+ -- The invariants that apply to 'MallocPtr' apply to 'PlainPtr' as well.
+
+-- Note [Why FinalPtr]
+--
+-- FinalPtr exists as an optimization for foreign pointers created
+-- from Addr# literals. Most commonly, this happens in the bytestring
+-- library, where the combination of OverloadedStrings and a rewrite
+-- rule overloads String literals as ByteString literals. See the
+-- rule "ByteString packChars/packAddress" in
+-- bytestring:Data.ByteString.Internal. Prior to the
+-- introduction of FinalPtr, bytestring used PlainForeignPtr (in
+-- Data.ByteString.Internal.unsafePackAddress) to handle such literals.
+-- With O2 optimization, the resulting Core from a GHC patched with a
+-- known-key cstringLength# function but without FinalPtr looked like:
+--
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+-- stringOne1 = "hello beautiful world"#
+-- RHS size: {terms: 11, types: 17, coercions: 0, joins: 0/0}
+-- stringOne
+-- = case newMutVar# NoFinalizers realWorld# of
+-- { (# ipv_i7b6, ipv1_i7b7 #) ->
+-- PS stringOne1 (PlainForeignPtr ipv1_i7b7) 0# 21#
+-- }
+--
+-- After the introduction of FinalPtr, the bytestring library was modified
+-- so that the resulting Core was instead:
+--
+-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
+-- stringOne1 = "hello beautiful world"#
+-- RHS size: {terms: 5, types: 0, coercions: 0, joins: 0/0}
+-- stringOne = PS stringOne1 FinalPtr 0# 21#
+--
+-- This improves performance in three ways:
+--
+-- 1. More optimization opportunities. GHC is willing to inline the FinalPtr
+-- variant of stringOne into its use sites. This means the offset and length
+-- are eligible for case-of-known-literal. Previously, this never happened.
+-- 2. Smaller binaries. Setting up the thunk to call newMutVar# required
+-- machine instruction in the generated code. On x86_64, FinalPtr reduces
+-- the size of binaries by about 450 bytes per ByteString literal.
+-- 3. Smaller memory footprint. Previously, every ByteString literal resulted
+-- in the allocation of a MutVar# and a PlainForeignPtr data constructor.
+-- These both hang around until the ByteString goes out of scope. FinalPtr
+-- eliminates both of these sources of allocations. The MutVar# is not
+-- allocated because FinalPtr does not allow it, and the data constructor
+-- is not allocated because FinalPtr is a nullary data constructor.
+--
+-- For more discussion of FinalPtr, see GHC MR #2165 and bytestring PR #191.
-- | @since 2.01
instance Eq (ForeignPtr a) where
@@ -259,7 +372,7 @@ addForeignPtrFinalizer :: FinalizerPtr a -> ForeignPtr a -> IO ()
addForeignPtrFinalizer (FunPtr fp) (ForeignPtr p c) = case c of
PlainForeignPtr r -> insertCFinalizer r fp 0# nullAddr# p ()
MallocPtr _ r -> insertCFinalizer r fp 0# nullAddr# p c
- _ -> errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to a plain pointer"
+ _ -> errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to a plain pointer or a final pointer"
-- Note [MallocPtr finalizers] (#10904)
--
@@ -277,7 +390,7 @@ addForeignPtrFinalizerEnv ::
addForeignPtrFinalizerEnv (FunPtr fp) (Ptr ep) (ForeignPtr p c) = case c of
PlainForeignPtr r -> insertCFinalizer r fp 1# ep p ()
MallocPtr _ r -> insertCFinalizer r fp 1# ep p c
- _ -> errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to a plain pointer"
+ _ -> errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to a plain pointer or a final pointer"
addForeignPtrConcFinalizer :: ForeignPtr a -> IO () -> IO ()
-- ^This function adds a finalizer to the given @ForeignPtr@. The
@@ -319,7 +432,7 @@ addForeignPtrConcFinalizer_ f@(MallocPtr fo r) finalizer = do
finalizer' = unIO (foreignPtrFinalizer r >> touch f)
addForeignPtrConcFinalizer_ _ _ =
- errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to plain pointer"
+ errorWithoutStackTrace "GHC.ForeignPtr: attempt to add a finalizer to plain pointer or a final pointer"
insertHaskellFinalizer :: IORef Finalizers -> IO () -> IO Bool
insertHaskellFinalizer r f = do
@@ -345,6 +458,8 @@ insertCFinalizer r fp flag ep p val = do
-- replaced the content of r before calling finalizeWeak#.
(# s1, _ #) -> unIO (insertCFinalizer r fp flag ep p val) s1
+-- Read the weak reference from an IORef Finalizers, creating it if necessary.
+-- Throws an exception if HaskellFinalizers is encountered.
ensureCFinalizerWeak :: IORef Finalizers -> value -> IO MyWeak
ensureCFinalizerWeak ref@(IORef (STRef r#)) value = do
fin <- readIORef ref
@@ -370,6 +485,7 @@ noMixingError = errorWithoutStackTrace $
"GHC.ForeignPtr: attempt to mix Haskell and C finalizers " ++
"in the same ForeignPtr"
+-- Swap out the finalizers with NoFinalizers and then run them.
foreignPtrFinalizer :: IORef Finalizers -> IO ()
foreignPtrFinalizer r = do
fs <- atomicSwapIORef r NoFinalizers
@@ -455,13 +571,53 @@ plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b
plusForeignPtr (ForeignPtr addr c) (I# d) = ForeignPtr (plusAddr# addr d) c
-- | Causes the finalizers associated with a foreign pointer to be run
--- immediately.
+-- immediately. The foreign pointer must not be used again after this
+-- function is called.
finalizeForeignPtr :: ForeignPtr a -> IO ()
-finalizeForeignPtr (ForeignPtr _ (PlainPtr _)) = return () -- no effect
-finalizeForeignPtr (ForeignPtr _ foreignPtr) = foreignPtrFinalizer refFinalizers
- where
- refFinalizers = case foreignPtr of
- (PlainForeignPtr ref) -> ref
- (MallocPtr _ ref) -> ref
- PlainPtr _ ->
- errorWithoutStackTrace "finalizeForeignPtr PlainPtr"
+finalizeForeignPtr (ForeignPtr _ c) = case c of
+ PlainForeignPtr ref -> foreignPtrFinalizer ref
+ MallocPtr _ ref -> foreignPtrFinalizer ref
+ _ -> errorWithoutStackTrace "finalizeForeignPtr PlainPtr"
+
+{- $commentary
+
+This is a high-level overview of how 'ForeignPtr' works.
+The implementation of 'ForeignPtr' must accomplish several goals:
+
+1. Invoke a finalizer once a foreign pointer becomes unreachable.
+2. Support augmentation of finalizers, i.e. 'addForeignPtrFinalizer'.
+ As a motivating example, suppose that the payload of a foreign
+ pointer is C struct @bar@ that has an optionally NULL pointer field
+ @foo@ to an unmanaged heap object. Initially, @foo@ is NULL, and
+ later the program uses @malloc@, initializes the object, and assigns
+ @foo@ the address returned by @malloc@. When the foreign pointer
+ becomes unreachable, it is now necessary to first @free@ the object
+ pointed to by @foo@ and then invoke whatever finalizer was associated
+ with @bar@. That is, finalizers must be invoked in the opposite order
+ they are added.
+3. Allow users to invoke a finalizer promptly if they know that the
+ foreign pointer is unreachable, i.e. 'finalizeForeignPtr'.
+
+How can these goals be accomplished? Goal 1 suggests that weak references
+and finalizers (via 'Weak#' and 'mkWeak#') are necessary. But how should
+they be used and what should their key be? Certainly not 'ForeignPtr' or
+'ForeignPtrContents'. See the warning in "GHC.Weak" about weak pointers with
+lifted (non-primitive) keys. The two finalizer-supporting data constructors of
+'ForeignPtr' have an @'IORef' 'Finalizers'@ (backed by 'MutVar#') field.
+This gets used in two different ways depending on the kind of finalizer:
+
+* 'HaskellFinalizers': The first @addForeignPtrConcFinalizer_@ call uses
+ 'mkWeak#' to attach the finalizer @foreignPtrFinalizer@ to the 'MutVar#'.
+ The resulting 'Weak#' is discarded (see @addForeignPtrConcFinalizer_@).
+ Subsequent calls to @addForeignPtrConcFinalizer_@ (goal 2) just add
+ finalizers onto the list in the 'HaskellFinalizers' data constructor.
+* 'CFinalizers': The first 'addForeignPtrFinalizer' call uses
+ 'mkWeakNoFinalizer#' to create a 'Weak#'. The 'Weak#' is preserved in the
+ 'CFinalizers' data constructor. Both the first call and subsequent
+ calls (goal 2) use 'addCFinalizerToWeak#' to attach finalizers to the
+ 'Weak#' itself. Also, see Note [MallocPtr finalizers] for discussion of
+ the key and value of this 'Weak#'.
+
+In either case, the runtime invokes the appropriate finalizers when the
+'ForeignPtr' becomes unreachable.
+-}
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index 8c0d272a67..514fb0e9f9 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -1,5 +1,4 @@
-{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns #-}
-
+{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns, UnliftedFFITypes #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.CString
@@ -18,7 +17,7 @@
module GHC.CString (
unpackCString#, unpackAppendCString#, unpackFoldrCString#,
- unpackCStringUtf8#, unpackNBytes#
+ unpackCStringUtf8#, unpackNBytes#, cstringLength#
) where
import GHC.Types
@@ -174,3 +173,17 @@ unpackNBytes# addr len# = unpack [] (len# -# 1#)
case indexCharOffAddr# addr i# of
ch -> unpack (C# ch : acc) (i# -# 1#)
+-- The return type is not correct here. We really want CSize,
+-- but that type is defined in base. However, CSize should always
+-- match the size of a machine word (I hope), so this is probably
+-- alright on all platforms that GHC supports.
+foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int#
+
+-- | Compute the length of a NUL-terminated string. This address
+-- must refer to immutable memory. GHC includes a built-in rule for
+-- constant folding when the argument is a statically-known literal.
+-- That is, a core-to-core pass reduces the expression
+-- @cstringLength# "hello"#@ to the constant @5#@.
+cstringLength# :: Addr# -> Int#
+{-# INLINE[0] cstringLength# #-}
+cstringLength# = c_strlen
diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md
index cf14d21c81..9cfbe99dbe 100644
--- a/libraries/ghc-prim/changelog.md
+++ b/libraries/ghc-prim/changelog.md
@@ -1,3 +1,11 @@
+## 0.6.2 (edit as necessary)
+
+- Shipped with GHC 8.12.1
+
+- Add known-key `cstringLength#` to `GHC.CString`. This is just the
+ C function `strlen`, but a built-in rewrite rule allows GHC to
+ compute the result at compile time when the argument is known.
+
## 0.6.1 (edit as necessary)
- Shipped with GHC 8.10.1
diff --git a/testsuite/.gitignore b/testsuite/.gitignore
index 737c9f2385..76980608c2 100644
--- a/testsuite/.gitignore
+++ b/testsuite/.gitignore
@@ -43,6 +43,7 @@ Thumbs.db
*.prof.sample.normalised
*.run.stdout
*.run.stderr
+*.dump-simpl
*.hp
tests/**/*.ps
diff --git a/testsuite/driver/testlib.py b/testsuite/driver/testlib.py
index abc01fdf95..ad9c0852e9 100644
--- a/testsuite/driver/testlib.py
+++ b/testsuite/driver/testlib.py
@@ -1344,6 +1344,26 @@ def compile_grep_asm(name: TestName,
# no problems found, this test passed
return passed()
+def compile_grep_core(name: TestName,
+ way: WayName,
+ extra_hc_opts: str
+ ) -> PassFail:
+ print('Compile only, extra args = ', extra_hc_opts)
+ result = simple_build(name + '.hs', way, '-ddump-to-file -dsuppress-all -ddump-simpl -O ' + extra_hc_opts, False, None, False, False)
+
+ if badResult(result):
+ return result
+
+ expected_pat_file = find_expected_file(name, 'substr-simpl')
+ actual_core_file = add_suffix(name, 'dump-simpl')
+
+ if not grep_output(join_normalisers(normalise_errmsg),
+ expected_pat_file, actual_core_file):
+ return failBecause('simplified core mismatch')
+
+ # no problems found, this test passed
+ return passed()
+
# -----------------------------------------------------------------------------
# Compile-and-run tests
diff --git a/testsuite/tests/primops/should_gen_core/CStringLength_core.hs b/testsuite/tests/primops/should_gen_core/CStringLength_core.hs
new file mode 100644
index 0000000000..98d33d5f51
--- /dev/null
+++ b/testsuite/tests/primops/should_gen_core/CStringLength_core.hs
@@ -0,0 +1,11 @@
+{-# language MagicHash #-}
+
+module CStringLengthCore
+ ( ozymandias
+ ) where
+
+import GHC.Exts
+
+ozymandias :: Int
+ozymandias =
+ I# (cstringLength# "I met a traveller from an antique land"#)
diff --git a/testsuite/tests/primops/should_gen_core/CStringLength_core.substr-simpl b/testsuite/tests/primops/should_gen_core/CStringLength_core.substr-simpl
new file mode 100644
index 0000000000..4b33d6629d
--- /dev/null
+++ b/testsuite/tests/primops/should_gen_core/CStringLength_core.substr-simpl
@@ -0,0 +1 @@
+I# 38#
diff --git a/testsuite/tests/primops/should_gen_core/all.T b/testsuite/tests/primops/should_gen_core/all.T
new file mode 100644
index 0000000000..d66255d8a1
--- /dev/null
+++ b/testsuite/tests/primops/should_gen_core/all.T
@@ -0,0 +1 @@
+test('CStringLength_core', normal, compile_grep_core, [''])
diff --git a/testsuite/tests/primops/should_run/CStringLength.hs b/testsuite/tests/primops/should_run/CStringLength.hs
new file mode 100644
index 0000000000..b580e61934
--- /dev/null
+++ b/testsuite/tests/primops/should_run/CStringLength.hs
@@ -0,0 +1,33 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+import GHC.Exts
+
+main :: IO ()
+main = do
+ putStr "A: "
+ print $
+ I# (cstringLength# "hello_world"#)
+ ==
+ naiveStrlen "hello_world"# 0
+ putStr "B: "
+ print $
+ I# (cstringLength# "aaaaaaaaaaaaa\x00b"#)
+ ==
+ naiveStrlen "aaaaaaaaaaaaa\x00b"# 0
+ putStr "C: "
+ print $
+ I# (cstringLength# "cccccccccccccccccc\x00b"#)
+ ==
+ naiveStrlen "cccccccccccccccccc\x00b"# 0
+ putStr "D: "
+ print $
+ I# (cstringLength# "araña\NULb"#)
+ ==
+ naiveStrlen "araña\NULb"# 0
+
+naiveStrlen :: Addr# -> Int -> Int
+naiveStrlen addr !n = case indexWord8OffAddr# addr 0# of
+ 0## -> n
+ _ -> naiveStrlen (plusAddr# addr 1#) (n + 1)
diff --git a/testsuite/tests/primops/should_run/CStringLength.stdout b/testsuite/tests/primops/should_run/CStringLength.stdout
new file mode 100644
index 0000000000..9413913c01
--- /dev/null
+++ b/testsuite/tests/primops/should_run/CStringLength.stdout
@@ -0,0 +1,4 @@
+A: True
+B: True
+C: True
+D: True
diff --git a/testsuite/tests/primops/should_run/all.T b/testsuite/tests/primops/should_run/all.T
index 16579207fa..952145fd49 100644
--- a/testsuite/tests/primops/should_run/all.T
+++ b/testsuite/tests/primops/should_run/all.T
@@ -29,3 +29,4 @@ test('CmpWord16', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayA', normal, compile_and_run, [''])
test('ShrinkSmallMutableArrayB', normal, compile_and_run, [''])
test('T14664', normal, compile_and_run, [''])
+test('CStringLength', normal, compile_and_run, ['-O2'])