summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2017-10-24 12:19:08 -0400
committerBen Gamari <ben@smart-cactus.org>2018-07-30 17:58:42 -0400
commit56590db07a776ce81eb89d4a4d86bd0f953fb44e (patch)
treec691444111b78ea86bd79ccfec147c6ba2c3e477
parenta698bbfe47c4ae5c93fc54c033072d1bbd7abf17 (diff)
downloadhaskell-56590db07a776ce81eb89d4a4d86bd0f953fb44e.tar.gz
base: Make Foreign.Marshal.Alloc.allocBytes[Aligned] NOINLINE
As noted in #14346, touch# may be optimized away when the simplifier can see that the continuation passed to allocaBytes will not return. Marking CPS-style functions with NOINLINE ensures that the simplier can't draw any unsound conclusions. Ultimately the right solution here will be to do away with touch# and instead introduce a scoped primitive as is suggested in #14375. Note: This was present in 8.2 but was never merged to 8.4 in hopes that we would have #14375 implemented in time. This meant that the issue regressed again in 8.4. Thankfully we caught it in time to fix it for 8.6. (cherry picked from commit 404bf05ed3193e918875cd2f6c95ae0da5989be2)
-rw-r--r--libraries/base/Foreign/Marshal/Alloc.hs17
-rw-r--r--testsuite/tests/perf/should_run/all.T3
2 files changed, 19 insertions, 1 deletions
diff --git a/libraries/base/Foreign/Marshal/Alloc.hs b/libraries/base/Foreign/Marshal/Alloc.hs
index 48ed7fb497..c32f0b62d7 100644
--- a/libraries/base/Foreign/Marshal/Alloc.hs
+++ b/libraries/base/Foreign/Marshal/Alloc.hs
@@ -116,6 +116,19 @@ alloca :: forall a b . Storable a => (Ptr a -> IO b) -> IO b
alloca =
allocaBytesAligned (sizeOf (undefined :: a)) (alignment (undefined :: a))
+-- Note [NOINLINE for touch#]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Both allocaBytes and allocaBytesAligned use the touch#, which is notoriously
+-- fragile in the presence of simplification (see #14346). In particular, the
+-- simplifier may drop the continuation containing the touch# if it can prove
+-- that the action passed to allocaBytes will not return. The hack introduced to
+-- fix this for 8.2.2 is to mark allocaBytes as NOINLINE, ensuring that the
+-- simplifier can't see the divergence.
+--
+-- These can be removed once #14375 is fixed, which suggests that we instead do
+-- away with touch# in favor of a primitive that will capture the scoping left
+-- implicit in the case of touch#.
+
-- |@'allocaBytes' n f@ executes the computation @f@, passing as argument
-- a pointer to a temporarily allocated block of memory of @n@ bytes.
-- The block of memory is sufficiently aligned for any of the basic
@@ -134,6 +147,8 @@ allocaBytes (I# size) action = IO $ \ s0 ->
case touch# barr# s3 of { s4 ->
(# s4, r #)
}}}}}
+-- See Note [NOINLINE for touch#]
+{-# NOINLINE allocaBytes #-}
allocaBytesAligned :: Int -> Int -> (Ptr a -> IO b) -> IO b
allocaBytesAligned (I# size) (I# align) action = IO $ \ s0 ->
@@ -145,6 +160,8 @@ allocaBytesAligned (I# size) (I# align) action = IO $ \ s0 ->
case touch# barr# s3 of { s4 ->
(# s4, r #)
}}}}}
+-- See Note [NOINLINE for touch#]
+{-# NOINLINE allocaBytesAligned #-}
-- |Resize a memory area that was allocated with 'malloc' or 'mallocBytes'
-- to the size needed to store values of type @b@. The returned pointer
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 7a52492f45..9705a08f80 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -466,11 +466,12 @@ test('T9203',
# 2016-04-06 84345136 (i386/Debian) not sure
# 2017-03-24 77969268 (x86/Linux, 64-bit machine) probably join points
- , (wordsize(64), 84620888, 5) ]),
+ , (wordsize(64), 98360576, 5) ]),
# was 95747304
# 2019-09-10 94547280 post-AMP cleanup
# 2015-10-28 95451192 emit Typeable at definition site
# 2016-12-19 84620888 Join points
+ # 2018-07-30 98360576 it's unclear
only_ways(['normal'])],
compile_and_run,
['-O2'])