summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <bgamari.foss@gmail.com>2018-03-19 12:02:43 -0400
committerBen Gamari <ben@smart-cactus.org>2018-03-19 12:05:12 -0400
commit20cbb0165e4d18df510e707791e761942d3c10f0 (patch)
tree7f11e3bb32433b64db760bf1b2d65d281d53f9ab
parentd27336edcf2667c3c3908694247861beec3fc29b (diff)
downloadhaskell-20cbb0165e4d18df510e707791e761942d3c10f0.tar.gz
Improve accuracy of get/setAllocationCounter
Summary: get/setAllocationCounter didn't take into account allocations in the current block. This was known at the time, but it turns out to be important to have more accuracy when using these in a fine-grained way. Test Plan: New unit test to test incrementally larger allocaitons. Before I got results like this: ``` +0 +0 +0 +0 +0 +4096 +0 +0 +0 +0 +0 +4064 +0 +0 +4088 +4056 +0 +0 +0 +4088 +4096 +4056 +4096 ``` Notice how the results aren't always monotonically increasing. After this patch: ``` +344 +416 +488 +560 +632 +704 +776 +848 +920 +992 +1064 +1136 +1208 +1280 +1352 +1424 +1496 +1568 +1640 +1712 +1784 +1856 +1928 +2000 +2072 +2144 ``` Reviewers: hvr, erikd, simonmar, jrtc27, trommler Reviewed By: simonmar Subscribers: trommler, jrtc27, rwbarton, thomie, carter Differential Revision: https://phabricator.haskell.org/D4363
-rw-r--r--compiler/codeGen/StgCmmForeign.hs4
-rw-r--r--compiler/prelude/primops.txt.pp14
-rw-r--r--includes/Cmm.h2
-rw-r--r--includes/rts/Threads.h2
-rw-r--r--includes/stg/MiscClosures.h3
-rw-r--r--libraries/base/GHC/Conc/Sync.hs21
-rw-r--r--rts/PrimOps.cmm20
-rw-r--r--rts/RtsSymbols.c4
-rw-r--r--rts/Threads.c13
-rw-r--r--testsuite/tests/rts/all.T6
-rw-r--r--testsuite/tests/rts/alloccounter1.hs19
-rw-r--r--testsuite/tests/rts/alloccounter1.stdout1
12 files changed, 75 insertions, 34 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index b518c0790a..c1103e7d77 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -404,8 +404,8 @@ Opening the nursery corresponds to the following code:
@
tso = CurrentTSO;
cn = CurrentNursery;
- bdfree = CurrentNuresry->free;
- bdstart = CurrentNuresry->start;
+ bdfree = CurrentNursery->free;
+ bdstart = CurrentNursery->start;
// We *add* the currently occupied portion of the nursery block to
// the allocation limit, because we will subtract it again in
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 038d350a76..e580f989c1 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -2942,6 +2942,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp
has_side_effects = True
out_of_line = True
+primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp
+ State# RealWorld -> (# State# RealWorld, INT64 #)
+ { Retrieves the allocation counter for the current thread. }
+ with
+ has_side_effects = True
+ out_of_line = True
+
+primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
+ INT64 -> State# RealWorld -> State# RealWorld
+ { Sets the allocation counter for the current thread to the given value. }
+ with
+ has_side_effects = True
+ out_of_line = True
+
------------------------------------------------------------------------
section "Safe coercions"
------------------------------------------------------------------------
diff --git a/includes/Cmm.h b/includes/Cmm.h
index 57d78ccaa5..18b2aaf324 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -161,9 +161,11 @@
/* TO_W_(n) converts n to W_ type from a smaller type */
#if SIZEOF_W == 4
+#define TO_I64(x) %sx64(x)
#define TO_W_(x) %sx32(x)
#define HALF_W_(x) %lobits16(x)
#elif SIZEOF_W == 8
+#define TO_I64(x) (x)
#define TO_W_(x) %sx64(x)
#define HALF_W_(x) %lobits32(x)
#endif
diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h
index fceacdc75d..f72f5ed121 100644
--- a/includes/rts/Threads.h
+++ b/includes/rts/Threads.h
@@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *);
//
int cmp_thread (StgPtr tso1, StgPtr tso2);
int rts_getThreadId (StgPtr tso);
-HsInt64 rts_getThreadAllocationCounter (StgPtr tso);
-void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i);
void rts_enableThreadAllocationLimit (StgPtr tso);
void rts_disableThreadAllocationLimit (StgPtr tso);
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index 76cfbd6c8c..1fbfab9fbe 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh);
RTS_FUN_DECL(stg_clearCCSzh);
RTS_FUN_DECL(stg_traceEventzh);
RTS_FUN_DECL(stg_traceMarkerzh);
+RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
+RTS_FUN_DECL(stg_setThreadAllocationCounterzh);
+
/* Other misc stuff */
// See wiki:Commentary/Compiler/Backends/PprC#Prototypes
diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs
index 517c20e45f..94601f356d 100644
--- a/libraries/base/GHC/Conc/Sync.hs
+++ b/libraries/base/GHC/Conc/Sync.hs
@@ -105,6 +105,7 @@ import Data.Maybe
import GHC.Base
import {-# SOURCE #-} GHC.IO.Handle ( hFlush )
import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout )
+import GHC.Int
import GHC.IO
import GHC.IO.Encoding.UTF8
import GHC.IO.Exception
@@ -194,18 +195,16 @@ instance Ord ThreadId where
--
-- @since 4.8.0.0
setAllocationCounter :: Int64 -> IO ()
-setAllocationCounter i = do
- ThreadId t <- myThreadId
- rts_setThreadAllocationCounter t i
+setAllocationCounter (I64# i) = IO $ \s ->
+ case setThreadAllocationCounter# i s of s' -> (# s', () #)
-- | Return the current value of the allocation counter for the
-- current thread.
--
-- @since 4.8.0.0
getAllocationCounter :: IO Int64
-getAllocationCounter = do
- ThreadId t <- myThreadId
- rts_getThreadAllocationCounter t
+getAllocationCounter = IO $ \s ->
+ case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #)
-- | Enables the allocation counter to be treated as a limit for the
-- current thread. When the allocation limit is enabled, if the
@@ -242,16 +241,6 @@ disableAllocationLimit = do
ThreadId t <- myThreadId
rts_disableThreadAllocationLimit t
--- We cannot do these operations safely on another thread, because on
--- a 32-bit machine we cannot do atomic operations on a 64-bit value.
--- Therefore, we only expose APIs that allow getting and setting the
--- limit of the current thread.
-foreign import ccall unsafe "rts_setThreadAllocationCounter"
- rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
-
-foreign import ccall unsafe "rts_getThreadAllocationCounter"
- rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
-
foreign import ccall unsafe "rts_enableThreadAllocationLimit"
rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 67a238488c..e3f6e4cd19 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -2491,3 +2491,23 @@ stg_traceMarkerzh ( W_ msg )
return ();
}
+
+stg_getThreadAllocationCounterzh ()
+{
+ // Account for the allocation in the current block
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
+}
+
+stg_setThreadAllocationCounterzh ( I64 counter )
+{
+ // Allocation in the current block will be subtracted by
+ // getThreadAllocationCounter#, so we have to offset any existing
+ // allocation here. See also openNursery/closeNursery in
+ // compiler/codeGen/StgCmmForeign.hs.
+ W_ offset;
+ offset = Hp - bdescr_start(CurrentNursery);
+ StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
+ return ();
+}
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index e53a056a4c..d5800fd336 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -743,8 +743,6 @@
SymI_HasProto(rts_isProfiled) \
SymI_HasProto(rts_isDynamic) \
SymI_HasProto(rts_setInCallCapability) \
- SymI_HasProto(rts_getThreadAllocationCounter) \
- SymI_HasProto(rts_setThreadAllocationCounter) \
SymI_HasProto(rts_enableThreadAllocationLimit) \
SymI_HasProto(rts_disableThreadAllocationLimit) \
SymI_HasProto(rts_setMainThread) \
@@ -895,6 +893,8 @@
SymI_HasProto(stg_traceCcszh) \
SymI_HasProto(stg_traceEventzh) \
SymI_HasProto(stg_traceMarkerzh) \
+ SymI_HasProto(stg_getThreadAllocationCounterzh) \
+ SymI_HasProto(stg_setThreadAllocationCounterzh) \
SymI_HasProto(getMonotonicNSec) \
SymI_HasProto(lockFile) \
SymI_HasProto(unlockFile) \
diff --git a/rts/Threads.c b/rts/Threads.c
index b76917773a..be6962246d 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
}
/* ---------------------------------------------------------------------------
- * Getting & setting the thread allocation limit
+ * Enabling and disabling the thread allocation limit
* ------------------------------------------------------------------------ */
-HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
-{
- // NB. doesn't take into account allocation in the current nursery
- // block, so it might be off by up to 4k.
- return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
-}
-
-void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
-{
- ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
-}
void rts_enableThreadAllocationLimit(StgPtr tso)
{
diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T
index cd70132610..ffbd05c745 100644
--- a/testsuite/tests/rts/all.T
+++ b/testsuite/tests/rts/all.T
@@ -392,3 +392,9 @@ test('T14702', [ ignore_stdout
test('T14900', normal, compile_and_run, ['-package ghc-compact'])
test('InternalCounters', normal, run_command,
['$MAKE -s --no-print-directory InternalCounters'])
+test('alloccounter1', normal, compile_and_run,
+ [
+ # avoid allocating stack chunks, which counts as
+ # allocation and messes up the results:
+ '-with-rtsopts=-k1m'
+ ])
diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs
new file mode 100644
index 0000000000..4b81896d2c
--- /dev/null
+++ b/testsuite/tests/rts/alloccounter1.hs
@@ -0,0 +1,19 @@
+module Main where
+
+import Control.Exception
+import Control.Monad
+import Data.List
+import System.Mem
+
+main = do
+ let
+ testAlloc n = do
+ let start = 999999
+ setAllocationCounter start
+ evaluate (last [1..n])
+ c <- getAllocationCounter
+ -- print (start - c)
+ return (start - c)
+ results <- forM [1..1000] testAlloc
+ print (sort results == results)
+ -- results better be in ascending order
diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout
new file mode 100644
index 0000000000..0ca95142bb
--- /dev/null
+++ b/testsuite/tests/rts/alloccounter1.stdout
@@ -0,0 +1 @@
+True