diff options
author | Ben Gamari <bgamari.foss@gmail.com> | 2018-03-19 12:02:43 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-03-19 12:05:12 -0400 |
commit | 20cbb0165e4d18df510e707791e761942d3c10f0 (patch) | |
tree | 7f11e3bb32433b64db760bf1b2d65d281d53f9ab | |
parent | d27336edcf2667c3c3908694247861beec3fc29b (diff) | |
download | haskell-20cbb0165e4d18df510e707791e761942d3c10f0.tar.gz |
Improve accuracy of get/setAllocationCounter
Summary:
get/setAllocationCounter didn't take into account allocations in the
current block. This was known at the time, but it turns out to be
important to have more accuracy when using these in a fine-grained
way.
Test Plan:
New unit test to test incrementally larger allocaitons. Before I got
results like this:
```
+0
+0
+0
+0
+0
+4096
+0
+0
+0
+0
+0
+4064
+0
+0
+4088
+4056
+0
+0
+0
+4088
+4096
+4056
+4096
```
Notice how the results aren't always monotonically increasing. After
this patch:
```
+344
+416
+488
+560
+632
+704
+776
+848
+920
+992
+1064
+1136
+1208
+1280
+1352
+1424
+1496
+1568
+1640
+1712
+1784
+1856
+1928
+2000
+2072
+2144
```
Reviewers: hvr, erikd, simonmar, jrtc27, trommler
Reviewed By: simonmar
Subscribers: trommler, jrtc27, rwbarton, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4363
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 4 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 14 | ||||
-rw-r--r-- | includes/Cmm.h | 2 | ||||
-rw-r--r-- | includes/rts/Threads.h | 2 | ||||
-rw-r--r-- | includes/stg/MiscClosures.h | 3 | ||||
-rw-r--r-- | libraries/base/GHC/Conc/Sync.hs | 21 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 20 | ||||
-rw-r--r-- | rts/RtsSymbols.c | 4 | ||||
-rw-r--r-- | rts/Threads.c | 13 | ||||
-rw-r--r-- | testsuite/tests/rts/all.T | 6 | ||||
-rw-r--r-- | testsuite/tests/rts/alloccounter1.hs | 19 | ||||
-rw-r--r-- | testsuite/tests/rts/alloccounter1.stdout | 1 |
12 files changed, 75 insertions, 34 deletions
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index b518c0790a..c1103e7d77 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -404,8 +404,8 @@ Opening the nursery corresponds to the following code: @ tso = CurrentTSO; cn = CurrentNursery; - bdfree = CurrentNuresry->free; - bdstart = CurrentNuresry->start; + bdfree = CurrentNursery->free; + bdstart = CurrentNursery->start; // We *add* the currently occupied portion of the nursery block to // the allocation limit, because we will subtract it again in diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 038d350a76..e580f989c1 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -2942,6 +2942,20 @@ primop TraceMarkerOp "traceMarker#" GenPrimOp has_side_effects = True out_of_line = True +primop GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp + State# RealWorld -> (# State# RealWorld, INT64 #) + { Retrieves the allocation counter for the current thread. } + with + has_side_effects = True + out_of_line = True + +primop SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp + INT64 -> State# RealWorld -> State# RealWorld + { Sets the allocation counter for the current thread to the given value. } + with + has_side_effects = True + out_of_line = True + ------------------------------------------------------------------------ section "Safe coercions" ------------------------------------------------------------------------ diff --git a/includes/Cmm.h b/includes/Cmm.h index 57d78ccaa5..18b2aaf324 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -161,9 +161,11 @@ /* TO_W_(n) converts n to W_ type from a smaller type */ #if SIZEOF_W == 4 +#define TO_I64(x) %sx64(x) #define TO_W_(x) %sx32(x) #define HALF_W_(x) %lobits16(x) #elif SIZEOF_W == 8 +#define TO_I64(x) (x) #define TO_W_(x) %sx64(x) #define HALF_W_(x) %lobits32(x) #endif diff --git a/includes/rts/Threads.h b/includes/rts/Threads.h index fceacdc75d..f72f5ed121 100644 --- a/includes/rts/Threads.h +++ b/includes/rts/Threads.h @@ -43,8 +43,6 @@ StgRegTable * resumeThread (void *); // int cmp_thread (StgPtr tso1, StgPtr tso2); int rts_getThreadId (StgPtr tso); -HsInt64 rts_getThreadAllocationCounter (StgPtr tso); -void rts_setThreadAllocationCounter (StgPtr tso, HsInt64 i); void rts_enableThreadAllocationLimit (StgPtr tso); void rts_disableThreadAllocationLimit (StgPtr tso); diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 76cfbd6c8c..1fbfab9fbe 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh); RTS_FUN_DECL(stg_clearCCSzh); RTS_FUN_DECL(stg_traceEventzh); RTS_FUN_DECL(stg_traceMarkerzh); +RTS_FUN_DECL(stg_getThreadAllocationCounterzh); +RTS_FUN_DECL(stg_setThreadAllocationCounterzh); + /* Other misc stuff */ // See wiki:Commentary/Compiler/Backends/PprC#Prototypes diff --git a/libraries/base/GHC/Conc/Sync.hs b/libraries/base/GHC/Conc/Sync.hs index 517c20e45f..94601f356d 100644 --- a/libraries/base/GHC/Conc/Sync.hs +++ b/libraries/base/GHC/Conc/Sync.hs @@ -105,6 +105,7 @@ import Data.Maybe import GHC.Base import {-# SOURCE #-} GHC.IO.Handle ( hFlush ) import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout ) +import GHC.Int import GHC.IO import GHC.IO.Encoding.UTF8 import GHC.IO.Exception @@ -194,18 +195,16 @@ instance Ord ThreadId where -- -- @since 4.8.0.0 setAllocationCounter :: Int64 -> IO () -setAllocationCounter i = do - ThreadId t <- myThreadId - rts_setThreadAllocationCounter t i +setAllocationCounter (I64# i) = IO $ \s -> + case setThreadAllocationCounter# i s of s' -> (# s', () #) -- | Return the current value of the allocation counter for the -- current thread. -- -- @since 4.8.0.0 getAllocationCounter :: IO Int64 -getAllocationCounter = do - ThreadId t <- myThreadId - rts_getThreadAllocationCounter t +getAllocationCounter = IO $ \s -> + case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #) -- | Enables the allocation counter to be treated as a limit for the -- current thread. When the allocation limit is enabled, if the @@ -242,16 +241,6 @@ disableAllocationLimit = do ThreadId t <- myThreadId rts_disableThreadAllocationLimit t --- We cannot do these operations safely on another thread, because on --- a 32-bit machine we cannot do atomic operations on a 64-bit value. --- Therefore, we only expose APIs that allow getting and setting the --- limit of the current thread. -foreign import ccall unsafe "rts_setThreadAllocationCounter" - rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO () - -foreign import ccall unsafe "rts_getThreadAllocationCounter" - rts_getThreadAllocationCounter :: ThreadId# -> IO Int64 - foreign import ccall unsafe "rts_enableThreadAllocationLimit" rts_enableThreadAllocationLimit :: ThreadId# -> IO () diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 67a238488c..e3f6e4cd19 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -2491,3 +2491,23 @@ stg_traceMarkerzh ( W_ msg ) return (); } + +stg_getThreadAllocationCounterzh () +{ + // Account for the allocation in the current block + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset)); +} + +stg_setThreadAllocationCounterzh ( I64 counter ) +{ + // Allocation in the current block will be subtracted by + // getThreadAllocationCounter#, so we have to offset any existing + // allocation here. See also openNursery/closeNursery in + // compiler/codeGen/StgCmmForeign.hs. + W_ offset; + offset = Hp - bdescr_start(CurrentNursery); + StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset); + return (); +} diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index e53a056a4c..d5800fd336 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -743,8 +743,6 @@ SymI_HasProto(rts_isProfiled) \ SymI_HasProto(rts_isDynamic) \ SymI_HasProto(rts_setInCallCapability) \ - SymI_HasProto(rts_getThreadAllocationCounter) \ - SymI_HasProto(rts_setThreadAllocationCounter) \ SymI_HasProto(rts_enableThreadAllocationLimit) \ SymI_HasProto(rts_disableThreadAllocationLimit) \ SymI_HasProto(rts_setMainThread) \ @@ -895,6 +893,8 @@ SymI_HasProto(stg_traceCcszh) \ SymI_HasProto(stg_traceEventzh) \ SymI_HasProto(stg_traceMarkerzh) \ + SymI_HasProto(stg_getThreadAllocationCounterzh) \ + SymI_HasProto(stg_setThreadAllocationCounterzh) \ SymI_HasProto(getMonotonicNSec) \ SymI_HasProto(lockFile) \ SymI_HasProto(unlockFile) \ diff --git a/rts/Threads.c b/rts/Threads.c index b76917773a..be6962246d 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso) } /* --------------------------------------------------------------------------- - * Getting & setting the thread allocation limit + * Enabling and disabling the thread allocation limit * ------------------------------------------------------------------------ */ -HsInt64 rts_getThreadAllocationCounter(StgPtr tso) -{ - // NB. doesn't take into account allocation in the current nursery - // block, so it might be off by up to 4k. - return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit)); -} - -void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i) -{ - ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i); -} void rts_enableThreadAllocationLimit(StgPtr tso) { diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index cd70132610..ffbd05c745 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -392,3 +392,9 @@ test('T14702', [ ignore_stdout test('T14900', normal, compile_and_run, ['-package ghc-compact']) test('InternalCounters', normal, run_command, ['$MAKE -s --no-print-directory InternalCounters']) +test('alloccounter1', normal, compile_and_run, + [ + # avoid allocating stack chunks, which counts as + # allocation and messes up the results: + '-with-rtsopts=-k1m' + ]) diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs new file mode 100644 index 0000000000..4b81896d2c --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.hs @@ -0,0 +1,19 @@ +module Main where + +import Control.Exception +import Control.Monad +import Data.List +import System.Mem + +main = do + let + testAlloc n = do + let start = 999999 + setAllocationCounter start + evaluate (last [1..n]) + c <- getAllocationCounter + -- print (start - c) + return (start - c) + results <- forM [1..1000] testAlloc + print (sort results == results) + -- results better be in ascending order diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout new file mode 100644 index 0000000000..0ca95142bb --- /dev/null +++ b/testsuite/tests/rts/alloccounter1.stdout @@ -0,0 +1 @@ +True |