diff options
author | Zejun Wu <watashi@fb.com> | 2018-10-28 12:39:58 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-10-28 13:40:43 -0400 |
commit | 5126764be614cc43b435e3f5ff34ea593c30269a (patch) | |
tree | 2f1b23dffd6dd02512e582313866a80c855e9635 /testsuite/tests/utils | |
parent | 42575701b5e71ec5c5e5418a4530e40e6487684c (diff) | |
download | haskell-5126764be614cc43b435e3f5ff34ea593c30269a.tar.gz |
Rewrite FastString table in concurrent hashtable
Summary:
Reimplement global FastString table using a concurrent hashatable with
fixed size segments and dynamically growing buckets instead of fixed size
buckets.
This addresses the problem that `mkFastString` was not linear when the
total number of entries was large.
Test Plan:
./validate
```
inplace/bin/ghc-stage2 --interactive -dfaststring-stats < /dev/null
GHCi, version 8.7.20181005: http://www.haskell.org/ghc/ :? for help
Prelude> Leaving GHCi.
FastString stats:
segments: 256
buckets: 16384
entries: 7117
largest segment: 64
smallest segment: 64
longest bucket: 5
has z-encoding: 0%
```
Also comapre the two implementation using
{P187}
The new implementation is on a par with the old version with different
conbination of parameters and perform better when the number of
FastString's are large.
{P188}
Reviewers: simonmar, bgamari, niteria
Reviewed By: simonmar, bgamari
Subscribers: rwbarton, carter
GHC Trac Issues: #14854
Differential Revision: https://phabricator.haskell.org/D5211
Diffstat (limited to 'testsuite/tests/utils')
-rw-r--r-- | testsuite/tests/utils/Makefile | 3 | ||||
-rw-r--r-- | testsuite/tests/utils/should_run/Makefile | 3 | ||||
-rw-r--r-- | testsuite/tests/utils/should_run/T14854.hs | 87 | ||||
-rw-r--r-- | testsuite/tests/utils/should_run/T14854.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/utils/should_run/all.T | 7 |
5 files changed, 102 insertions, 0 deletions
diff --git a/testsuite/tests/utils/Makefile b/testsuite/tests/utils/Makefile new file mode 100644 index 0000000000..9a36a1c5fe --- /dev/null +++ b/testsuite/tests/utils/Makefile @@ -0,0 +1,3 @@ +TOP=../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/utils/should_run/Makefile b/testsuite/tests/utils/should_run/Makefile new file mode 100644 index 0000000000..9101fbd40a --- /dev/null +++ b/testsuite/tests/utils/should_run/Makefile @@ -0,0 +1,3 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/utils/should_run/T14854.hs b/testsuite/tests/utils/should_run/T14854.hs new file mode 100644 index 0000000000..9187639d6d --- /dev/null +++ b/testsuite/tests/utils/should_run/T14854.hs @@ -0,0 +1,87 @@ +{-# LANGUAGE RecordWildCards #-} +module Main (main) where + +import FastString + +import Control.Concurrent +import Control.DeepSeq +import Control.Exception +import Control.Monad +import Data.ByteString (ByteString) +import Data.ByteString.Builder +import qualified Data.ByteString.Char8 as Char +import Data.ByteString.Lazy (toStrict) +import Data.List +import Data.Monoid +import qualified Data.Sequence as Seq +import Data.Time +import GHC.Conc +import System.IO +import System.Random +import Text.Printf + +data Options = Options + { optThreads :: Int -- ^ the number of threads to run concurrently + , optRepeat :: Int -- ^ how many times do we create the same 'FastString' + , optCount :: Int -- ^ the total number of different 'FastString's + , optPrefix :: Int -- ^ the length of prefix in each 'FastString' + } + +defOptions :: Options +defOptions = Options + { optThreads = 8 + , optRepeat = 16 + , optCount = 10000 + , optPrefix = 0 + } + +run :: [[ByteString]] -> (ByteString -> Int) -> IO Int +run jobs op = do + mvars <- forM ([0 ..] `zip` jobs) $ \(i, job) -> do + mvar <- newEmptyMVar + forkOn i $ do + uniq <- evaluate $ force $ maximum $ map op job + putMVar mvar uniq + return mvar + uniqs <- mapM takeMVar mvars + evaluate $ force $ maximum uniqs - 603979775 + +summary :: IO [[[a]]] -> IO Int +summary getTable = do + table <- getTable + evaluate $ force $ length $ concat $ concat table + +timeIt :: String -> IO a -> IO a +timeIt name io = do + before <- getCurrentTime + ret <- io + after <- getCurrentTime + hPrintf stderr "%s: %.2fms\n" name + (realToFrac $ diffUTCTime after before * 1000 :: Double) + return ret + +main :: IO () +main = do + seed <- randomIO + let Options{..} = defOptions + shuffle (i:is) s + | Seq.null s = [] + | otherwise = m: shuffle is (l <> r) + where + (l, m Seq.:< r) = Seq.viewl <$> Seq.splitAt (i `rem` Seq.length s) s + inputs = + shuffle (randoms $ mkStdGen seed) $ + mconcat $ replicate optRepeat $ + Seq.fromFunction optCount $ \i -> toStrict $ toLazyByteString $ + byteString (Char.replicate optPrefix '_') <> intDec i + jobs <- evaluate $ force $ transpose $ + map (take optThreads) $ + takeWhile (not . null) $ + iterate (drop optThreads) inputs + setNumCapabilities (length jobs) + -- The maximum unique may be greater than 'optCount' + u <- timeIt "run" $ run jobs $ uniqueOfFS . mkFastStringByteString + print $ optCount <= u && u <= min optThreads optRepeat * optCount + -- But we should never have duplicate 'FastString's in the table + n <- timeIt "summary" $ summary getFastStringTable + print $ n == optCount diff --git a/testsuite/tests/utils/should_run/T14854.stdout b/testsuite/tests/utils/should_run/T14854.stdout new file mode 100644 index 0000000000..dbde422651 --- /dev/null +++ b/testsuite/tests/utils/should_run/T14854.stdout @@ -0,0 +1,2 @@ +True +True diff --git a/testsuite/tests/utils/should_run/all.T b/testsuite/tests/utils/should_run/all.T new file mode 100644 index 0000000000..5a353d9103 --- /dev/null +++ b/testsuite/tests/utils/should_run/all.T @@ -0,0 +1,7 @@ +test('T14854', + [only_ways(threaded_ways), + omit_ways('ghci'), + reqlib('random'), + ignore_stderr], + compile_and_run, + ['-package ghc']) |