path: root/ghc/Main.hs
author     Zejun Wu <watashi@fb.com>          2018-10-28 12:39:58 -0400
committer  Ben Gamari <ben@smart-cactus.org>  2018-10-28 13:40:43 -0400
commit     5126764be614cc43b435e3f5ff34ea593c30269a (patch)
tree       2f1b23dffd6dd02512e582313866a80c855e9635 /ghc/Main.hs
parent     42575701b5e71ec5c5e5418a4530e40e6487684c (diff)
download   haskell-5126764be614cc43b435e3f5ff34ea593c30269a.tar.gz
Rewrite FastString table as a concurrent hashtable
Summary: Reimplement the global FastString table using a concurrent hashtable with fixed-size segments and dynamically growing buckets, instead of fixed-size buckets. This addresses the problem that `mkFastString` was not linear when the total number of entries was large.

Test Plan: ./validate

```
inplace/bin/ghc-stage2 --interactive -dfaststring-stats < /dev/null
GHCi, version 8.7.20181005: http://www.haskell.org/ghc/  :? for help
Prelude> Leaving GHCi.
FastString stats:
    segments:          256
    buckets:           16384
    entries:           7117
    largest segment:   64
    smallest segment:  64
    longest bucket:    5
    has z-encoding:    0%
```

Also compare the two implementations using {P187}. The new implementation is on a par with the old one under different combinations of parameters and performs better when the number of FastStrings is large.

{P188}

Reviewers: simonmar, bgamari, niteria

Reviewed By: simonmar, bgamari

Subscribers: rwbarton, carter

GHC Trac Issues: #14854

Differential Revision: https://phabricator.haskell.org/D5211
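To make the "fixed number of segments, dynamically growing buckets" design concrete, here is a minimal sketch of that layout. This is not the GHC FastString implementation (which interns FastStrings and uses mutable arrays rather than a lock-per-segment HashMap); the names `SegTable`, `newSegTable`, `internString`, and the use of the `hashable` and `unordered-containers` packages are assumptions made purely for illustration.

```haskell
-- Hypothetical sketch, not the GHC code: a table split into a fixed
-- number of independently locked segments, each of which can grow.
module SegTableSketch (SegTable, newSegTable, internString) where

import Control.Concurrent.MVar
import Data.Array (Array, listArray, (!))
import Data.Bits ((.&.))
import Data.Hashable (hash)                 -- assumes the hashable package
import qualified Data.HashMap.Strict as M   -- stand-in for a growable bucket array

-- One MVar per segment: writers to different segments never contend.
newtype SegTable v = SegTable (Array Int (MVar (M.HashMap String v)))

numSegments :: Int
numSegments = 256   -- a fixed power of two, as in the stats quoted above

newSegTable :: IO (SegTable v)
newSegTable = do
  segs <- mapM (const (newMVar M.empty)) [1 .. numSegments]
  pure (SegTable (listArray (0, numSegments - 1) segs))

-- Insert-or-lookup ("interning"): hash the key, lock only the segment it
-- falls into, and let that segment's map grow as needed.  Because the
-- segment count is fixed, there is never a global resize.
internString :: String -> v -> SegTable v -> IO v
internString key val (SegTable segs) = do
  let seg = hash key .&. (numSegments - 1)   -- low bits pick the segment
  modifyMVar (segs ! seg) $ \m ->
    case M.lookup key m of
      Just old -> pure (m, old)                   -- already present
      Nothing  -> pure (M.insert key val m, val)  -- grow this segment only
```

With this shape, per-segment statistics such as the "largest segment", "smallest segment" and "longest bucket" figures printed above can be computed by walking each segment independently, which is what the new `dumpFastStringStats` in the diff below does.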
Diffstat (limited to 'ghc/Main.hs')
-rw-r--r--  ghc/Main.hs  34
1 file changed, 15 insertions, 19 deletions
diff --git a/ghc/Main.hs b/ghc/Main.hs
index 03ac60db2d..15b7aee43e 100644
--- a/ghc/Main.hs
+++ b/ghc/Main.hs
@@ -805,14 +805,21 @@ dumpFinalStats dflags =
dumpFastStringStats :: DynFlags -> IO ()
dumpFastStringStats dflags = do
- buckets <- getFastStringTable
- let (entries, longest, has_z) = countFS 0 0 0 buckets
- msg = text "FastString stats:" $$
- nest 4 (vcat [text "size: " <+> int (length buckets),
- text "entries: " <+> int entries,
- text "longest chain: " <+> int longest,
- text "has z-encoding: " <+> (has_z `pcntOf` entries)
- ])
+ segments <- getFastStringTable
+ let buckets = concat segments
+ bucketsPerSegment = map length segments
+ entriesPerBucket = map length buckets
+ entries = sum entriesPerBucket
+ hasZ = sum $ map (length . filter hasZEncoding) buckets
+ msg = text "FastString stats:" $$ nest 4 (vcat
+ [ text "segments: " <+> int (length segments)
+ , text "buckets: " <+> int (sum bucketsPerSegment)
+ , text "entries: " <+> int entries
+ , text "largest segment: " <+> int (maximum bucketsPerSegment)
+ , text "smallest segment: " <+> int (minimum bucketsPerSegment)
+ , text "longest bucket: " <+> int (maximum entriesPerBucket)
+ , text "has z-encoding: " <+> (hasZ `pcntOf` entries)
+ ])
-- we usually get more "has z-encoding" than "z-encoded", because
-- when we z-encode a string it might hash to the exact same string,
-- which is not counted as "z-encoded". Only strings whose
@@ -822,17 +829,6 @@ dumpFastStringStats dflags = do
where
x `pcntOf` y = int ((x * 100) `quot` y) Outputable.<> char '%'
-countFS :: Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int)
-countFS entries longest has_z [] = (entries, longest, has_z)
-countFS entries longest has_z (b:bs) =
- let
- len = length b
- longest' = max len longest
- entries' = entries + len
- has_zs = length (filter hasZEncoding b)
- in
- countFS entries' longest' (has_z + has_zs) bs
-
showPackages, dumpPackages, dumpPackagesSimple :: DynFlags -> IO ()
showPackages dflags = putStrLn (showSDoc dflags (pprPackages dflags))
dumpPackages dflags = putMsg dflags (pprPackages dflags)
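For readers following the `dumpFastStringStats` hunk above, the sketch below shows how the printed figures fall out of the segments -> buckets -> entries shape that the new `getFastStringTable` returns. The sample data is made up for illustration, and the `hasZEncoding` percentage is omitted.

```haskell
-- Standalone illustration (made-up data) of how the new statistics are
-- aggregated from a segments -> buckets -> entries shape.
main :: IO ()
main = do
  let table :: [[[String]]]                    -- segment -> bucket -> entries
      table = [ [ ["foo", "bar"], ["baz"] ]    -- segment 0: 2 buckets
              , [ [], ["quux", "zip", "zap"] ] -- segment 1: 2 buckets
              ]
      buckets           = concat table
      bucketsPerSegment = map length table
      entriesPerBucket  = map length buckets
  putStrLn $ "segments:         " ++ show (length table)
  putStrLn $ "buckets:          " ++ show (sum bucketsPerSegment)
  putStrLn $ "entries:          " ++ show (sum entriesPerBucket)
  putStrLn $ "largest segment:  " ++ show (maximum bucketsPerSegment)
  putStrLn $ "smallest segment: " ++ show (minimum bucketsPerSegment)
  putStrLn $ "longest bucket:   " ++ show (maximum entriesPerBucket)
```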