author    | Zejun Wu <watashi@fb.com>               | 2018-10-28 12:39:58 -0400
committer | Ben Gamari <ben@smart-cactus.org>       | 2018-10-28 13:40:43 -0400
commit    | 5126764be614cc43b435e3f5ff34ea593c30269a (patch)
tree      | 2f1b23dffd6dd02512e582313866a80c855e9635 /ghc/Main.hs
parent    | 42575701b5e71ec5c5e5418a4530e40e6487684c (diff)
download  | haskell-5126764be614cc43b435e3f5ff34ea593c30269a.tar.gz
Rewrite FastString table in concurrent hashtable
Summary:
Reimplement the global FastString table using a concurrent hash table with
fixed-size segments and dynamically growing buckets instead of fixed-size
buckets.
This addresses the problem that `mkFastString` did not scale linearly when
the total number of entries was large.
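To make that layout concrete, here is a minimal standalone sketch of a segmented hash table of this shape: a fixed array of segments, each with its own lock and its own bucket array, where a segment rebuilds its buckets at double the size once it fills up. Everything in it (`SegTable`, `insert`, the 256/64 sizes, the growth threshold, the string hash) is an illustrative assumption, not GHC's actual FastString code.
```
-- A minimal illustrative sketch of a segmented concurrent hash table;
-- not GHC's FastString table, just the same overall shape.
module SegTable (SegTable, newSegTable, insert) where

import Control.Concurrent.MVar (MVar, newMVar, withMVar)
import Data.Array (Array, bounds, elems, listArray, (!), (//))
import Data.Bits (shiftR, (.&.))
import Data.Char (ord)
import Data.IORef (IORef, newIORef, readIORef, writeIORef)
import Data.List (foldl')

-- One segment: its own lock, a growable bucket array, and an entry count.
data Segment = Segment
  { segLock    :: MVar ()
  , segBuckets :: IORef (Array Int [String])
  , segCount   :: IORef Int
  }

-- The table is a fixed array of segments (256 here, echoing the stats below).
newtype SegTable = SegTable (Array Int Segment)

numSegments, initialBuckets :: Int
numSegments    = 256
initialBuckets = 64

-- Simple string hash, purely for illustration.
hashStr :: String -> Int
hashStr = foldl' (\h c -> h * 31 + ord c) 5381

newSegTable :: IO SegTable
newSegTable = do
  segs <- mapM (const newSegment) [1 .. numSegments]
  return (SegTable (listArray (0, numSegments - 1) segs))
  where
    newSegment =
      Segment <$> newMVar ()
              <*> newIORef (listArray (0, initialBuckets - 1)
                                      (replicate initialBuckets []))
              <*> newIORef 0

-- Insert a string if absent.  Only the owning segment is locked, so
-- insertions into different segments can proceed concurrently.
insert :: SegTable -> String -> IO ()
insert (SegTable segs) s = do
  let h   = hashStr s
      seg = segs ! (h .&. (numSegments - 1))        -- low bits pick the segment
  withMVar (segLock seg) $ \_ -> do
    buckets <- readIORef (segBuckets seg)
    let nBuckets = let (lo, hi) = bounds buckets in hi - lo + 1
        ix       = (h `shiftR` 8) `mod` nBuckets    -- higher bits pick the bucket
        bucket   = buckets ! ix
    if s `elem` bucket
      then return ()
      else do
        n <- readIORef (segCount seg)
        writeIORef (segCount seg) (n + 1)
        let buckets' = buckets // [(ix, s : bucket)]
        -- Grow (rehash) only this segment once it averages more than one
        -- entry per bucket; other segments are unaffected.
        writeIORef (segBuckets seg) $
          if n + 1 > nBuckets
            then resize (2 * nBuckets) buckets'
            else buckets'

-- Rebuild a segment's buckets at a new size, redistributing its entries
-- (kept deliberately simple; a real implementation would do this in one pass).
resize :: Int -> Array Int [String] -> Array Int [String]
resize newSize old =
  listArray (0, newSize - 1)
    [ [ s | s <- concat (elems old)
          , (hashStr s `shiftR` 8) `mod` newSize == i ]
    | i <- [0 .. newSize - 1] ]
```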
Test Plan:
./validate
```
inplace/bin/ghc-stage2 --interactive -dfaststring-stats < /dev/null
GHCi, version 8.7.20181005: http://www.haskell.org/ghc/ :? for help
Prelude> Leaving GHCi.
FastString stats:
segments: 256
buckets: 16384
entries: 7117
largest segment: 64
smallest segment: 64
longest bucket: 5
has z-encoding: 0%
```
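As a quick sanity check on the figures above (illustrative arithmetic, not part of the commit): every segment reports 64 buckets, so the 256 segments account for the 16384 buckets, and 7117 entries spread over them averages well under one entry per bucket, consistent with the longest bucket holding only 5 entries.
```
-- Illustrative arithmetic over the stats printed above (not from the commit).
main :: IO ()
main = do
  let segments          = 256 :: Int
      bucketsPerSegment = 64
      buckets           = segments * bucketsPerSegment
      entries           = 7117 :: Int
  print buckets                                                  -- 16384, matches "buckets"
  print (fromIntegral entries / fromIntegral buckets :: Double)  -- ~0.43 entries per bucket
```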
Also compare the two implementations using
{P187}
The new implementation is on par with the old one across different
combinations of parameters, and performs better when the number of
FastStrings is large.
{P188}
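({P187} and {P188} are Phabricator pastes and are not reproduced here.) A comparison in the same spirit could be scripted roughly as follows; this is a hypothetical micro-benchmark sketch, not the contents of those pastes, and it assumes the `ghc` and `time` packages are available and that the module exposing `mkFastString` is still named `FastString`, as it was at the time of this commit.
```
-- Hypothetical sketch: time interning N distinct strings for growing N.
-- The FastString table is global, so entries accumulate across loop
-- iterations; the point is to see whether total cost stays roughly linear.
import Control.Exception (evaluate)
import Control.Monad (forM_)
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import FastString (mkFastString)

main :: IO ()
main =
  forM_ [100000, 200000, 400000, 800000 :: Int] $ \n -> do
    start <- getCurrentTime
    mapM_ (evaluate . mkFastString) [ "str_" ++ show i | i <- [1 .. n] ]
    end <- getCurrentTime
    putStrLn (show n ++ " strings: " ++ show (diffUTCTime end start))
```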
Reviewers: simonmar, bgamari, niteria
Reviewed By: simonmar, bgamari
Subscribers: rwbarton, carter
GHC Trac Issues: #14854
Differential Revision: https://phabricator.haskell.org/D5211
Diffstat (limited to 'ghc/Main.hs')
-rw-r--r-- | ghc/Main.hs | 34
1 file changed, 15 insertions, 19 deletions
```
diff --git a/ghc/Main.hs b/ghc/Main.hs
index 03ac60db2d..15b7aee43e 100644
--- a/ghc/Main.hs
+++ b/ghc/Main.hs
@@ -805,14 +805,21 @@ dumpFinalStats dflags =
 
 dumpFastStringStats :: DynFlags -> IO ()
 dumpFastStringStats dflags = do
-  buckets <- getFastStringTable
-  let (entries, longest, has_z) = countFS 0 0 0 buckets
-      msg = text "FastString stats:" $$
-            nest 4 (vcat [text "size:           " <+> int (length buckets),
-                          text "entries:        " <+> int entries,
-                          text "longest chain:  " <+> int longest,
-                          text "has z-encoding: " <+> (has_z `pcntOf` entries)
-                         ])
+  segments <- getFastStringTable
+  let buckets = concat segments
+      bucketsPerSegment = map length segments
+      entriesPerBucket = map length buckets
+      entries = sum entriesPerBucket
+      hasZ = sum $ map (length . filter hasZEncoding) buckets
+      msg = text "FastString stats:" $$ nest 4 (vcat
+        [ text "segments:         " <+> int (length segments)
+        , text "buckets:          " <+> int (sum bucketsPerSegment)
+        , text "entries:          " <+> int entries
+        , text "largest segment:  " <+> int (maximum bucketsPerSegment)
+        , text "smallest segment: " <+> int (minimum bucketsPerSegment)
+        , text "longest bucket:   " <+> int (maximum entriesPerBucket)
+        , text "has z-encoding:   " <+> (hasZ `pcntOf` entries)
+        ])
   -- we usually get more "has z-encoding" than "z-encoded", because
   -- when we z-encode a string it might hash to the exact same string,
   -- which is not counted as "z-encoded".  Only strings whose
@@ -822,17 +829,6 @@ dumpFastStringStats dflags = do
   where
    x `pcntOf` y = int ((x * 100) `quot` y) Outputable.<> char '%'
 
-countFS :: Int -> Int -> Int -> [[FastString]] -> (Int, Int, Int)
-countFS entries longest has_z [] = (entries, longest, has_z)
-countFS entries longest has_z (b:bs) =
-  let
-   len = length b
-   longest' = max len longest
-   entries' = entries + len
-   has_zs = length (filter hasZEncoding b)
-  in
-   countFS entries' longest' (has_z + has_zs) bs
-
 showPackages, dumpPackages, dumpPackagesSimple :: DynFlags -> IO ()
 showPackages       dflags = putStrLn (showSDoc dflags (pprPackages dflags))
 dumpPackages       dflags = putMsg dflags (pprPackages dflags)
```
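For readers without a GHC tree at hand, the aggregation in the new `dumpFastStringStats` boils down to a handful of list folds over the nested segments/buckets structure returned by `getFastStringTable`. The standalone sketch below reproduces those computations over a small mocked-up table; plain `String`s stand in for `FastString`s, and the mock data is invented purely for illustration.
```
-- Standalone sketch of the aggregation in the diff above, over a mocked
-- nested structure: a table is a list of segments, a segment a list of
-- buckets, a bucket a list of entries.
mockTable :: [[[String]]]
mockTable =
  [ [ ["foo", "bar"], [] ]        -- segment 0: two buckets
  , [ ["baz"], ["qux", "quux"] ]  -- segment 1: two buckets
  ]

main :: IO ()
main = do
  let segments          = mockTable
      buckets           = concat segments
      bucketsPerSegment = map length segments
      entriesPerBucket  = map length buckets
      entries           = sum entriesPerBucket
  putStrLn $ "segments:         " ++ show (length segments)
  putStrLn $ "buckets:          " ++ show (sum bucketsPerSegment)
  putStrLn $ "entries:          " ++ show entries
  putStrLn $ "largest segment:  " ++ show (maximum bucketsPerSegment)
  putStrLn $ "smallest segment: " ++ show (minimum bucketsPerSegment)
  putStrLn $ "longest bucket:   " ++ show (maximum entriesPerBucket)
```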