diff options
author | Torsten Schmits <git@tryp.io> | 2022-06-02 16:18:12 +0200 |
---|---|---|
committer | Zubin Duggal <zubin.duggal@gmail.com> | 2022-07-14 14:39:37 +0530 |
commit | 2c1a5ae85966e1e87c33c7c62ab876c9287ca4d6 (patch) | |
tree | a1038593594efcd3bdb413717f961deb2363de90 | |
parent | ac064f37a0490ac4b885a185726ec13e042c9e9d (diff) | |
download | haskell-2c1a5ae85966e1e87c33c7c62ab876c9287ca4d6.tar.gz |
replace quadratic nub to fight byte code gen perf explosion
Despite this code having been present in the core-to-bytecode
implementation, I have observed it in the wild starting with 9.2,
causing enormous slowdown in certain situations.
My test case produces the following profiles:
Before:
```
total time = 559.77 secs (559766 ticks @ 1000 us, 1 processor)
total alloc = 513,985,665,640 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc ticks bytes
elem_by Data.OldList libraries/base/Data/OldList.hs:429:1-7 67.6 92.9 378282 477447404296
eqInt GHC.Classes libraries/ghc-prim/GHC/Classes.hs:275:8-14 12.4 0.0 69333 32
$c>>= GHC.Data.IOEnv <no location info> 6.9 0.6 38475 3020371232
```
After:
```
total time = 89.83 secs (89833 ticks @ 1000 us, 1 processor)
total alloc = 39,365,306,360 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc ticks bytes
$c>>= GHC.Data.IOEnv <no location info> 43.6 7.7 39156 3020403424
doCase GHC.StgToByteCode compiler/GHC/StgToByteCode.hs:(805,1)-(1054,53) 2.5 7.4 2246 2920777088
```
(cherry picked from commit 25b510c3ffdb6f43695c31c0740a5cbe1b7f3898)
-rw-r--r-- | compiler/GHC/StgToByteCode.hs | 17 |
1 files changed, 9 insertions, 8 deletions
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs index 0161fe5645..805362785a 100644 --- a/compiler/GHC/StgToByteCode.hs +++ b/compiler/GHC/StgToByteCode.hs @@ -70,7 +70,7 @@ import GHC.Types.Var.Env import GHC.Types.Tickish import Data.List ( genericReplicate, genericLength, intersperse - , partition, scanl', sort, sortBy, zip4, zip6, nub ) + , partition, scanl', sortBy, zip4, zip6 ) import Foreign hiding (shiftL, shiftR) import Control.Monad import Data.Char @@ -94,6 +94,7 @@ import Data.Either ( partitionEithers ) import qualified GHC.Types.CostCentre as CC import GHC.Stg.Syntax import GHC.Stg.FVs +import qualified Data.IntSet as IntSet -- ----------------------------------------------------------------------------- -- Generating byte code for a complete module @@ -1162,16 +1163,16 @@ doCase d s p scrut bndr alts pointers = extra_pointers ++ - sort (filter (< bitmap_size') (map (+extra_slots) rel_slots)) + filter (< bitmap_size') (map (+extra_slots) rel_slots) where - binds = Map.toList p -- NB: unboxed tuple cases bind the scrut binder to the same offset -- as one of the alt binders, so we have to remove any duplicates here: - rel_slots = nub $ map fromIntegral $ concatMap spread binds - spread (id, offset) | isUnboxedTupleType (idType id) || - isUnboxedSumType (idType id) = [] - | isFollowableArg (bcIdArgRep platform id) = [ rel_offset ] - | otherwise = [] + -- 'toAscList' takes care of sorting the result, which was previously done after the application of 'filter'. + rel_slots = IntSet.toAscList $ IntSet.fromList $ Map.elems $ Map.mapMaybeWithKey spread p + spread id offset | isUnboxedTupleType (idType id) || + isUnboxedSumType (idType id) = Nothing + | isFollowableArg (bcIdArgRep platform id) = Just (fromIntegral rel_offset) + | otherwise = Nothing where rel_offset = trunc16W $ bytesToWords platform (d - offset) bitmap = intsToReverseBitmap platform bitmap_size'{-size-} pointers |