summaryrefslogtreecommitdiff
path: root/compiler/GHC/Utils
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2020-11-24 20:39:56 +0100
committerBen Gamari <ben@smart-cactus.org>2020-12-14 15:22:13 -0500
commitaf855ac1d37359df3db8c48dc6c9dd2f3fe24e77 (patch)
tree434a4cda6edcd19f282a1a38a9f2d864c1309b60 /compiler/GHC/Utils
parentd0e8c10d587e4b9984526d0dfcfcb258b75733b8 (diff)
downloadhaskell-af855ac1d37359df3db8c48dc6c9dd2f3fe24e77.tar.gz
Optimize dumping of consecutive whitespace.wip/andreask/opt_dumps
The naive way of putting out n characters of indent would be something like `hPutStr hdl (replicate n ' ')`. However, this is quite inefficient: since we don't cache them, we allocate an absurd number of strings consisting solely of spaces. To improve on this we now track whether we can simply write ascii spaces via hPutBuf instead. This is the case when running with -ddump-to-file, where we force the encoding to be UTF8. This avoids both the cost of going through the encoding layer and the allocation churn from all the whitespace. Instead we simply use hPutBuf on a preallocated unlifted string. When dumping stg like this: > nofib/spectral/simple/Main.hs -fforce-recomp -ddump-stg-final -ddump-to-file -c +RTS -s Allocations went from 1,778 MB to 1,702 MB. About a 4% reduction of allocation! I did not measure the difference in runtime but expect it to be similar. Bumps the haddock submodule since the interface of GHC's Pretty slightly changed. ------------------------- Metric Decrease: T12227 -------------------------
Diffstat (limited to 'compiler/GHC/Utils')
-rw-r--r--compiler/GHC/Utils/Error.hs3
-rw-r--r--compiler/GHC/Utils/Outputable.hs2
-rw-r--r--compiler/GHC/Utils/Ppr.hs50
3 files changed, 49 insertions, 6 deletions
diff --git a/compiler/GHC/Utils/Error.hs b/compiler/GHC/Utils/Error.hs
index 43c2cae4ad..1051a731c5 100644
--- a/compiler/GHC/Utils/Error.hs
+++ b/compiler/GHC/Utils/Error.hs
@@ -327,7 +327,8 @@ dumpSDocWithStyle sty dflags dumpOpt hdr doc =
$$ blankLine
$$ doc
return $ mkDumpDoc hdr d
- defaultLogActionHPrintDoc dflags handle (withPprStyle sty doc')
+ -- When we dump to files we use UTF8, which allows ascii spaces.
+ defaultLogActionHPrintDoc dflags True handle (withPprStyle sty doc')
-- write the dump to stdout
writeDump Nothing = do
diff --git a/compiler/GHC/Utils/Outputable.hs b/compiler/GHC/Utils/Outputable.hs
index e88d9c42b6..ecef33ae86 100644
--- a/compiler/GHC/Utils/Outputable.hs
+++ b/compiler/GHC/Utils/Outputable.hs
@@ -567,7 +567,7 @@ pprCode cs d = withPprStyle (PprCode cs) d
renderWithContext :: SDocContext -> SDoc -> String
renderWithContext ctx sdoc
- = let s = Pretty.style{ Pretty.mode = PageMode,
+ = let s = Pretty.style{ Pretty.mode = PageMode False,
Pretty.lineLength = sdocLineLength ctx }
in Pretty.renderStyle s $ runSDoc sdoc ctx
diff --git a/compiler/GHC/Utils/Ppr.hs b/compiler/GHC/Utils/Ppr.hs
index 3fa84850b8..8871f98cef 100644
--- a/compiler/GHC/Utils/Ppr.hs
+++ b/compiler/GHC/Utils/Ppr.hs
@@ -917,16 +917,26 @@ data Style
, ribbonsPerLine :: Float -- ^ Ratio of line length to ribbon length
}
--- | The default style (@mode=PageMode, lineLength=100, ribbonsPerLine=1.5@).
+-- | The default style (@mode=PageMode False, lineLength=100, ribbonsPerLine=1.5@).
style :: Style
-style = Style { lineLength = 100, ribbonsPerLine = 1.5, mode = PageMode }
+style = Style { lineLength = 100, ribbonsPerLine = 1.5, mode = PageMode False }
-- | Rendering mode.
-data Mode = PageMode -- ^ Normal
+data Mode = PageMode { asciiSpace :: Bool } -- ^ Normal
| ZigZagMode -- ^ With zig-zag cuts
| LeftMode -- ^ No indentation, infinitely long lines
| OneLineMode -- ^ All on one line
+-- | Can we output an ascii space character for spaces?
+-- Mostly true, but not for e.g. UTF16
+-- See Note [putSpaces optimizations] for why we bother
+-- to track this.
+hasAsciiSpace :: Mode -> Bool
+hasAsciiSpace mode =
+ case mode of
+ PageMode asciiSpace -> asciiSpace
+ _ -> False
+
-- | Render the @Doc@ to a String using the given @Style@.
renderStyle :: Style -> Doc -> String
renderStyle s = fullRender (mode s) (lineLength s) (ribbonsPerLine s)
@@ -1034,6 +1044,20 @@ printDoc :: Mode -> Int -> Handle -> Doc -> IO ()
-- printDoc adds a newline to the end
printDoc mode cols hdl doc = printDoc_ mode cols hdl (doc $$ text "")
+{- Note [putSpaces optimizations]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using dump flags a lot of what we are dumping ends up being whitespace.
+This is especially true for Core/Stg dumps. Enough so that it's worth optimizing.
+
+Especially in the common case of writing to a UTF8 or similarly encoded file
+where space is equal to ascii space we use hPutBuf to write a preallocated
+buffer to the file. This avoids a fair bit of allocation.
+
+For other cases we fall back to the old and slow path for simplicity.
+
+-}
+
printDoc_ :: Mode -> Int -> Handle -> Doc -> IO ()
-- printDoc_ does not add a newline at the end, so that
-- successive calls can output stuff on the same line
@@ -1051,9 +1075,27 @@ printDoc_ mode pprCols hdl doc
-- the I/O library's encoding layer. (#3398)
put (ZStr s) next = hPutFZS hdl s >> next
put (LStr s) next = hPutPtrString hdl s >> next
- put (RStr n c) next = hPutStr hdl (replicate n c) >> next
+ put (RStr n c) next
+ | c == ' '
+ = putSpaces n >> next
+ | otherwise
+ = hPutStr hdl (replicate n c) >> next
+ putSpaces n
+ -- If we use ascii spaces we are allowed to use hPutBuf
+ -- See Note [putSpaces optimizations]
+ | hasAsciiSpace mode
+ , n <= 100
+ = hPutBuf hdl (Ptr spaces') n
+ | hasAsciiSpace mode
+ , n > 100
+ = hPutBuf hdl (Ptr spaces') 100 >> putSpaces (n-100)
+
+ | otherwise = hPutStr hdl (replicate n ' ')
done = return () -- hPutChar hdl '\n'
+ -- 100 spaces, so we avoid the allocation of replicate n ' '
+ spaces' = " "#
+
-- some versions of hPutBuf will barf if the length is zero
hPutPtrString :: Handle -> PtrString -> IO ()