Modules: CmmToAsm (#13009)

author: Sylvain Henry <sylvain@haskus.fr> 2020-02-22 15:05:20 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2020-02-24 20:55:25 -0500
commit: 1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree: 32346e3c4c3f89117190b36364144d85dc260e05 /compiler/GHC/CmmToAsm/Ppr.hs
parent: 354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
download: haskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz
1 files changed, 275 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/Ppr.hs b/compiler/GHC/CmmToAsm/Ppr.hs
new file mode 100644
index 0000000000..636d2e4e3a
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Ppr.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE MagicHash #-}
+
+-----------------------------------------------------------------------------
+--
+-- Pretty-printing assembly language
+--
+-- (c) The University of Glasgow 1993-2005
+--
+-----------------------------------------------------------------------------
+
+module GHC.CmmToAsm.Ppr (
+        castFloatToWord8Array,
+        castDoubleToWord8Array,
+        floatToBytes,
+        doubleToBytes,
+        pprASCII,
+        pprBytes,
+        pprSectionHeader
+)
+
+where
+
+import GhcPrelude
+
+import AsmUtils
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Driver.Session
+import FastString
+import Outputable
+import GHC.Platform
+import FileCleanup
+
+import qualified Data.Array.Unsafe as U ( castSTUArray )
+import Data.Array.ST
+
+import Control.Monad.ST
+
+import Data.Word
+import Data.Bits
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS
+import GHC.Exts
+import GHC.Word
+import System.IO.Unsafe
+
+
+
+-- -----------------------------------------------------------------------------
+-- Converting floating-point literals to integrals for printing
+
+castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8)
+castFloatToWord8Array = U.castSTUArray
+
+castDoubleToWord8Array :: STUArray s Int Double -> ST s (STUArray s Int Word8)
+castDoubleToWord8Array = U.castSTUArray
+
+-- floatToBytes and doubleToBytes convert to the host's byte
+-- order.  Providing that we're not cross-compiling for a
+-- target with the opposite endianness, this should work ok
+-- on all targets.
+
+-- ToDo: this stuff is very similar to the shenanigans in PprAbs,
+-- could they be merged?
+
+floatToBytes :: Float -> [Int]
+floatToBytes f
+   = runST (do
+        arr <- newArray_ ((0::Int),3)
+        writeArray arr 0 f
+        arr <- castFloatToWord8Array arr
+        i0 <- readArray arr 0
+        i1 <- readArray arr 1
+        i2 <- readArray arr 2
+        i3 <- readArray arr 3
+        return (map fromIntegral [i0,i1,i2,i3])
+     )
+
+doubleToBytes :: Double -> [Int]
+doubleToBytes d
+   = runST (do
+        arr <- newArray_ ((0::Int),7)
+        writeArray arr 0 d
+        arr <- castDoubleToWord8Array arr
+        i0 <- readArray arr 0
+        i1 <- readArray arr 1
+        i2 <- readArray arr 2
+        i3 <- readArray arr 3
+        i4 <- readArray arr 4
+        i5 <- readArray arr 5
+        i6 <- readArray arr 6
+        i7 <- readArray arr 7
+        return (map fromIntegral [i0,i1,i2,i3,i4,i5,i6,i7])
+     )
+
+-- ---------------------------------------------------------------------------
+-- Printing ASCII strings.
+--
+-- Print as a string and escape non-printable characters.
+-- This is similar to charToC in Utils.
+
+pprASCII :: ByteString -> SDoc
+pprASCII str
+  -- Transform this given literal bytestring to escaped string and construct
+  -- the literal SDoc directly.
+  -- See #14741
+  -- and Note [Pretty print ASCII when AsmCodeGen]
+  = text $ BS.foldr (\w s -> do1 w ++ s) "" str
+    where
+       do1 :: Word8 -> String
+       do1 w | 0x09 == w = "\\t"
+             | 0x0A == w = "\\n"
+             | 0x22 == w = "\\\""
+             | 0x5C == w = "\\\\"
+               -- ASCII printable characters range
+             | w >= 0x20 && w <= 0x7E = [chr' w]
+             | otherwise = '\\' : octal w
+
+       -- we know that the Chars we create are in the ASCII range
+       -- so we bypass the check in "chr"
+       chr' :: Word8 -> Char
+       chr' (W8# w#) = C# (chr# (word2Int# w#))
+
+       octal :: Word8 -> String
+       octal w = [ chr' (ord0 + (w `unsafeShiftR` 6) .&. 0x07)
+                 , chr' (ord0 + (w `unsafeShiftR` 3) .&. 0x07)
+                 , chr' (ord0 + w .&. 0x07)
+                 ]
+       ord0 = 0x30 -- = ord '0'
+
+-- | Pretty print binary data.
+--
+-- Use either the ".string" directive or a ".incbin" directive.
+-- See Note [Embedding large binary blobs]
+--
+-- A NULL byte is added after the binary data.
+--
+pprBytes :: ByteString -> SDoc
+pprBytes bs = sdocWithDynFlags $ \dflags ->
+  if binBlobThreshold dflags == 0
+     || fromIntegral (BS.length bs) <= binBlobThreshold dflags
+    then text "\t.string " <> doubleQuotes (pprASCII bs)
+    else unsafePerformIO $ do
+      bFile <- newTempName dflags TFL_CurrentModule ".dat"
+      BS.writeFile bFile bs
+      return $ text "\t.incbin "
+         <> pprFilePathString bFile -- proper escape (see #16389)
+         <> text "\n\t.byte 0"
+
+{-
+Note [Embedding large binary blobs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To embed a blob of binary data (e.g. an UTF-8 encoded string) into the generated
+code object, we have several options:
+
+   1. Generate a ".byte" directive for each byte. This is what was done in the past
+      (see Note [Pretty print ASCII when AsmCodeGen]).
+
+   2. Generate a single ".string"/".asciz" directive for the whole sequence of
+      bytes. Bytes in the ASCII printable range are rendered as characters and
+      other values are escaped (e.g., "\t", "\077", etc.).
+
+   3. Create a temporary file into which we dump the binary data and generate a
+      single ".incbin" directive. The assembler will include the binary file for
+      us in the generated output object.
+
+Now the code generator uses either (2) or (3), depending on the binary blob
+size.  Using (3) for small blobs adds too much overhead (see benchmark results
+in #16190), so we only do it when the size is above a threshold (500K at the
+time of writing).
+
+The threshold is configurable via the `-fbinary-blob-threshold` flag.
+
+-}
+
+
+{-
+Note [Pretty print ASCII when AsmCodeGen]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously, when generating assembly code, we created SDoc with
+`(ptext . sLit)` for every bytes in literal bytestring, then
+combine them using `hcat`.
+
+When handling literal bytestrings with millions of bytes,
+millions of SDoc would be created and to combine, leading to
+high memory usage.
+
+Now we escape the given bytestring to string directly and construct
+SDoc only once. This improvement could dramatically decrease the
+memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal
+string in source code. See #14741 for profiling results.
+-}
+
+-- ----------------------------------------------------------------------------
+-- Printing section headers.
+--
+-- If -split-section was specified, include the suffix label, otherwise just
+-- print the section type. For Darwin, where subsections-for-symbols are
+-- used instead, only print section type.
+--
+-- For string literals, additional flags are specified to enable merging of
+-- identical strings in the linker. With -split-sections each string also gets
+-- a unique section to allow strings from unused code to be GC'd.
+
+pprSectionHeader :: Platform -> Section -> SDoc
+pprSectionHeader platform (Section t suffix) =
+ case platformOS platform of
+   OSAIX     -> pprXcoffSectionHeader t
+   OSDarwin  -> pprDarwinSectionHeader t
+   OSMinGW32 -> pprGNUSectionHeader (char '$') t suffix
+   _         -> pprGNUSectionHeader (char '.') t suffix
+
+pprGNUSectionHeader :: SDoc -> SectionType -> CLabel -> SDoc
+pprGNUSectionHeader sep t suffix = sdocWithDynFlags $ \dflags ->
+  let splitSections = gopt Opt_SplitSections dflags
+      subsection | splitSections = sep <> ppr suffix
+                 | otherwise     = empty
+  in  text ".section " <> ptext (header dflags) <> subsection <>
+      flags dflags
+  where
+    header dflags = case t of
+      Text -> sLit ".text"
+      Data -> sLit ".data"
+      ReadOnlyData  | OSMinGW32 <- platformOS (targetPlatform dflags)
+                                -> sLit ".rdata"
+                    | otherwise -> sLit ".rodata"
+      RelocatableReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags)
+                                -- Concept does not exist on Windows,
+                                -- So map these to R/O data.
+                                          -> sLit ".rdata$rel.ro"
+                              | otherwise -> sLit ".data.rel.ro"
+      UninitialisedData -> sLit ".bss"
+      ReadOnlyData16 | OSMinGW32 <- platformOS (targetPlatform dflags)
+                                 -> sLit ".rdata$cst16"
+                     | otherwise -> sLit ".rodata.cst16"
+      CString
+        | OSMinGW32 <- platformOS (targetPlatform dflags)
+                    -> sLit ".rdata"
+        | otherwise -> sLit ".rodata.str"
+      OtherSection _ ->
+        panic "PprBase.pprGNUSectionHeader: unknown section type"
+    flags dflags = case t of
+      CString
+        | OSMinGW32 <- platformOS (targetPlatform dflags)
+                    -> empty
+        | otherwise -> text ",\"aMS\"," <> sectionType "progbits" <> text ",1"
+      _ -> empty
+
+-- XCOFF doesn't support relocating label-differences, so we place all
+-- RO sections into .text[PR] sections
+pprXcoffSectionHeader :: SectionType -> SDoc
+pprXcoffSectionHeader t = text $ case t of
+     Text                    -> ".csect .text[PR]"
+     Data                    -> ".csect .data[RW]"
+     ReadOnlyData            -> ".csect .text[PR] # ReadOnlyData"
+     RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData"
+     ReadOnlyData16          -> ".csect .text[PR] # ReadOnlyData16"
+     CString                 -> ".csect .text[PR] # CString"
+     UninitialisedData       -> ".csect .data[BS]"
+     OtherSection _          ->
+       panic "PprBase.pprXcoffSectionHeader: unknown section type"
+
+pprDarwinSectionHeader :: SectionType -> SDoc
+pprDarwinSectionHeader t =
+  ptext $ case t of
+     Text -> sLit ".text"
+     Data -> sLit ".data"
+     ReadOnlyData -> sLit ".const"
+     RelocatableReadOnlyData -> sLit ".const_data"
+     UninitialisedData -> sLit ".data"
+     ReadOnlyData16 -> sLit ".const"
+     CString -> sLit ".section\t__TEXT,__cstring,cstring_literals"
+     OtherSection _ ->
+       panic "PprBase.pprDarwinSectionHeader: unknown section type"
author	Sylvain Henry <sylvain@haskus.fr>	2020-02-22 15:05:20 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2020-02-24 20:55:25 -0500
commit	1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree	32346e3c4c3f89117190b36364144d85dc260e05 /compiler/GHC/CmmToAsm/Ppr.hs
parent	354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
download	haskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz