summaryrefslogtreecommitdiff
path: root/compiler/GHC/CmmToAsm/Ppr.hs
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2020-02-22 15:05:20 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-02-24 20:55:25 -0500
commit1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree32346e3c4c3f89117190b36364144d85dc260e05 /compiler/GHC/CmmToAsm/Ppr.hs
parent354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
downloadhaskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz
Modules: CmmToAsm (#13009)
Diffstat (limited to 'compiler/GHC/CmmToAsm/Ppr.hs')
-rw-r--r--compiler/GHC/CmmToAsm/Ppr.hs275
1 files changed, 275 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/Ppr.hs b/compiler/GHC/CmmToAsm/Ppr.hs
new file mode 100644
index 0000000000..636d2e4e3a
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/Ppr.hs
@@ -0,0 +1,275 @@
+{-# LANGUAGE MagicHash #-}
+
+-----------------------------------------------------------------------------
+--
+-- Pretty-printing assembly language
+--
+-- (c) The University of Glasgow 1993-2005
+--
+-----------------------------------------------------------------------------
+
+module GHC.CmmToAsm.Ppr (
+ castFloatToWord8Array,
+ castDoubleToWord8Array,
+ floatToBytes,
+ doubleToBytes,
+ pprASCII,
+ pprBytes,
+ pprSectionHeader
+)
+
+where
+
+import GhcPrelude
+
+import AsmUtils
+import GHC.Cmm.CLabel
+import GHC.Cmm
+import GHC.Driver.Session
+import FastString
+import Outputable
+import GHC.Platform
+import FileCleanup
+
+import qualified Data.Array.Unsafe as U ( castSTUArray )
+import Data.Array.ST
+
+import Control.Monad.ST
+
+import Data.Word
+import Data.Bits
+import Data.ByteString (ByteString)
+import qualified Data.ByteString as BS
+import GHC.Exts
+import GHC.Word
+import System.IO.Unsafe
+
+
+
+-- -----------------------------------------------------------------------------
+-- Converting floating-point literals to integrals for printing
+
+castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8)
+castFloatToWord8Array = U.castSTUArray
+
+castDoubleToWord8Array :: STUArray s Int Double -> ST s (STUArray s Int Word8)
+castDoubleToWord8Array = U.castSTUArray
+
+-- floatToBytes and doubleToBytes convert to the host's byte
+-- order. Providing that we're not cross-compiling for a
+-- target with the opposite endianness, this should work ok
+-- on all targets.
+
+-- ToDo: this stuff is very similar to the shenanigans in PprAbs,
+-- could they be merged?
+
+floatToBytes :: Float -> [Int]
+floatToBytes f
+ = runST (do
+ arr <- newArray_ ((0::Int),3)
+ writeArray arr 0 f
+ arr <- castFloatToWord8Array arr
+ i0 <- readArray arr 0
+ i1 <- readArray arr 1
+ i2 <- readArray arr 2
+ i3 <- readArray arr 3
+ return (map fromIntegral [i0,i1,i2,i3])
+ )
+
+doubleToBytes :: Double -> [Int]
+doubleToBytes d
+ = runST (do
+ arr <- newArray_ ((0::Int),7)
+ writeArray arr 0 d
+ arr <- castDoubleToWord8Array arr
+ i0 <- readArray arr 0
+ i1 <- readArray arr 1
+ i2 <- readArray arr 2
+ i3 <- readArray arr 3
+ i4 <- readArray arr 4
+ i5 <- readArray arr 5
+ i6 <- readArray arr 6
+ i7 <- readArray arr 7
+ return (map fromIntegral [i0,i1,i2,i3,i4,i5,i6,i7])
+ )
+
+-- ---------------------------------------------------------------------------
+-- Printing ASCII strings.
+--
+-- Print as a string and escape non-printable characters.
+-- This is similar to charToC in Utils.
+
+pprASCII :: ByteString -> SDoc
+pprASCII str
+ -- Transform this given literal bytestring to escaped string and construct
+ -- the literal SDoc directly.
+ -- See #14741
+ -- and Note [Pretty print ASCII when AsmCodeGen]
+ = text $ BS.foldr (\w s -> do1 w ++ s) "" str
+ where
+ do1 :: Word8 -> String
+ do1 w | 0x09 == w = "\\t"
+ | 0x0A == w = "\\n"
+ | 0x22 == w = "\\\""
+ | 0x5C == w = "\\\\"
+ -- ASCII printable characters range
+ | w >= 0x20 && w <= 0x7E = [chr' w]
+ | otherwise = '\\' : octal w
+
+ -- we know that the Chars we create are in the ASCII range
+ -- so we bypass the check in "chr"
+ chr' :: Word8 -> Char
+ chr' (W8# w#) = C# (chr# (word2Int# w#))
+
+ octal :: Word8 -> String
+ octal w = [ chr' (ord0 + (w `unsafeShiftR` 6) .&. 0x07)
+ , chr' (ord0 + (w `unsafeShiftR` 3) .&. 0x07)
+ , chr' (ord0 + w .&. 0x07)
+ ]
+ ord0 = 0x30 -- = ord '0'
+
+-- | Pretty print binary data.
+--
+-- Use either the ".string" directive or a ".incbin" directive.
+-- See Note [Embedding large binary blobs]
+--
+-- A NULL byte is added after the binary data.
+--
+pprBytes :: ByteString -> SDoc
+pprBytes bs = sdocWithDynFlags $ \dflags ->
+ if binBlobThreshold dflags == 0
+ || fromIntegral (BS.length bs) <= binBlobThreshold dflags
+ then text "\t.string " <> doubleQuotes (pprASCII bs)
+ else unsafePerformIO $ do
+ bFile <- newTempName dflags TFL_CurrentModule ".dat"
+ BS.writeFile bFile bs
+ return $ text "\t.incbin "
+ <> pprFilePathString bFile -- proper escape (see #16389)
+ <> text "\n\t.byte 0"
+
+{-
+Note [Embedding large binary blobs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To embed a blob of binary data (e.g. an UTF-8 encoded string) into the generated
+code object, we have several options:
+
+ 1. Generate a ".byte" directive for each byte. This is what was done in the past
+ (see Note [Pretty print ASCII when AsmCodeGen]).
+
+ 2. Generate a single ".string"/".asciz" directive for the whole sequence of
+ bytes. Bytes in the ASCII printable range are rendered as characters and
+ other values are escaped (e.g., "\t", "\077", etc.).
+
+ 3. Create a temporary file into which we dump the binary data and generate a
+ single ".incbin" directive. The assembler will include the binary file for
+ us in the generated output object.
+
+Now the code generator uses either (2) or (3), depending on the binary blob
+size. Using (3) for small blobs adds too much overhead (see benchmark results
+in #16190), so we only do it when the size is above a threshold (500K at the
+time of writing).
+
+The threshold is configurable via the `-fbinary-blob-threshold` flag.
+
+-}
+
+
+{-
+Note [Pretty print ASCII when AsmCodeGen]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Previously, when generating assembly code, we created SDoc with
+`(ptext . sLit)` for every bytes in literal bytestring, then
+combine them using `hcat`.
+
+When handling literal bytestrings with millions of bytes,
+millions of SDoc would be created and to combine, leading to
+high memory usage.
+
+Now we escape the given bytestring to string directly and construct
+SDoc only once. This improvement could dramatically decrease the
+memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal
+string in source code. See #14741 for profiling results.
+-}
+
+-- ----------------------------------------------------------------------------
+-- Printing section headers.
+--
+-- If -split-section was specified, include the suffix label, otherwise just
+-- print the section type. For Darwin, where subsections-for-symbols are
+-- used instead, only print section type.
+--
+-- For string literals, additional flags are specified to enable merging of
+-- identical strings in the linker. With -split-sections each string also gets
+-- a unique section to allow strings from unused code to be GC'd.
+
+pprSectionHeader :: Platform -> Section -> SDoc
+pprSectionHeader platform (Section t suffix) =
+ case platformOS platform of
+ OSAIX -> pprXcoffSectionHeader t
+ OSDarwin -> pprDarwinSectionHeader t
+ OSMinGW32 -> pprGNUSectionHeader (char '$') t suffix
+ _ -> pprGNUSectionHeader (char '.') t suffix
+
+pprGNUSectionHeader :: SDoc -> SectionType -> CLabel -> SDoc
+pprGNUSectionHeader sep t suffix = sdocWithDynFlags $ \dflags ->
+ let splitSections = gopt Opt_SplitSections dflags
+ subsection | splitSections = sep <> ppr suffix
+ | otherwise = empty
+ in text ".section " <> ptext (header dflags) <> subsection <>
+ flags dflags
+ where
+ header dflags = case t of
+ Text -> sLit ".text"
+ Data -> sLit ".data"
+ ReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags)
+ -> sLit ".rdata"
+ | otherwise -> sLit ".rodata"
+ RelocatableReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags)
+ -- Concept does not exist on Windows,
+ -- So map these to R/O data.
+ -> sLit ".rdata$rel.ro"
+ | otherwise -> sLit ".data.rel.ro"
+ UninitialisedData -> sLit ".bss"
+ ReadOnlyData16 | OSMinGW32 <- platformOS (targetPlatform dflags)
+ -> sLit ".rdata$cst16"
+ | otherwise -> sLit ".rodata.cst16"
+ CString
+ | OSMinGW32 <- platformOS (targetPlatform dflags)
+ -> sLit ".rdata"
+ | otherwise -> sLit ".rodata.str"
+ OtherSection _ ->
+ panic "PprBase.pprGNUSectionHeader: unknown section type"
+ flags dflags = case t of
+ CString
+ | OSMinGW32 <- platformOS (targetPlatform dflags)
+ -> empty
+ | otherwise -> text ",\"aMS\"," <> sectionType "progbits" <> text ",1"
+ _ -> empty
+
+-- XCOFF doesn't support relocating label-differences, so we place all
+-- RO sections into .text[PR] sections
+pprXcoffSectionHeader :: SectionType -> SDoc
+pprXcoffSectionHeader t = text $ case t of
+ Text -> ".csect .text[PR]"
+ Data -> ".csect .data[RW]"
+ ReadOnlyData -> ".csect .text[PR] # ReadOnlyData"
+ RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData"
+ ReadOnlyData16 -> ".csect .text[PR] # ReadOnlyData16"
+ CString -> ".csect .text[PR] # CString"
+ UninitialisedData -> ".csect .data[BS]"
+ OtherSection _ ->
+ panic "PprBase.pprXcoffSectionHeader: unknown section type"
+
+pprDarwinSectionHeader :: SectionType -> SDoc
+pprDarwinSectionHeader t =
+ ptext $ case t of
+ Text -> sLit ".text"
+ Data -> sLit ".data"
+ ReadOnlyData -> sLit ".const"
+ RelocatableReadOnlyData -> sLit ".const_data"
+ UninitialisedData -> sLit ".data"
+ ReadOnlyData16 -> sLit ".const"
+ CString -> sLit ".section\t__TEXT,__cstring,cstring_literals"
+ OtherSection _ ->
+ panic "PprBase.pprDarwinSectionHeader: unknown section type"