diff options
Diffstat (limited to 'compiler/GHC/CmmToLlvm/Mangler.hs')
-rw-r--r-- | compiler/GHC/CmmToLlvm/Mangler.hs | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToLlvm/Mangler.hs b/compiler/GHC/CmmToLlvm/Mangler.hs new file mode 100644 index 0000000000..1cdad2009f --- /dev/null +++ b/compiler/GHC/CmmToLlvm/Mangler.hs @@ -0,0 +1,129 @@ +-- ----------------------------------------------------------------------------- +-- | GHC LLVM Mangler +-- +-- This script processes the assembly produced by LLVM, rewriting all symbols +-- of type @function to @object. This keeps them from going through the PLT, +-- which would be bad due to tables-next-to-code. On x86_64, +-- it also rewrites AVX instructions that require alignment to their +-- unaligned counterparts, since the stack is only 16-byte aligned but these +-- instructions require 32-byte alignment. +-- + +module GHC.CmmToLlvm.Mangler ( llvmFixupAsm ) where + +import GhcPrelude + +import DynFlags ( DynFlags, targetPlatform ) +import GHC.Platform ( platformArch, Arch(..) ) +import ErrUtils ( withTiming ) +import Outputable ( text ) + +import Control.Exception +import qualified Data.ByteString.Char8 as B +import System.IO + +-- | Read in assembly file and process +llvmFixupAsm :: DynFlags -> FilePath -> FilePath -> IO () +llvmFixupAsm dflags f1 f2 = {-# SCC "llvm_mangler" #-} + withTiming dflags (text "LLVM Mangler") id $ + withBinaryFile f1 ReadMode $ \r -> withBinaryFile f2 WriteMode $ \w -> do + go r w + hClose r + hClose w + return () + where + go :: Handle -> Handle -> IO () + go r w = do + e_l <- try $ B.hGetLine r ::IO (Either IOError B.ByteString) + let writeline a = B.hPutStrLn w (rewriteLine dflags rewrites a) >> go r w + case e_l of + Right l -> writeline l + Left _ -> return () + +-- | These are the rewrites that the mangler will perform +rewrites :: [Rewrite] +rewrites = [rewriteSymType, rewriteAVX] + +type Rewrite = DynFlags -> B.ByteString -> Maybe B.ByteString + +-- | Rewrite a line of assembly source with the given rewrites, +-- taking the first rewrite that applies. +rewriteLine :: DynFlags -> [Rewrite] -> B.ByteString -> B.ByteString +rewriteLine dflags rewrites l + -- We disable .subsections_via_symbols on darwin and ios, as the llvm code + -- gen uses prefix data for the info table. This however does not prevent + -- llvm from generating .subsections_via_symbols, which in turn with + -- -dead_strip, strips the info tables, and therefore breaks ghc. + | isSubsectionsViaSymbols l = + (B.pack "## no .subsection_via_symbols for ghc. We need our info tables!") + | otherwise = + case firstJust $ map (\rewrite -> rewrite dflags rest) rewrites of + Nothing -> l + Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten] + where + isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols") + + (symbol, rest) = splitLine l + + firstJust :: [Maybe a] -> Maybe a + firstJust (Just x:_) = Just x + firstJust [] = Nothing + firstJust (_:rest) = firstJust rest + +-- | This rewrites @.type@ annotations of function symbols to @%object@. +-- This is done as the linker can relocate @%functions@ through the +-- Procedure Linking Table (PLT). This is bad since we expect that the +-- info table will appear directly before the symbol's location. In the +-- case that the PLT is used, this will be not an info table but instead +-- some random PLT garbage. +rewriteSymType :: Rewrite +rewriteSymType _ l + | isType l = Just $ rewrite '@' $ rewrite '%' l + | otherwise = Nothing + where + isType = B.isPrefixOf (B.pack ".type") + + rewrite :: Char -> B.ByteString -> B.ByteString + rewrite prefix = replaceOnce funcType objType + where + funcType = prefix `B.cons` B.pack "function" + objType = prefix `B.cons` B.pack "object" + +-- | This rewrites aligned AVX instructions to their unaligned counterparts on +-- x86-64. This is necessary because the stack is not adequately aligned for +-- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer +-- and disable tail call optimization. Both would be catastrophic here so GHC +-- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then +-- rewrites the instructions in the mangler. +rewriteAVX :: Rewrite +rewriteAVX dflags s + | not isX86_64 = Nothing + | isVmovdqa s = Just $ replaceOnce (B.pack "vmovdqa") (B.pack "vmovdqu") s + | isVmovap s = Just $ replaceOnce (B.pack "vmovap") (B.pack "vmovup") s + | otherwise = Nothing + where + isX86_64 = platformArch (targetPlatform dflags) == ArchX86_64 + isVmovdqa = B.isPrefixOf (B.pack "vmovdqa") + isVmovap = B.isPrefixOf (B.pack "vmovap") + +-- | @replaceOnce match replace bs@ replaces the first occurrence of the +-- substring @match@ in @bs@ with @replace@. +replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString +replaceOnce matchBS replaceOnceBS = loop + where + loop :: B.ByteString -> B.ByteString + loop cts = + case B.breakSubstring matchBS cts of + (hd,tl) | B.null tl -> hd + | otherwise -> hd `B.append` replaceOnceBS `B.append` + B.drop (B.length matchBS) tl + +-- | This function splits a line of assembly code into the label and the +-- rest of the code. +splitLine :: B.ByteString -> (B.ByteString, B.ByteString) +splitLine l = (symbol, B.dropWhile isSpace rest) + where + isSpace ' ' = True + isSpace '\t' = True + isSpace _ = False + (symbol, rest) = B.span (not . isSpace) l |