summaryrefslogtreecommitdiff
path: root/compiler/GHC/CmmToLlvm/Mangler.hs
blob: 43132942946bfd7b476420ec8829d72d7eb5532a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
-- -----------------------------------------------------------------------------
-- | GHC LLVM Mangler
--
-- This script processes the assembly produced by LLVM, rewriting all symbols
-- of type @function to @object. This keeps them from going through the PLT,
-- which would be bad due to tables-next-to-code. On x86_64,
-- it also rewrites AVX instructions that require alignment to their
-- unaligned counterparts, since the stack is only 16-byte aligned but these
-- instructions require 32-byte alignment.
--

module GHC.CmmToLlvm.Mangler ( llvmFixupAsm ) where

import GHC.Prelude

import GHC.Driver.Session ( DynFlags, targetPlatform )
import GHC.Platform ( platformArch, Arch(..) )
import GHC.Utils.Error ( withTiming )
import GHC.Utils.Outputable ( text )
import GHC.Utils.Logger

import Control.Exception
import qualified Data.ByteString.Char8 as B
import System.IO

-- | Read in assembly file and process
llvmFixupAsm :: Logger -> DynFlags -> FilePath -> FilePath -> IO ()
llvmFixupAsm logger dflags f1 f2 = {-# SCC "llvm_mangler" #-}
    withTiming logger dflags (text "LLVM Mangler") id $
    withBinaryFile f1 ReadMode $ \r -> withBinaryFile f2 WriteMode $ \w -> do
        go r w
        hClose r
        hClose w
        return ()
  where
    go :: Handle -> Handle -> IO ()
    go r w = do
      e_l <- try $ B.hGetLine r ::IO (Either IOError B.ByteString)
      let writeline a = B.hPutStrLn w (rewriteLine dflags rewrites a) >> go r w
      case e_l of
        Right l -> writeline l
        Left _  -> return ()

-- | These are the rewrites that the mangler will perform
rewrites :: [Rewrite]
rewrites = [rewriteSymType, rewriteAVX, rewriteCall]

type Rewrite = DynFlags -> B.ByteString -> Maybe B.ByteString

-- | Rewrite a line of assembly source with the given rewrites,
-- taking the first rewrite that applies.
rewriteLine :: DynFlags -> [Rewrite] -> B.ByteString -> B.ByteString
rewriteLine dflags rewrites l
  -- We disable .subsections_via_symbols on darwin and ios, as the llvm code
  -- gen uses prefix data for the info table.  This however does not prevent
  -- llvm from generating .subsections_via_symbols, which in turn with
  -- -dead_strip, strips the info tables, and therefore breaks ghc.
  | isSubsectionsViaSymbols l =
    (B.pack "## no .subsection_via_symbols for ghc. We need our info tables!")
  | otherwise =
    case firstJust $ map (\rewrite -> rewrite dflags rest) rewrites of
      Nothing        -> l
      Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten]
  where
    isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols")

    (symbol, rest) = splitLine l

    firstJust :: [Maybe a] -> Maybe a
    firstJust (Just x:_) = Just x
    firstJust []         = Nothing
    firstJust (_:rest)   = firstJust rest

-- | This rewrites @.type@ annotations of function symbols to @%object@.
-- This is done as the linker can relocate @%functions@ through the
-- Procedure Linking Table (PLT). This is bad since we expect that the
-- info table will appear directly before the symbol's location. In the
-- case that the PLT is used, this will be not an info table but instead
-- some random PLT garbage.
rewriteSymType :: Rewrite
rewriteSymType _ l
  | isType l  = Just $ rewrite '@' $ rewrite '%' l
  | otherwise = Nothing
  where
    isType = B.isPrefixOf (B.pack ".type")

    rewrite :: Char -> B.ByteString -> B.ByteString
    rewrite prefix = replaceOnce funcType objType
      where
        funcType = prefix `B.cons` B.pack "function"
        objType  = prefix `B.cons` B.pack "object"

-- | This rewrites aligned AVX instructions to their unaligned counterparts on
-- x86-64. This is necessary because the stack is not adequately aligned for
-- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
-- and disable tail call optimization. Both would be catastrophic here so GHC
-- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
-- rewrites the instructions in the mangler.
rewriteAVX :: Rewrite
rewriteAVX dflags s
  | not isX86_64 = Nothing
  | isVmovdqa s  = Just $ replaceOnce (B.pack "vmovdqa") (B.pack "vmovdqu") s
  | isVmovap s   = Just $ replaceOnce (B.pack "vmovap") (B.pack "vmovup") s
  | otherwise    = Nothing
  where
    isX86_64 = platformArch (targetPlatform dflags) == ArchX86_64
    isVmovdqa = B.isPrefixOf (B.pack "vmovdqa")
    isVmovap = B.isPrefixOf (B.pack "vmovap")

-- | This rewrites (tail) calls to avoid creating PLT entries for
-- functions on riscv64. The replacement will load the address from the
-- GOT, which is resolved to point to the real address of the function.
rewriteCall :: Rewrite
rewriteCall dflags l
  | not isRISCV64 = Nothing
  | isCall l      = Just $ replaceCall "call" "jalr" "ra" l
  | isTail l      = Just $ replaceCall "tail" "jr" "t1" l
  | otherwise     = Nothing
  where
    isRISCV64 = platformArch (targetPlatform dflags) == ArchRISCV64
    isCall = B.isPrefixOf (B.pack "call\t")
    isTail = B.isPrefixOf (B.pack "tail\t")

    replaceCall call jump reg l =
        appendInsn (jump ++ "\t" ++ reg) $ removePlt $
        replaceOnce (B.pack call) (B.pack ("la\t" ++ reg ++ ",")) l
      where
        removePlt = replaceOnce (B.pack "@plt") (B.pack "")
        appendInsn i = (`B.append` B.pack ("\n\t" ++ i))

-- | @replaceOnce match replace bs@ replaces the first occurrence of the
-- substring @match@ in @bs@ with @replace@.
replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
replaceOnce matchBS replaceOnceBS = loop
  where
    loop :: B.ByteString -> B.ByteString
    loop cts =
        case B.breakSubstring matchBS cts of
          (hd,tl) | B.null tl -> hd
                  | otherwise -> hd `B.append` replaceOnceBS `B.append`
                                 B.drop (B.length matchBS) tl

-- | This function splits a line of assembly code into the label and the
-- rest of the code.
splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
splitLine l = (symbol, B.dropWhile isSpace rest)
  where
    isSpace ' ' = True
    isSpace '\t' = True
    isSpace _ = False
    (symbol, rest) = B.span (not . isSpace) l