1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
-- -----------------------------------------------------------------------------
-- | GHC LLVM Mangler
--
-- This script processes the assembly produced by LLVM, rewriting all symbols
-- of type @function to @object. This keeps them from going through the PLT,
-- which would be bad due to tables-next-to-code. On x86_64,
-- it also rewrites AVX instructions that require alignment to their
-- unaligned counterparts, since the stack is only 16-byte aligned but these
-- instructions require 32-byte alignment.
--
module LlvmMangler ( llvmFixupAsm ) where
import GhcPrelude
import DynFlags ( DynFlags, targetPlatform )
import Platform ( platformArch, Arch(..) )
import ErrUtils ( withTiming )
import Outputable ( text )
import Control.Exception
import qualified Data.ByteString.Char8 as B
import System.IO
-- | Read in assembly file and process
llvmFixupAsm :: DynFlags -> FilePath -> FilePath -> IO ()
llvmFixupAsm dflags f1 f2 = {-# SCC "llvm_mangler" #-}
withTiming (pure dflags) (text "LLVM Mangler") id $
withBinaryFile f1 ReadMode $ \r -> withBinaryFile f2 WriteMode $ \w -> do
go r w
hClose r
hClose w
return ()
where
go :: Handle -> Handle -> IO ()
go r w = do
e_l <- try $ B.hGetLine r ::IO (Either IOError B.ByteString)
let writeline a = B.hPutStrLn w (rewriteLine dflags rewrites a) >> go r w
case e_l of
Right l -> writeline l
Left _ -> return ()
-- | These are the rewrites that the mangler will perform
rewrites :: [Rewrite]
rewrites = [rewriteSymType, rewriteAVX]
type Rewrite = DynFlags -> B.ByteString -> Maybe B.ByteString
-- | Rewrite a line of assembly source with the given rewrites,
-- taking the first rewrite that applies.
rewriteLine :: DynFlags -> [Rewrite] -> B.ByteString -> B.ByteString
rewriteLine dflags rewrites l
-- We disable .subsections_via_symbols on darwin and ios, as the llvm code
-- gen uses prefix data for the info table. This however does not prevent
-- llvm from generating .subsections_via_symbols, which in turn with
-- -dead_strip, strips the info tables, and therefore breaks ghc.
| isSubsectionsViaSymbols l =
(B.pack "## no .subsection_via_symbols for ghc. We need our info tables!")
| otherwise =
case firstJust $ map (\rewrite -> rewrite dflags rest) rewrites of
Nothing -> l
Just rewritten -> B.concat $ [symbol, B.pack "\t", rewritten]
where
isSubsectionsViaSymbols = B.isPrefixOf (B.pack ".subsections_via_symbols")
(symbol, rest) = splitLine l
firstJust :: [Maybe a] -> Maybe a
firstJust (Just x:_) = Just x
firstJust [] = Nothing
firstJust (_:rest) = firstJust rest
-- | This rewrites @.type@ annotations of function symbols to @%object@.
-- This is done as the linker can relocate @%functions@ through the
-- Procedure Linking Table (PLT). This is bad since we expect that the
-- info table will appear directly before the symbol's location. In the
-- case that the PLT is used, this will be not an info table but instead
-- some random PLT garbage.
rewriteSymType :: Rewrite
rewriteSymType _ l
| isType l = Just $ rewrite '@' $ rewrite '%' l
| otherwise = Nothing
where
isType = B.isPrefixOf (B.pack ".type")
rewrite :: Char -> B.ByteString -> B.ByteString
rewrite prefix = replaceOnce funcType objType
where
funcType = prefix `B.cons` B.pack "function"
objType = prefix `B.cons` B.pack "object"
-- | This rewrites aligned AVX instructions to their unaligned counterparts on
-- x86-64. This is necessary because the stack is not adequately aligned for
-- aligned AVX spills, so LLVM would emit code that adjusts the stack pointer
-- and disable tail call optimization. Both would be catastrophic here so GHC
-- tells LLVM that the stack is 32-byte aligned (even though it isn't) and then
-- rewrites the instructions in the mangler.
rewriteAVX :: Rewrite
rewriteAVX dflags s
| not isX86_64 = Nothing
| isVmovdqa s = Just $ replaceOnce (B.pack "vmovdqa") (B.pack "vmovdqu") s
| isVmovap s = Just $ replaceOnce (B.pack "vmovap") (B.pack "vmovup") s
| otherwise = Nothing
where
isX86_64 = platformArch (targetPlatform dflags) == ArchX86_64
isVmovdqa = B.isPrefixOf (B.pack "vmovdqa")
isVmovap = B.isPrefixOf (B.pack "vmovap")
-- | @replaceOnce match replace bs@ replaces the first occurrence of the
-- substring @match@ in @bs@ with @replace@.
replaceOnce :: B.ByteString -> B.ByteString -> B.ByteString -> B.ByteString
replaceOnce matchBS replaceOnceBS = loop
where
loop :: B.ByteString -> B.ByteString
loop cts =
case B.breakSubstring matchBS cts of
(hd,tl) | B.null tl -> hd
| otherwise -> hd `B.append` replaceOnceBS `B.append`
B.drop (B.length matchBS) tl
-- | This function splits a line of assembly code into the label and the
-- rest of the code.
splitLine :: B.ByteString -> (B.ByteString, B.ByteString)
splitLine l = (symbol, B.dropWhile isSpace rest)
where
isSpace ' ' = True
isSpace '\t' = True
isSpace _ = False
(symbol, rest) = B.span (not . isSpace) l
|