summaryrefslogtreecommitdiff
path: root/compiler/main/Ar.hs
blob: 1a1862a6fe95d7f900284633acf534da73f6697e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
{-# LANGUAGE OverloadedStrings, GeneralizedNewtypeDeriving, CPP #-}
{- Note: [The need for Ar.hs]
Building `-staticlib` required the presence of libtool, and was a such
restricted to mach-o only. As libtool on macOS and gnu libtool are very
different, there was no simple portable way to support this.

libtool for static archives does essentially: concatinate the input archives,
add the input objects, and create a symbol index. Using `ar` for this task
fails as even `ar` (bsd and gnu, llvm, ...) do not provide the same
features across platforms (e.g. index prefixed retrieval of objects with
the same name.)

As Archives are rather simple structurally, we can just build the archives
with Haskell directly and use ranlib on the final result to get the symbol
index. This should allow us to work around with the differences/abailability
of libtool across different platforms.
-}
module Ar
  (ArchiveEntry(..)
  ,Archive(..)
  ,afilter

  ,parseAr

  ,loadAr
  ,loadObj
  ,writeBSDAr
  ,writeGNUAr

  ,isBSDSymdef
  ,isGNUSymdef
  )
   where

import GhcPrelude

import Data.List (mapAccumL, isPrefixOf)
import Data.Monoid ((<>))
import Data.Binary.Get
import Data.Binary.Put
import Control.Monad
import Control.Applicative
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy as L
#if !defined(mingw32_HOST_OS)
import qualified System.Posix.Files as POSIX
#endif
import System.FilePath (takeFileName)

data ArchiveEntry = ArchiveEntry
    { filename :: String       -- ^ File name.
    , filetime :: Int          -- ^ File modification time.
    , fileown  :: Int          -- ^ File owner.
    , filegrp  :: Int          -- ^ File group.
    , filemode :: Int          -- ^ File mode.
    , filesize :: Int          -- ^ File size.
    , filedata :: B.ByteString -- ^ File bytes.
    } deriving (Eq, Show)

newtype Archive = Archive [ArchiveEntry]
        deriving (Eq, Show, Semigroup, Monoid)

afilter :: (ArchiveEntry -> Bool) -> Archive -> Archive
afilter f (Archive xs) = Archive (filter f xs)

isBSDSymdef, isGNUSymdef :: ArchiveEntry -> Bool
isBSDSymdef a = "__.SYMDEF" `isPrefixOf` (filename a)
isGNUSymdef a = "/" == (filename a)

-- | Archives have numeric values padded with '\x20' to the right.
getPaddedInt :: B.ByteString -> Int
getPaddedInt = read . C.unpack . C.takeWhile (/= '\x20')

putPaddedInt :: Int -> Int -> Put
putPaddedInt padding i = putPaddedString '\x20' padding (show i)

putPaddedString :: Char -> Int -> String -> Put
putPaddedString pad padding s = putByteString . C.pack . take padding $ s `mappend` (repeat pad)

getBSDArchEntries :: Get [ArchiveEntry]
getBSDArchEntries = do
    empty <- isEmpty
    if empty then
        return []
     else do
        name    <- getByteString 16
        when ('/' `C.elem` name && C.take 3 name /= "#1/") $
          fail "Looks like GNU Archive"
        time    <- getPaddedInt <$> getByteString 12
        own     <- getPaddedInt <$> getByteString 6
        grp     <- getPaddedInt <$> getByteString 6
        mode    <- getPaddedInt <$> getByteString 8
        st_size <- getPaddedInt <$> getByteString 10
        end     <- getByteString 2
        when (end /= "\x60\x0a") $
          fail ("[BSD Archive] Invalid archive header end marker for name: " ++
                C.unpack name)
        off1    <- liftM fromIntegral bytesRead :: Get Int
        -- BSD stores extended filenames, by writing #1/<length> into the
        -- name field, the first @length@ bytes then represent the file name
        -- thus the payload size is filesize + file name length.
        name    <- if C.unpack (C.take 3 name) == "#1/" then
                        liftM (C.unpack . C.takeWhile (/= '\0')) (getByteString $ read $ C.unpack $ C.drop 3 name)
                    else
                        return $ C.unpack $ C.takeWhile (/= ' ') name
        off2    <- liftM fromIntegral bytesRead :: Get Int
        file    <- getByteString (st_size - (off2 - off1))
        -- data sections are two byte aligned (see #15396)
        when (odd st_size) $
          void (getByteString 1)

        rest    <- getBSDArchEntries
        return $ (ArchiveEntry name time own grp mode (st_size - (off2 - off1)) file) : rest

-- | GNU Archives feature a special '//' entry that contains the
-- extended names. Those are referred to as /<num>, where num is the
-- offset into the '//' entry.
-- In addition, filenames are terminated with '/' in the archive.
getGNUArchEntries :: Maybe ArchiveEntry -> Get [ArchiveEntry]
getGNUArchEntries extInfo = do
  empty <- isEmpty
  if empty
    then return []
    else
    do
      name    <- getByteString 16
      time    <- getPaddedInt <$> getByteString 12
      own     <- getPaddedInt <$> getByteString 6
      grp     <- getPaddedInt <$> getByteString 6
      mode    <- getPaddedInt <$> getByteString 8
      st_size <- getPaddedInt <$> getByteString 10
      end     <- getByteString 2
      when (end /= "\x60\x0a") $
        fail ("[BSD Archive] Invalid archive header end marker for name: " ++
              C.unpack name)
      file <- getByteString st_size
      -- data sections are two byte aligned (see #15396)
      when (odd st_size) $
        void (getByteString 1)
      name <- return . C.unpack $
        if C.unpack (C.take 1 name) == "/"
        then case C.takeWhile (/= ' ') name of
               name@"/"  -> name               -- symbol table
               name@"//" -> name               -- extendedn file names table
               name      -> getExtName extInfo (read . C.unpack $ C.drop 1 name)
        else C.takeWhile (/= '/') name
      case name of
        "/"  -> getGNUArchEntries extInfo
        "//" -> getGNUArchEntries (Just (ArchiveEntry name time own grp mode st_size file))
        _    -> (ArchiveEntry name time own grp mode st_size file :) <$> getGNUArchEntries extInfo

  where
   getExtName :: Maybe ArchiveEntry -> Int -> B.ByteString
   getExtName Nothing _ = error "Invalid extended filename reference."
   getExtName (Just info) offset = C.takeWhile (/= '/') . C.drop offset $ filedata info

-- | put an Archive Entry. This assumes that the entries
-- have been preprocessed to account for the extenden file name
-- table section "//" e.g. for GNU Archives. Or that the names
-- have been move into the payload for BSD Archives.
putArchEntry :: ArchiveEntry -> PutM ()
putArchEntry (ArchiveEntry name time own grp mode st_size file) = do
  putPaddedString ' '  16 name
  putPaddedInt         12 time
  putPaddedInt          6 own
  putPaddedInt          6 grp
  putPaddedInt          8 mode
  putPaddedInt         10 (st_size + pad)
  putByteString           "\x60\x0a"
  putByteString           file
  when (pad == 1) $
    putWord8              0x0a
  where
    pad         = st_size `mod` 2

getArchMagic :: Get ()
getArchMagic = do
  magic <- liftM C.unpack $ getByteString 8
  if magic /= "!<arch>\n"
    then fail $ "Invalid magic number " ++ show magic
    else return ()

putArchMagic :: Put
putArchMagic = putByteString $ C.pack "!<arch>\n"

getArch :: Get Archive
getArch = Archive <$> do
  getArchMagic
  getBSDArchEntries <|> getGNUArchEntries Nothing

putBSDArch :: Archive -> PutM ()
putBSDArch (Archive as) = do
  putArchMagic
  mapM_ putArchEntry (processEntries as)

  where
    padStr pad size str = take size $ str <> repeat pad
    nameSize name = case length name `divMod` 4 of
      (n, 0) -> 4 * n
      (n, _) -> 4 * (n + 1)
    needExt name = length name > 16 || ' ' `elem` name
    processEntry :: ArchiveEntry -> ArchiveEntry
    processEntry archive@(ArchiveEntry name _ _ _ _ st_size _)
      | needExt name = archive { filename = "#1/" <> show sz
                               , filedata = C.pack (padStr '\0' sz name) <> filedata archive
                               , filesize = st_size + sz }
      | otherwise    = archive

      where sz = nameSize name

    processEntries = map processEntry

putGNUArch :: Archive -> PutM ()
putGNUArch (Archive as) = do
  putArchMagic
  mapM_ putArchEntry (processEntries as)

  where
    processEntry :: ArchiveEntry -> ArchiveEntry -> (ArchiveEntry, ArchiveEntry)
    processEntry extInfo archive@(ArchiveEntry name _ _ _ _ _ _)
      | length name > 15 = ( extInfo { filesize = filesize extInfo + length name + 2
                                    ,  filedata = filedata extInfo <>  C.pack name <> "/\n" }
                           , archive { filename = "/" <> show (filesize extInfo) } )
      | otherwise        = ( extInfo, archive { filename = name <> "/" } )

    processEntries :: [ArchiveEntry] -> [ArchiveEntry]
    processEntries =
      uncurry (:) . mapAccumL processEntry (ArchiveEntry "//" 0 0 0 0 0 mempty)

parseAr :: B.ByteString -> Archive
parseAr = runGet getArch . L.fromChunks . pure

writeBSDAr, writeGNUAr :: FilePath -> Archive -> IO ()
writeBSDAr fp = L.writeFile fp . runPut . putBSDArch
writeGNUAr fp = L.writeFile fp . runPut . putGNUArch

loadAr :: FilePath -> IO Archive
loadAr fp = parseAr <$> B.readFile fp

loadObj :: FilePath -> IO ArchiveEntry
loadObj fp = do
  payload <- B.readFile fp
  (modt, own, grp, mode) <- fileInfo fp
  return $ ArchiveEntry
    (takeFileName fp) modt own grp mode
    (B.length payload) payload

-- | Take a filePath and return (mod time, own, grp, mode in decimal)
fileInfo :: FilePath -> IO ( Int, Int, Int, Int) -- ^ mod time, own, grp, mode (in decimal)
#if defined(mingw32_HOST_OS)
-- on windows mod time, owner group and mode are zero.
fileInfo _ = pure (0,0,0,0)
#else
fileInfo fp = go <$> POSIX.getFileStatus fp
  where go status = ( fromEnum $ POSIX.modificationTime status
                    , fromIntegral $ POSIX.fileOwner status
                    , fromIntegral $ POSIX.fileGroup status
                    , oct2dec . fromIntegral $ POSIX.fileMode status
                    )

oct2dec :: Int -> Int
oct2dec = foldl' (\a b -> a * 10 + b) 0 . reverse . dec 8
  where dec _ 0 = []
        dec b i = let (rest, last) = i `quotRem` b
                  in last:dec b rest

#endif