diff options
author | Sylvain HENRY <hsyl20@gmail.com> | 2015-11-11 12:31:31 +0100 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2015-11-11 12:31:42 +0100 |
commit | 109d7ce85aadbd9fb7a322a0a83548e5d4e83926 (patch) | |
tree | bd367f2d0c1559f1a811322989c116d09576094f /compiler | |
parent | f0f9365fd7fe1a4c06926f390a6183449c3c6332 (diff) | |
download | haskell-109d7ce85aadbd9fb7a322a0a83548e5d4e83926.tar.gz |
Systools: read ELF section without calling readelf
This patch tackles two issues:
1) GHC stores a "link info" string into a ELF section. Initially a
section with type "note" was used but GHC didn't follow the ELF
specification which specifies a record-based format for these sections.
With D1375 we switched to a "progbits" section type for which there
isn't any format constraint. This is an issue for D1242 which use GCC's
--gc-sections which collects "unused" sections, such as our section
containing link info... In this patch, we fall back to a section with
type "note" but we respect the specified format.
2) Reading back the ELF section was done by parsing the result of a
call to "readelf". Calling readelf is problematic because the program
may not be available or it may be renamed on some platforms (see
D1326). Moreover we have no garanty that its output layout will stay
the same in future releases of readelf. Finally we would need to fix
the parsing to support "note" sections because of 1. Instead, this
patch proposes to use Data.Binary.Get to directly read the "link info"
note into its section. ELF has a specification, hence it should work on
every conforming platform.
This patch "reverts" D1375, hence it supersedes D1432. It makes D1326
not necessary anymore.
Test Plan:
- recomp011 should pass (test that relinking is avoided when both "link
info" match)
- we should add a test for ELF objects with more than 0xff00 sections
=> added test "recomp015"
- we should check that GAS generates 32-bit words with .int on every
supported platform using ELF (or find a place where this is
documented). harbomaster and I (@hsyl20) only tested on x86-64. On
platforms where it is not true, it should make recomp011 fail. =>
tested to work on Linux/amd64, Solaris/i386 and OpenBSD/amd64
Reviewers: olsner, ony, thomie, kgardas, austin, bgamari
Reviewed By: thomie, bgamari
Subscribers: kgardas, rwbarton, thomie
Differential Revision: https://phabricator.haskell.org/D1381
GHC Trac Issues: #10974, #11022
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/ghc.cabal.in | 2 | ||||
-rw-r--r-- | compiler/main/DriverPipeline.hs | 61 | ||||
-rw-r--r-- | compiler/main/Elf.hs | 471 | ||||
-rw-r--r-- | compiler/main/SysTools.hs | 29 |
4 files changed, 501 insertions, 62 deletions
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index badb90edf9..4c740f1002 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -48,6 +48,7 @@ Library directory >= 1 && < 1.3, process >= 1 && < 1.5, bytestring >= 0.9 && < 0.11, + binary >= 0.7 && < 0.8, time < 1.6, containers >= 0.5 && < 0.6, array >= 0.1 && < 0.6, @@ -332,6 +333,7 @@ Library StaticFlags StaticPtrTable SysTools + Elf TidyPgm Ctype HaddockUtils diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs index 3a027c24d1..33770b92f6 100644 --- a/compiler/main/DriverPipeline.hs +++ b/compiler/main/DriverPipeline.hs @@ -43,6 +43,7 @@ import Packages import HeaderInfo import DriverPhases import SysTools +import Elf import HscMain import Finder import HscTypes hiding ( Hsc ) @@ -72,7 +73,6 @@ import System.IO import Control.Monad import Data.List ( isSuffixOf ) import Data.Maybe -import Data.Char import Data.Time import Data.Version @@ -447,9 +447,15 @@ checkLinkInfo dflags pkg_deps exe_file = do link_info <- getLinkInfo dflags pkg_deps debugTraceMsg dflags 3 $ text ("Link info: " ++ link_info) - m_exe_link_info <- readElfSection dflags ghcLinkInfoSectionName exe_file - debugTraceMsg dflags 3 $ text ("Exe link info: " ++ show m_exe_link_info) - return (Just link_info /= m_exe_link_info) + m_exe_link_info <- readElfNoteAsString dflags exe_file + ghcLinkInfoSectionName ghcLinkInfoNoteName + let sameLinkInfo = (Just link_info == m_exe_link_info) + debugTraceMsg dflags 3 $ case m_exe_link_info of + Nothing -> text "Exe link info: Not found" + Just s + | sameLinkInfo -> text ("Exe link info is the same") + | otherwise -> text ("Exe link info is different: " ++ s) + return (not sameLinkInfo) platformSupportsSavingLinkOpts :: OS -> Bool platformSupportsSavingLinkOpts os @@ -461,6 +467,10 @@ ghcLinkInfoSectionName :: String ghcLinkInfoSectionName = ".debug-ghc-link-info" -- if we use the ".debug" prefix, then strip will strip it by default +-- Identifier for the note (see Note [LinkInfo section]) +ghcLinkInfoNoteName :: String +ghcLinkInfoNoteName = "GHC link info" + findHSLib :: DynFlags -> [String] -> String -> IO (Maybe FilePath) findHSLib dflags dirs lib = do let batch_lib_file = if gopt Opt_Static dflags @@ -1660,34 +1670,17 @@ mkNoteObjsToLinkIntoBinary dflags dep_packages = do where link_opts info = hcat [ - -- LinkInfo section must be of type "progbits" - -- See Note [LinkInfo section] - text "\t.section ", text ghcLinkInfoSectionName, - text ",\"\",", - text elfSectionProgBits, - text "\n", - - text "\t.ascii \"", info', text "\"\n", - - -- ALL generated assembly must have this section to disable - -- executable stacks. See also - -- compiler/nativeGen/AsmCodeGen.hs for another instance - -- where we need to do this. - (if platformHasGnuNonexecStack (targetPlatform dflags) - then text ".section .note.GNU-stack,\"\",@progbits\n" - else Outputable.empty) - - ] - where - info' = text $ escape info - - escape :: String -> String - escape = concatMap (charToC.fromIntegral.ord) - - elfSectionProgBits :: String - elfSectionProgBits = case platformArch (targetPlatform dflags) of - ArchARM _ _ _ -> "%progbits" - _ -> "@progbits" + -- "link info" section (see Note [LinkInfo section]) + makeElfNote dflags ghcLinkInfoSectionName ghcLinkInfoNoteName 0 info, + + -- ALL generated assembly must have this section to disable + -- executable stacks. See also + -- compiler/nativeGen/AsmCodeGen.hs for another instance + -- where we need to do this. + if platformHasGnuNonexecStack (targetPlatform dflags) + then text ".section .note.GNU-stack,\"\",@progbits\n" + else Outputable.empty + ] -- | Return the "link info" string -- @@ -1720,8 +1713,8 @@ changed, we use the link info stored in the existing binary to decide whether to re-link or not. The "link info" string is stored in a ELF section called ".debug-ghc-link-info" -(see ghcLinkInfoSectionName) with the SHT_PROGBITS type. It used to be of type -SHT_NOTE without following their specified record-based format (see #11022). +(see ghcLinkInfoSectionName) with the SHT_NOTE type. For some time, it used to +not follow the specified record-based format (see #11022). -} diff --git a/compiler/main/Elf.hs b/compiler/main/Elf.hs new file mode 100644 index 0000000000..6dd1019023 --- /dev/null +++ b/compiler/main/Elf.hs @@ -0,0 +1,471 @@ +{- +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2015 +-- +-- ELF format tools +-- +----------------------------------------------------------------------------- +-} + +module Elf ( + readElfSectionByName, + readElfNoteAsString, + makeElfNote + ) where + +import Exception +import DynFlags +import Platform +import ErrUtils +import Maybes (MaybeT(..),runMaybeT) +import Util (charToC) +import Outputable (text,hcat,SDoc) + +import Control.Monad (when) +import Data.Binary.Get +import Data.Word +import Data.Char (ord) +import Data.ByteString.Lazy (ByteString) +import qualified Data.ByteString.Lazy as LBS +import qualified Data.ByteString.Lazy.Char8 as B8 + +{- Note [ELF specification] + ~~~~~~~~~~~~~~~~~~~~~~~~ + + ELF (Executable and Linking Format) is described in the System V Application + Binary Interface (or ABI). The latter is composed of two parts: a generic + part and a processor specific part. The generic ABI describes the parts of + the interface that remain constant accross all hardware implementations of + System V. + + The latest release of the specification of the generic ABI is the version + 4.1 from March 18, 1997: + + - http://www.sco.com/developers/devspecs/gabi41.pdf + + Since 1997, snapshots of the draft for the "next" version are published: + + - http://www.sco.com/developers/gabi/ + + Quoting the notice on the website: "There is more than one instance of these + chapters to permit references to older instances to remain valid. All + modifications to these chapters are forward-compatible, so that correct use + of an older specification will not be invalidated by a newer instance. + Approximately on a yearly basis, a new instance will be saved, as it reaches + what appears to be a stable state." + + Nevertheless we will see that since 1998 it is not true for Note sections. + + Many ELF sections + ----------------- + + ELF-4.1: the normal section number fields in ELF are limited to 16 bits, + which runs out of bits when you try to cram in more sections than that. Two + fields are concerned: the one containing the number of the sections and the + one containing the index of the section that contains section's names. (The + same thing applies to the field containing the number of segments, but we + don't care about it here). + + ELF-next: to solve this, theses fields in the ELF header have an escape + value (different for each case), and the actual section number is stashed + into unused fields in the first section header. + + We support this extension as it is forward-compatible with ELF-4.1. + Moreover, GHC may generate objects with a lot of sections with the + "function-sections" feature (one section per function). + + Note sections + ------------- + + Sections with type "note" (SHT_NOTE in the specification) are used to add + arbitrary data into an ELF file. An entry in a note section is composed of a + name, a type and a value. + + ELF-4.1: "The note information in sections and program header elements holds + any number of entries, each of which is an array of 4-byte words in the + format of the target processor." Each entry has the following format: + | namesz | Word32: size of the name string (including the ending \0) + | descsz | Word32: size of the value + | type | Word32: type of the note + | name | Name string (with \0 padding to ensure 4-byte alignment) + | ... | + | desc | Value (with \0 padding to ensure 4-byte alignment) + | ... | + + ELF-next: "The note information in sections and program header elements + holds a variable amount of entries. In 64-bit objects (files with + e_ident[EI_CLASS] equal to ELFCLASS64), each entry is an array of 8-byte + words in the format of the target processor. In 32-bit objects (files with + e_ident[EI_CLASS] equal to ELFCLASS32), each entry is an array of 4-byte + words in the format of the target processor." (from 1998-2015 snapshots) + + This is not forward-compatible with ELF-4.1. In practice, for almost all + platforms namesz, descz and type fields are 4-byte words for both 32-bit and + 64-bit objects (see elf.h and readelf source code). + + The only exception in readelf source code is for IA_64 machines with OpenVMS + OS: "This OS has so many departures from the ELF standard that we test it at + many places" (comment for is_ia64_vms() in readelf.c). In this case, namesz, + descsz and type fields are 8-byte words and name and value fields are padded + to ensure 8-byte alignment. + + We don't support this platform in the following code. Reading a note section + could be done easily (by testing Machine and OS fields in the ELF header). + Writing a note section, however, requires that we generate a different + assembly code for GAS depending on the target platform and this is a little + bit more involved. + +-} + + +-- | ELF header +-- +-- The ELF header indicates the native word size (32-bit or 64-bit) and the +-- endianness of the target machine. We directly store getters for words of +-- different sizes as it is more convenient to use. We also store the word size +-- as it is useful to skip some uninteresting fields. +-- +-- Other information such as the target machine and OS are left out as we don't +-- use them yet. We could add them in the future if we ever need them. +data ElfHeader = ElfHeader + { gw16 :: Get Word16 -- ^ Get a Word16 with the correct endianness + , gw32 :: Get Word32 -- ^ Get a Word32 with the correct endianness + , gwN :: Get Word64 -- ^ Get a Word with the correct word size + -- and endianness + , wordSize :: Int -- ^ Word size in bytes + } + + +-- | Read the ELF header +readElfHeader :: DynFlags -> ByteString -> IO (Maybe ElfHeader) +readElfHeader dflags bs = runGetOrThrow getHeader bs `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF header") + return Nothing + where + getHeader = do + magic <- getWord32be + ws <- getWord8 + endian <- getWord8 + version <- getWord8 + skip 9 -- skip OSABI, ABI version and padding + when (magic /= 0x7F454C46 || version /= 1) $ fail "Invalid ELF header" + + case (ws, endian) of + -- ELF 32, little endian + (1,1) -> return . Just $ ElfHeader + getWord16le + getWord32le + (fmap fromIntegral getWord32le) 4 + -- ELF 32, big endian + (1,2) -> return . Just $ ElfHeader + getWord16be + getWord32be + (fmap fromIntegral getWord32be) 4 + -- ELF 64, little endian + (2,1) -> return . Just $ ElfHeader + getWord16le + getWord32le + (fmap fromIntegral getWord64le) 8 + -- ELF 64, big endian + (2,2) -> return . Just $ ElfHeader + getWord16be + getWord32be + (fmap fromIntegral getWord64be) 8 + _ -> fail "Invalid ELF header" + + +------------------ +-- SECTIONS +------------------ + + +-- | Description of the section table +data SectionTable = SectionTable + { sectionTableOffset :: Word64 -- ^ offset of the table describing sections + , sectionEntrySize :: Word16 -- ^ size of an entry in the section table + , sectionEntryCount :: Word64 -- ^ number of sections + , sectionNameIndex :: Word32 -- ^ index of a special section which + -- contains section's names + } + +-- | Read the ELF section table +readElfSectionTable :: DynFlags + -> ElfHeader + -> ByteString + -> IO (Maybe SectionTable) + +readElfSectionTable dflags hdr bs = action `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF section table") + return Nothing + where + getSectionTable :: Get SectionTable + getSectionTable = do + skip (24 + 2*wordSize hdr) -- skip header and some other fields + secTableOffset <- gwN hdr + skip 10 + entrySize <- gw16 hdr + entryCount <- gw16 hdr + secNameIndex <- gw16 hdr + return (SectionTable secTableOffset entrySize + (fromIntegral entryCount) + (fromIntegral secNameIndex)) + + action = do + secTable <- runGetOrThrow getSectionTable bs + -- In some cases, the number of entries and the index of the section + -- containing section's names must be found in unused fields of the first + -- section entry (see Note [ELF specification]) + let + offSize0 = fromIntegral $ sectionTableOffset secTable + 8 + + 3 * fromIntegral (wordSize hdr) + offLink0 = fromIntegral $ offSize0 + fromIntegral (wordSize hdr) + + entryCount' <- if sectionEntryCount secTable /= 0 + then return (sectionEntryCount secTable) + else runGetOrThrow (gwN hdr) (LBS.drop offSize0 bs) + entryNameIndex' <- if sectionNameIndex secTable /= 0xffff + then return (sectionNameIndex secTable) + else runGetOrThrow (gw32 hdr) (LBS.drop offLink0 bs) + return (Just $ secTable + { sectionEntryCount = entryCount' + , sectionNameIndex = entryNameIndex' + }) + + +-- | A section +data Section = Section + { entryName :: ByteString -- ^ Name of the section + , entryBS :: ByteString -- ^ Content of the section + } + +-- | Read a ELF section +readElfSectionByIndex :: DynFlags + -> ElfHeader + -> SectionTable + -> Word64 + -> ByteString + -> IO (Maybe Section) + +readElfSectionByIndex dflags hdr secTable i bs = action `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF section") + return Nothing + where + -- read an entry from the section table + getEntry = do + nameIndex <- gw32 hdr + skip (4+2*wordSize hdr) + offset <- fmap fromIntegral $ gwN hdr + size <- fmap fromIntegral $ gwN hdr + let bs' = LBS.take size (LBS.drop offset bs) + return (nameIndex,bs') + + -- read the entry with the given index in the section table + getEntryByIndex x = runGetOrThrow getEntry bs' + where + bs' = LBS.drop off bs + off = fromIntegral $ sectionTableOffset secTable + + x * fromIntegral (sectionEntrySize secTable) + + -- Get the name of a section + getEntryName nameIndex = do + let idx = fromIntegral (sectionNameIndex secTable) + (_,nameTable) <- getEntryByIndex idx + let bs' = LBS.drop nameIndex nameTable + runGetOrThrow getLazyByteStringNul bs' + + action = do + (nameIndex,bs') <- getEntryByIndex (fromIntegral i) + name <- getEntryName (fromIntegral nameIndex) + return (Just $ Section name bs') + + +-- | Find a section from its name. Return the section contents. +-- +-- We do not perform any check on the section type. +findSectionFromName :: DynFlags + -> ElfHeader + -> SectionTable + -> String + -> ByteString + -> IO (Maybe ByteString) +findSectionFromName dflags hdr secTable name bs = + rec [0..sectionEntryCount secTable - 1] + where + -- convert the required section name into a ByteString to perform + -- ByteString comparison instead of String comparison + name' = B8.pack name + + -- compare recursively each section name and return the contents of + -- the matching one, if any + rec [] = return Nothing + rec (x:xs) = do + me <- readElfSectionByIndex dflags hdr secTable x bs + case me of + Just e | entryName e == name' -> return (Just (entryBS e)) + _ -> rec xs + + +-- | Given a section name, read its contents as a ByteString. +-- +-- If the section isn't found or if there is any parsing error, we return +-- Nothing +readElfSectionByName :: DynFlags + -> ByteString + -> String + -> IO (Maybe LBS.ByteString) + +readElfSectionByName dflags bs name = action `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF section \"" ++ name ++ "\"") + return Nothing + where + action = runMaybeT $ do + hdr <- MaybeT $ readElfHeader dflags bs + secTable <- MaybeT $ readElfSectionTable dflags hdr bs + MaybeT $ findSectionFromName dflags hdr secTable name bs + +------------------ +-- NOTE SECTIONS +------------------ + +-- | read a Note as a ByteString +-- +-- If you try to read a note from a section which does not support the Note +-- format, the parsing is likely to fail and Nothing will be returned +readElfNoteBS :: DynFlags + -> ByteString + -> String + -> String + -> IO (Maybe LBS.ByteString) + +readElfNoteBS dflags bs sectionName noteId = action `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF note \"" ++ noteId ++ + "\" in section \"" ++ sectionName ++ "\"") + return Nothing + where + -- align the getter on n bytes + align n = do + m <- bytesRead + if m `mod` n == 0 + then return () + else skip 1 >> align n + + -- noteId as a bytestring + noteId' = B8.pack noteId + + -- read notes recursively until the one with a valid identifier is found + findNote hdr = do + align 4 + namesz <- gw32 hdr + descsz <- gw32 hdr + _ <- gw32 hdr -- we don't use the note type + name <- if namesz == 0 + then return LBS.empty + else getLazyByteStringNul + align 4 + desc <- if descsz == 0 + then return LBS.empty + else getLazyByteString (fromIntegral descsz) + if name == noteId' + then return $ Just desc + else findNote hdr + + + action = runMaybeT $ do + hdr <- MaybeT $ readElfHeader dflags bs + sec <- MaybeT $ readElfSectionByName dflags bs sectionName + MaybeT $ runGetOrThrow (findNote hdr) sec + +-- | read a Note as a String +-- +-- If you try to read a note from a section which does not support the Note +-- format, the parsing is likely to fail and Nothing will be returned +readElfNoteAsString :: DynFlags + -> FilePath + -> String + -> String + -> IO (Maybe String) + +readElfNoteAsString dflags path sectionName noteId = action `catchIO` \_ -> do + debugTraceMsg dflags 3 $ + text ("Unable to read ELF note \"" ++ noteId ++ + "\" in section \"" ++ sectionName ++ "\"") + return Nothing + where + action = do + bs <- LBS.readFile path + note <- readElfNoteBS dflags bs sectionName noteId + return (fmap B8.unpack note) + + +-- | Generate the GAS code to create a Note section +-- +-- Header fields for notes are 32-bit long (see Note [ELF specification]). +-- +-- It seems there is no easy way to force GNU AS to generate a 32-bit word in +-- every case. Hence we use .int directive to create them: however "The byte +-- order and bit size of the number depends on what kind of target the assembly +-- is for." (https://sourceware.org/binutils/docs/as/Int.html#Int) +-- +-- If we add new target platforms, we need to check that the generated words +-- are 32-bit long, otherwise we need to use platform specific directives to +-- force 32-bit .int in asWord32. +makeElfNote :: DynFlags -> String -> String -> Word32 -> String -> SDoc +makeElfNote dflags sectionName noteName typ contents = hcat [ + text "\t.section ", + text sectionName, + text ",\"\",", + text elfSectionNote, + text "\n", + + -- note name length (+ 1 for ending \0) + asWord32 (length noteName + 1), + + -- note contents size + asWord32 (length contents), + + -- note type + asWord32 typ, + + -- note name (.asciz for \0 ending string) + padding + text "\t.asciz \"", + text noteName, + text "\"\n", + text "\t.align 4\n", + + -- note contents (.ascii to avoid ending \0) + padding + text "\t.ascii \"", + text (escape contents), + text "\"\n", + text "\t.align 4\n"] + where + escape :: String -> String + escape = concatMap (charToC.fromIntegral.ord) + + asWord32 :: Show a => a -> SDoc + asWord32 x = hcat [ + text "\t.int ", + text (show x), + text "\n"] + + elfSectionNote :: String + elfSectionNote = case platformArch (targetPlatform dflags) of + ArchARM _ _ _ -> "%note" + _ -> "@note" + + + +------------------ +-- Helpers +------------------ + +-- | runGet in IO monad that throws an IOException on failure +runGetOrThrow :: Get a -> LBS.ByteString -> IO a +runGetOrThrow g bs = case runGetOrFail g bs of + Left _ -> fail "Error while reading file" + Right (_,_,a) -> return a diff --git a/compiler/main/SysTools.hs b/compiler/main/SysTools.hs index 5e9646d510..879b035d03 100644 --- a/compiler/main/SysTools.hs +++ b/compiler/main/SysTools.hs @@ -25,7 +25,6 @@ module SysTools ( runLlvmLlc, runClang, figureLlvmVersion, - readElfSection, getLinkerInfo, getCompilerInfo, @@ -78,8 +77,6 @@ import System.Directory import Data.Char import Data.List import qualified Data.Map as Map -import Text.ParserCombinators.ReadP hiding (char) -import qualified Text.ParserCombinators.ReadP as R #ifndef mingw32_HOST_OS import qualified System.Posix.Internals @@ -1043,31 +1040,7 @@ copyWithHeader dflags purpose maybe_header from to = do hPutStr h str hSetBinaryMode h True --- | read the contents of the named section in an ELF object as a --- String. -readElfSection :: DynFlags -> String -> FilePath -> IO (Maybe String) -readElfSection _dflags section exe = do - let - prog = "readelf" - args = [Option "-p", Option section, FileOption "" exe] - -- - r <- readProcessEnvWithExitCode prog (filter notNull (map showOpt args)) - en_locale_env - case r of - (ExitSuccess, out, _err) -> return (doFilter (lines out)) - _ -> return Nothing - where - doFilter [] = Nothing - doFilter (s:r) = case readP_to_S parse s of - [(p,"")] -> Just p - _r -> doFilter r - where parse = do - skipSpaces - _ <- R.char '[' - skipSpaces - _ <- string "0]" - skipSpaces - munch (const True) + {- ************************************************************************ |