summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Prantl <aprantl@apple.com> 2021-11-08 11:29:04 -0800
committer Adrian Prantl <aprantl@apple.com> 2021-11-08 11:30:12 -0800
commit 8bd8dd16e22b7a8f4df0f51ae3511b887629be57 (patch)
tree 2df6d0044d2370a1d77a2cd0c4381e06ba774819
parent d398e8f170fa9b11a13707b8ccc2a278653d374b (diff)
download llvm-8bd8dd16e22b7a8f4df0f51ae3511b887629be57.tar.gz
Extend obj2yaml to optionally preserve raw __LINKEDIT/__DATA segments.
I am planning to upstream MachOObjectFile code to support Darwin chained fixups. In order to test the new parser features, we need a way to produce correct (and incorrect) chained fixups. Right now the only tool that can produce them is the Darwin linker. To avoid having to check in binary files, this patch allows obj2yaml to print a hexdump of the raw __LINKEDIT and __DATA segments, which both allows us to bootstrap the parser and enables us to easily create malformed inputs to test error handling in the parser. This patch adds a new option to obj2yaml, --raw-segment, which accepts the values data and linkedit and may be given more than once (e.g. --raw-segment=data --raw-segment=linkedit). Differential Revision: https://reviews.llvm.org/D113234
-rw-r--r-- llvm/include/llvm/Object/MachO.h 3
-rw-r--r-- llvm/include/llvm/ObjectYAML/MachOYAML.h 1
-rw-r--r-- llvm/lib/Object/MachOObjectFile.cpp 40
-rw-r--r-- llvm/lib/ObjectYAML/MachOEmitter.cpp 11
-rw-r--r-- llvm/lib/ObjectYAML/MachOYAML.cpp 3
-rw-r--r-- llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml 184
-rw-r--r-- llvm/tools/obj2yaml/macho2yaml.cpp 35
-rw-r--r-- llvm/tools/obj2yaml/obj2yaml.cpp 15
-rw-r--r-- llvm/tools/obj2yaml/obj2yaml.h 5
9 files changed, 280 insertions, 17 deletions
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index d2ad12e98deb..ca5d63e4074f 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -311,6 +311,9 @@ public:
bool isSectionBitcode(DataRefImpl Sec) const override;
bool isDebugSection(DataRefImpl Sec) const override;
+ /// Return the raw contents of an entire segment.
+ ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
+
/// When dsymutil generates the companion file, it strips all unnecessary
/// sections (e.g. everything in the _TEXT segment) by omitting their body
/// and setting the offset in their corresponding load command to zero.
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index 5d1d3ee23594..ee89f4eac61f 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -131,6 +131,7 @@ struct Object {
std::vector<LoadCommand> LoadCommands;
std::vector<Section> Sections;
LinkEditData LinkEdit;
+ Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
DWARFYAML::Data DWARF;
};
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index e84defb6786e..7501661591f0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
SectionName == "__swift_ast";
}
+namespace {
+template <typename LoadCommandType>
+ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
+ MachOObjectFile::LoadCommandInfo LoadCmd,
+ StringRef SegmentName) {
+ auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
+ if (!SegmentOrErr) {
+ consumeError(SegmentOrErr.takeError());
+ return {};
+ }
+ auto &Segment = SegmentOrErr.get();
+ if (StringRef(Segment.segname, 16).startswith(SegmentName))
+ return arrayRefFromStringRef(Obj.getData().slice(
+ Segment.fileoff, Segment.fileoff + Segment.filesize));
+ return {};
+}
+} // namespace
+
+ArrayRef<uint8_t>
+MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
+ for (auto LoadCmd : load_commands()) {
+ ArrayRef<uint8_t> Contents;
+ switch (LoadCmd.C.cmd) {
+ case MachO::LC_SEGMENT:
+ Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
+ SegmentName);
+ break;
+ case MachO::LC_SEGMENT_64:
+ Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
+ SegmentName);
+ break;
+ default:
+ continue;
+ }
+ if (!Contents.empty())
+ return Contents;
+ }
+ return {};
+}
+
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
return Sec.getRawDataRefImpl().d.a;
}
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 63179ae61400..c653c29ec9a7 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -288,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
}
Error MachOWriter::writeSectionData(raw_ostream &OS) {
+ uint64_t LinkEditOff = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SEGMENT:
@@ -297,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
if (0 ==
strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
FoundLinkEditSeg = true;
+ LinkEditOff = segOff;
+ if (Obj.RawLinkEditSegment)
+ continue;
writeLinkEditData(OS);
}
for (auto &Sec : LC.Sections) {
@@ -344,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
}
}
+ if (Obj.RawLinkEditSegment) {
+ ZeroToOffset(OS, LinkEditOff);
+ if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
+ return createStringError(errc::invalid_argument,
+ "section offsets don't line up");
+ Obj.RawLinkEditSegment->writeAsBinary(OS);
+ }
return Error::success();
}
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index 757e46cefc40..c9562bd72258 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
Object.Header.magic == MachO::MH_CIGAM_64;
IO.mapOptional("LoadCommands", Object.LoadCommands);
+
+ if (Object.RawLinkEditSegment || !IO.outputting())
+ IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
if(!Object.LinkEdit.isEmpty() || !IO.outputting())
IO.mapOptional("LinkEditData", Object.LinkEdit);
diff --git a/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml
new file mode 100644
index 000000000000..b9d0cae8ce8a
--- /dev/null
+++ b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml
@@ -0,0 +1,184 @@
+# Test that obj2yaml + yaml2obj can round-trip mach-o executables with
+# raw __LINKEDIT segments.
+#
+# RUN: yaml2obj %s | obj2yaml --raw-segment=data --raw-segment=linkedit | FileCheck %s
+#
+# This file was produced using:
+# echo "int ext;" > a.c
+# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c
+# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib
+# echo "extern int ext;" > b.c
+# echo "int padding;" >> b.c
+# echo "int *p = &ext + 4;" >> b.c
+# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c
+# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la
+#
+# CHECK: - sectname: __data
+# CHECK: segname: __DATA
+# CHECK: content: '0000001000000080'
+# CHECK: __LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x100000C
+ cpusubtype: 0x0
+ filetype: 0x6
+ ncmds: 16
+ sizeofcmds: 816
+ flags: 0x100085
+ reserved: 0x0
+LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 0
+ vmsize: 16384
+ fileoff: 0
+ filesize: 16384
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x4000
+ size: 0
+ offset: 0x4000
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: ''
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __DATA
+ vmaddr: 16384
+ vmsize: 16384
+ fileoff: 16384
+ filesize: 16384
+ maxprot: 3
+ initprot: 3
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __data
+ segname: __DATA
+ addr: 0x4000
+ size: 8
+ offset: 0x4000
+ align: 3
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: '0000001000000080'
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 32768
+ vmsize: 16384
+ fileoff: 32768
+ filesize: 160
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ - cmd: LC_ID_DYLIB
+ cmdsize: 64
+ dylib:
+ name: 24
+ timestamp: 1
+ current_version: 0
+ compatibility_version: 0
+ Content: '@executable_path/libfixups.dylib'
+ ZeroPadBytes: 8
+ - cmd: LC_DYLD_CHAINED_FIXUPS
+ cmdsize: 16
+ dataoff: 32768
+ datasize: 88
+ - cmd: LC_DYLD_EXPORTS_TRIE
+ cmdsize: 16
+ dataoff: 32856
+ datasize: 16
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 32880
+ nsyms: 2
+ stroff: 32912
+ strsize: 16
+ - cmd: LC_DYSYMTAB
+ cmdsize: 80
+ ilocalsym: 0
+ nlocalsym: 0
+ iextdefsym: 0
+ nextdefsym: 1
+ iundefsym: 1
+ nundefsym: 1
+ tocoff: 0
+ ntoc: 0
+ modtaboff: 0
+ nmodtab: 0
+ extrefsymoff: 0
+ nextrefsyms: 0
+ indirectsymoff: 0
+ nindirectsyms: 0
+ extreloff: 0
+ nextrel: 0
+ locreloff: 0
+ nlocrel: 0
+ - cmd: LC_UUID
+ cmdsize: 24
+ uuid: 56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 32
+ platform: 2
+ minos: 983296
+ sdk: 983552
+ ntools: 1
+ Tools:
+ - tool: 3
+ version: 46596096
+ - cmd: LC_SOURCE_VERSION
+ cmdsize: 16
+ version: 0
+ - cmd: LC_ENCRYPTION_INFO_64
+ cmdsize: 24
+ cryptoff: 16384
+ cryptsize: 0
+ cryptid: 0
+ pad: 0
+ - cmd: LC_LOAD_DYLIB
+ cmdsize: 56
+ dylib:
+ name: 24
+ timestamp: 2
+ current_version: 0
+ compatibility_version: 0
+ Content: '@executable_path/liba.dylib'
+ ZeroPadBytes: 5
+ - cmd: LC_LOAD_DYLIB
+ cmdsize: 56
+ dylib:
+ name: 24
+ timestamp: 2
+ current_version: 85917696
+ compatibility_version: 65536
+ Content: '/usr/lib/libSystem.B.dylib'
+ ZeroPadBytes: 6
+ - cmd: LC_FUNCTION_STARTS
+ cmdsize: 16
+ dataoff: 32872
+ datasize: 8
+ - cmd: LC_DATA_IN_CODE
+ cmdsize: 16
+ dataoff: 32880
+ datasize: 0
+__LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000
+...
diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp
index b7289bff67ed..d3b4bf1bf8cc 100644
--- a/llvm/tools/obj2yaml/macho2yaml.cpp
+++ b/llvm/tools/obj2yaml/macho2yaml.cpp
@@ -29,6 +29,7 @@ class MachODumper {
const object::MachOObjectFile &Obj;
std::unique_ptr<DWARFContext> DWARFCtx;
+ unsigned RawSegments;
void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
@@ -52,8 +53,8 @@ class MachODumper {
public:
MachODumper(const object::MachOObjectFile &O,
- std::unique_ptr<DWARFContext> DCtx)
- : Obj(O), DWARFCtx(std::move(DCtx)) {}
+ std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
+ : Obj(O), DWARFCtx(std::move(DCtx)), RawSegments(RawSegments) {}
Expected<std::unique_ptr<MachOYAML::Object>> dump();
};
@@ -176,6 +177,13 @@ Expected<const char *> MachODumper::extractSections(
if (Expected<MachOYAML::Section> S =
constructSection(Sec, Sections.size() + 1)) {
StringRef SecName(S->sectname);
+
+ // Copy data sections if requested.
+ if ((RawSegments & RawSegments::data) &&
+ StringRef(S->segname).startswith("__DATA"))
+ S->content =
+ yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
+
if (SecName.startswith("__debug_")) {
// If the DWARF section cannot be successfully parsed, emit raw content
// instead of an entry in the DWARF section of the YAML.
@@ -282,7 +290,11 @@ Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
dumpHeader(Y);
if (Error Err = dumpLoadCommands(Y))
return std::move(Err);
- dumpLinkEdit(Y);
+ if (RawSegments & RawSegments::linkedit)
+ Y->RawLinkEditSegment =
+ yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
+ else
+ dumpLinkEdit(Y);
return std::move(Y);
}
@@ -587,9 +599,10 @@ void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
}
}
-Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
+ unsigned RawSegments) {
std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
- MachODumper Dumper(Obj, std::move(DCtx));
+ MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
if (!YAML)
return YAML.takeError();
@@ -602,7 +615,8 @@ Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) {
return Error::success();
}
-Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
+Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
+ unsigned RawSegments) {
yaml::YamlObjectFile YAMLFile;
YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
@@ -624,7 +638,7 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
return SliceObj.takeError();
std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
- MachODumper Dumper(*SliceObj.get(), std::move(DCtx));
+ MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
if (!YAMLObj)
return YAMLObj.takeError();
@@ -636,12 +650,13 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) {
return Error::success();
}
-Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) {
+Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
+ unsigned RawSegments) {
if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
- return macho2yaml(Out, *MachOObj);
+ return macho2yaml(Out, *MachOObj, RawSegments);
if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
- return macho2yaml(Out, *MachOObj);
+ return macho2yaml(Out, *MachOObj, RawSegments);
llvm_unreachable("unexpected Mach-O file format");
}
diff --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp
index e9e47d1a2b18..9c7a3385850d 100644
--- a/llvm/tools/obj2yaml/obj2yaml.cpp
+++ b/llvm/tools/obj2yaml/obj2yaml.cpp
@@ -1,4 +1,4 @@
-//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===//
+//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -18,6 +18,14 @@
using namespace llvm;
using namespace llvm::object;
+static cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+static cl::bits<RawSegments> RawSegment(
+ "raw-segment",
+ cl::desc("Mach-O: dump the raw contents of the listed segments instead of "
+ "parsing them:"),
+ cl::values(clEnumVal(data, "__DATA"), clEnumVal(linkedit, "__LINKEDIT")));
+
static Error dumpObject(const ObjectFile &Obj) {
if (Obj.isCOFF())
return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
@@ -54,7 +62,7 @@ static Error dumpInput(StringRef File) {
// Universal MachO is not a subclass of ObjectFile, so it needs to be handled
// here with the other binary types.
if (Binary.isMachO() || Binary.isMachOUniversalBinary())
- return macho2yaml(outs(), Binary);
+ return macho2yaml(outs(), Binary, RawSegment.getBits());
if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
return dumpObject(*Obj);
if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
@@ -74,9 +82,6 @@ static void reportError(StringRef Input, Error Err) {
errs().flush();
}
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
- cl::init("-"));
-
int main(int argc, char *argv[]) {
InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index fdd9b2a00185..c026482eaf0c 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -20,12 +20,13 @@
#include "llvm/Support/MemoryBufferRef.h"
#include <system_error>
+enum RawSegments : unsigned { none = 0, data = 1, linkedit = 1 << 1 };
std::error_code coff2yaml(llvm::raw_ostream &Out,
const llvm::object::COFFObjectFile &Obj);
llvm::Error elf2yaml(llvm::raw_ostream &Out,
const llvm::object::ObjectFile &Obj);
-llvm::Error macho2yaml(llvm::raw_ostream &Out,
- const llvm::object::Binary &Obj);
+llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj,
+ unsigned RawSegments);
llvm::Error minidump2yaml(llvm::raw_ostream &Out,
const llvm::object::MinidumpFile &Obj);
llvm::Error xcoff2yaml(llvm::raw_ostream &Out,