diff options
author | Jez Ng <jezng@fb.com> | 2020-12-01 14:45:09 -0800 |
---|---|---|
committer | Jez Ng <jezng@fb.com> | 2020-12-01 15:05:20 -0800 |
commit | 51629abce0e2f9d1376eb0b5070532a2bbec6766 (patch) | |
tree | aa3d1f46edeb475eae49ed1726e6c2a7606b00ed | |
parent | 3fcb0eeb152beb4320c7632bcfa2b1e7c2e5ca00 (diff) | |
download | llvm-51629abce0e2f9d1376eb0b5070532a2bbec6766.tar.gz |
[lld-macho] Emit local symbols in symtab; record metadata in LC_DYSYMTAB
Symbols of the same type must be laid out contiguously: following ld64's
lead, we choose to emit all local symbols first, then external symbols,
and finally undefined symbols. For each symbol type, the LC_DYSYMTAB
load command will record the range (start index and total number) of
those symbols in the symbol table.
This work was motivated by the fact that LLDB won't search for debug
info if LC_DYSYMTAB says there are no local symbols (since STABS symbols
are all local symbols). With this change, LLDB is now able to display
the source lines at a given breakpoint when debugging our binaries.
Some tests had to be updated due to local symbol names now appearing in
`llvm-objdump`'s output.
Reviewed By: #lld-macho, smeenai, clayborg
Differential Revision: https://reviews.llvm.org/D89285
-rw-r--r-- | lld/MachO/SyntheticSections.cpp | 72 | ||||
-rw-r--r-- | lld/MachO/SyntheticSections.h | 17 | ||||
-rw-r--r-- | lld/MachO/Writer.cpp | 19 | ||||
-rw-r--r-- | lld/test/MachO/stabs.s | 8 | ||||
-rw-r--r-- | lld/test/MachO/subsections-symbol-relocs.s | 4 | ||||
-rw-r--r-- | lld/test/MachO/symtab.s | 98 | ||||
-rw-r--r-- | lld/test/MachO/tlv.s | 15 |
7 files changed, 173 insertions, 60 deletions
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index feaa4b5d3e22..56a095c4a7e4 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -19,6 +19,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" @@ -630,14 +631,35 @@ void SymtabSection::emitFunStabs(Defined *defined) { } void SymtabSection::finalizeContents() { - InputFile *lastFile = nullptr; + // Local symbols aren't in the SymbolTable, so we walk the list of object + // files to gather them. + for (InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast<ObjFile>(file)) { + for (Symbol *sym : objFile->symbols) { + // TODO: when we implement -dead_strip, we should filter out symbols + // that belong to dead sections. + if (auto *defined = dyn_cast<Defined>(sym)) { + if (!defined->isExternal()) { + uint32_t strx = stringTableSection.addString(sym->getName()); + localSymbols.push_back({sym, strx}); + } + } + } + } + } + for (Symbol *sym : symtab->getSymbols()) { - // TODO support other symbol types - if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) { - sym->symtabIndex = symbols.size(); - symbols.push_back({sym, stringTableSection.addString(sym->getName())}); + uint32_t strx = stringTableSection.addString(sym->getName()); + if (auto *defined = dyn_cast<Defined>(sym)) { + assert(defined->isExternal()); + externalSymbols.push_back({sym, strx}); + } else if (sym->isInGot() || sym->isInStubs()) { + undefinedSymbols.push_back({sym, strx}); } + } + InputFile *lastFile = nullptr; + for (Symbol *sym : symtab->getSymbols()) { // Emit STABS symbols so that dsymutil and/or the debugger can map address // regions in the final binary to the source and object files from which // they originated. @@ -670,13 +692,35 @@ void SymtabSection::finalizeContents() { if (!stabs.empty()) emitEndSourceStab(); + + uint32_t symtabIndex = stabs.size(); + for (const SymtabEntry &entry : + concat<SymtabEntry>(localSymbols, externalSymbols, undefinedSymbols)) { + entry.sym->symtabIndex = symtabIndex++; + } +} + +uint32_t SymtabSection::getNumSymbols() const { + return stabs.size() + localSymbols.size() + externalSymbols.size() + + undefinedSymbols.size(); } void SymtabSection::writeTo(uint8_t *buf) const { auto *nList = reinterpret_cast<structs::nlist_64 *>(buf); - for (const SymtabEntry &entry : symbols) { + // Emit the stabs entries before the "real" symbols. We cannot emit them + // after as that would render Symbol::symtabIndex inaccurate. + for (const StabsEntry &entry : stabs) { + nList->n_strx = entry.strx; + nList->n_type = entry.type; + nList->n_sect = entry.sect; + nList->n_desc = entry.desc; + nList->n_value = entry.value; + ++nList; + } + + for (const SymtabEntry &entry : concat<const SymtabEntry>( + localSymbols, externalSymbols, undefinedSymbols)) { nList->n_strx = entry.strx; - // TODO support other symbol types // TODO populate n_desc with more flags if (auto *defined = dyn_cast<Defined>(entry.sym)) { if (defined->isAbsolute()) { @@ -684,7 +728,8 @@ void SymtabSection::writeTo(uint8_t *buf) const { nList->n_sect = MachO::NO_SECT; nList->n_value = defined->value; } else { - nList->n_type = MachO::N_EXT | MachO::N_SECT; + nList->n_type = + (defined->isExternal() ? MachO::N_EXT : 0) | MachO::N_SECT; nList->n_sect = defined->isec->parent->index; // For the N_SECT symbol type, n_value is the address of the symbol nList->n_value = defined->getVA(); @@ -693,17 +738,6 @@ void SymtabSection::writeTo(uint8_t *buf) const { } ++nList; } - - // Emit the stabs entries after the "real" symbols. We cannot emit them - // before as that would render Symbol::symtabIndex inaccurate. - for (const StabsEntry &entry : stabs) { - nList->n_strx = entry.strx; - nList->n_type = entry.type; - nList->n_sect = entry.sect; - nList->n_desc = entry.desc; - nList->n_value = entry.value; - ++nList; - } } IndirectSymtabSection::IndirectSymtabSection() diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 4a8820a285a8..e52bc480e9aa 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -420,11 +420,20 @@ struct StabsEntry { explicit StabsEntry(uint8_t type) : type(type) {} }; +// Symbols of the same type must be laid out contiguously: we choose to emit +// all local symbols first, then external symbols, and finally undefined +// symbols. For each symbol type, the LC_DYSYMTAB load command will record the +// range (start index and total number) of those symbols in the symbol table. class SymtabSection : public LinkEditSection { public: SymtabSection(StringTableSection &); void finalizeContents(); - size_t getNumSymbols() const { return stabs.size() + symbols.size(); } + uint32_t getNumSymbols() const; + uint32_t getNumLocalSymbols() const { + return stabs.size() + localSymbols.size(); + } + uint32_t getNumExternalSymbols() const { return externalSymbols.size(); } + uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); } uint64_t getRawSize() const override; void writeTo(uint8_t *buf) const override; @@ -435,8 +444,12 @@ private: void emitFunStabs(Defined *); StringTableSection &stringTableSection; + // STABS symbols are always local symbols, but we represent them with special + // entries because they may use fields like n_sect and n_desc differently. std::vector<StabsEntry> stabs; - std::vector<SymtabEntry> symbols; + std::vector<SymtabEntry> localSymbols; + std::vector<SymtabEntry> externalSymbols; + std::vector<SymtabEntry> undefinedSymbols; }; // The indirect symbol table is a list of 32-bit integers that serve as indices diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index e9d88af90ecc..39de749e2735 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -113,8 +113,10 @@ public: class LCDysymtab : public LoadCommand { public: - LCDysymtab(IndirectSymtabSection *indirectSymtabSection) - : indirectSymtabSection(indirectSymtabSection) {} + LCDysymtab(SymtabSection *symtabSection, + IndirectSymtabSection *indirectSymtabSection) + : symtabSection(symtabSection), + indirectSymtabSection(indirectSymtabSection) {} uint32_t getSize() const override { return sizeof(dysymtab_command); } @@ -122,11 +124,19 @@ public: auto *c = reinterpret_cast<dysymtab_command *>(buf); c->cmd = LC_DYSYMTAB; c->cmdsize = getSize(); + + c->ilocalsym = 0; + c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols(); + c->nextdefsym = symtabSection->getNumExternalSymbols(); + c->iundefsym = c->iextdefsym + c->nextdefsym; + c->nundefsym = symtabSection->getNumUndefinedSymbols(); + c->indirectsymoff = indirectSymtabSection->fileOff; c->nindirectsyms = indirectSymtabSection->getNumSymbols(); } - IndirectSymtabSection *indirectSymtabSection = nullptr; + SymtabSection *symtabSection; + IndirectSymtabSection *indirectSymtabSection; }; class LCSegment : public LoadCommand { @@ -396,7 +406,8 @@ void Writer::createLoadCommands() { in.header->addLoadCommand(make<LCDyldInfo>( in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports)); in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection)); - in.header->addLoadCommand(make<LCDysymtab>(indirectSymtabSection)); + in.header->addLoadCommand( + make<LCDysymtab>(symtabSection, indirectSymtabSection)); for (StringRef path : config->runtimePaths) in.header->addLoadCommand(make<LCRPath>(path)); diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s index 5e85ccc3bc4a..2c49453dcd43 100644 --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -12,18 +12,18 @@ # RUN: cd %t && %lld -lSystem test.o foo.o -o test # RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t -# CHECK-DAG: [[#%x, MAIN:]] T _main -# CHECK-DAG: [[#%x, FOO: ]] T _foo # CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/test.o -# CHECK-NEXT: [[#MAIN]] - 01 0000 FUN _main +# CHECK-NEXT: [[#%x, MAIN:]] - 01 0000 FUN _main # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO # CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp # CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/foo.o -# CHECK-NEXT: [[#FOO]] - 01 0000 FUN _foo +# CHECK-NEXT: [[#%x, FOO:]] - 01 0000 FUN _foo # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO +# CHECK-NEXT: [[#MAIN]] T _main +# CHECK-NEXT: [[#FOO]] T _foo #--- test.s .text diff --git a/lld/test/MachO/subsections-symbol-relocs.s b/lld/test/MachO/subsections-symbol-relocs.s index bb5470515b5c..8010a50e7444 100644 --- a/lld/test/MachO/subsections-symbol-relocs.s +++ b/lld/test/MachO/subsections-symbol-relocs.s @@ -22,7 +22,7 @@ # RUN: %lld -o %t/test-2 %t/test.o -order_file %t/order-file-2 # RUN: llvm-objdump -d --no-show-raw-insn %t/test-2 | FileCheck %s # CHECK-LABEL: Disassembly of section __TEXT,__text: -# CHECK: <_bar>: +# CHECK: <_ba{{r|z}}>: # CHECK-NEXT: callq {{.*}} <_foo> # CHECK-EMPTY: # CHECK-NEXT: <_qux>: @@ -30,7 +30,7 @@ # CHECK: <_foo>: # CHECK-NEXT: retq # CHECK: <_main>: -# CHECK-NEXT: callq {{.*}} <_bar> +# CHECK-NEXT: callq {{.*}} <_ba{{r|z}}> # CHECK-NEXT: movq $0, %rax # CHECK-NEXT: retq diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s index 1e9ec157193d..22dc80ad62f3 100644 --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -1,56 +1,108 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o -# RUN: %lld -o %t %t.o -# RUN: llvm-readobj -symbols %t | FileCheck %s +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: %lld -dylib %t/libfoo.o -o %t/libfoo.dylib +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test +# RUN: llvm-readobj --syms --macho-dysymtab %t/test | FileCheck %s # CHECK: Symbols [ # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _main +# CHECK-NEXT: Name: _local (1) +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _main (8) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _external (54) +# CHECK-NEXT: Extern +# CHECK-NEXT: Type: Section (0xE) +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: bar +# CHECK-NEXT: Name: _external_weak (64) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) # CHECK-NEXT: Section: __text (0x1) -# CHECK-NEXT: RefType: +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x80) # CHECK-NEXT: WeakDef (0x80) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: foo +# CHECK-NEXT: Name: __dyld_private (102) # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data -# CHECK-NEXT: RefType: +# CHECK-NEXT: Section: __data (0x4) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) # CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] -# CHECK-NEXT: Value: +# CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: dyld_stub_binder (14) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Symbol { +# CHECK-NEXT: Name: _dynamic (79) +# CHECK-NEXT: Type: Undef (0x0) +# CHECK-NEXT: Section: (0x0) +# CHECK-NEXT: RefType: UndefinedNonLazy (0x0) +# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Value: 0x0 # CHECK-NEXT: } # CHECK-NEXT: ] +# CHECK-NEXT: Dysymtab { +# CHECK-NEXT: ilocalsym: 0 +# CHECK-NEXT: nlocalsym: 1 +# CHECK-NEXT: iextdefsym: 1 +# CHECK-NEXT: nextdefsym: 4 +# CHECK-NEXT: iundefsym: 5 +# CHECK-NEXT: nundefsym: 2 + +#--- libfoo.s +.globl _dynamic +_dynamic: + +#--- test.s +.globl _main, _external, _external_weak .data -.global foo -foo: - .asciz "Hello world!\n" +_external: + .space 0 +_local: + .space 0 .text -.weak_definition bar -.global bar -.global _main +.weak_definition _external_weak +_external_weak: + .space 0 _main: + callq _dynamic mov $0, %rax ret - -bar: - mov $2, %rax - ret diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s index cf472bcfb14e..bf0dfcd02b56 100644 --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -16,10 +16,13 @@ # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_data: # CHECK-EMPTY: -# CHECK-NEXT: <__thread_data>: -# CHECK-NEXT: ef -# CHECK-NEXT: be ad de be ba -# CHECK-NEXT: fe ca +# CHECK-NEXT: <_foo$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 +# CHECK-EMPTY: +# CHECK-NEXT: <_bar$tlv$init>: +# CHECK-NEXT: 00 00 +# CHECK-NEXT: 00 00 # CHECK-EMPTY: # CHECK-NEXT: Disassembly of section __DATA,__thread_vars: # CHECK-EMPTY: @@ -41,9 +44,9 @@ _main: .section __DATA,__thread_data,thread_local_regular _foo$tlv$init: - .long 0xdeadbeef + .space 4 _bar$tlv$init: - .long 0xcafebabe + .space 4 .section __DATA,__thread_vars,thread_local_variables .globl _foo, _bar |