summaryrefslogtreecommitdiff
path: root/lld/MachO
diff options
context:
space:
mode:
authorJez Ng <jezng@fb.com>2023-03-10 22:28:36 -0500
committerJez Ng <jezng@fb.com>2023-03-11 01:40:14 -0500
commitbb69a66ced27918f894fb0d5e58e22fda6958d99 (patch)
treed7fd4815650ae105b21bf42e6526dce5d7f39354 /lld/MachO
parent3b4cb1e96c645bb833fe710856479c31383859bb (diff)
downloadllvm-bb69a66ced27918f894fb0d5e58e22fda6958d99.tar.gz
[lld-macho] Coalesce local symbol aliases along with their aliased weak def
This supersedes {D139069}. In some ways we are now closer to ld64's behavior: we previously only did this coalescing for private-label symbols, but now we do it for all locals, just like ld64. However, we no longer generate weak binds when a local alias to a weak symbol is referenced. This is merely for implementation simplicity; it's not clear to me that any real-world programs depend on us emulating this behavior. The problem with the previous approach is that we ended up with duplicate references to the same symbol instance in our InputFiles, which translated into duplicate symbols in our output. While we could work around that problem by performing a dedup step before emitting the symbol table, it seems cleaner to not generate duplicate references in the first place. Numbers for chromium_framework on my 16 Core Intel Mac Pro: base diff difference (95% CI) sys_time 2.243 ± 0.093 2.231 ± 0.066 [ -2.5% .. +1.4%] user_time 6.529 ± 0.087 6.080 ± 0.050 [ -7.5% .. -6.3%] wall_time 6.928 ± 0.175 6.474 ± 0.112 [ -7.7% .. -5.4%] samples 26 31 Yep, that's a massive win... because it turns out that {D140606} and {D139069} caused a regression (of about the same size.) I just didn't think to measure them back then. I'm guessing all the extra symbols we have been emitting did not help perf at all... Reviewed By: lgrey Differential Revision: https://reviews.llvm.org/D145455
Diffstat (limited to 'lld/MachO')
-rw-r--r--lld/MachO/InputFiles.cpp28
-rw-r--r--lld/MachO/SymbolTable.cpp51
2 files changed, 54 insertions, 25 deletions
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index ed0d98a2ecec..e0f3e0a4d459 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -849,15 +849,12 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// We populate subsections by repeatedly splitting the last (highest
// address) subsection.
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
- // Put private-label symbols that have no flags after other symbols at the
- // same address.
- StringRef lhsName = getSymName(nList[lhs]);
- StringRef rhsName = getSymName(nList[rhs]);
- if (nList[lhs].n_value == nList[rhs].n_value) {
- if (isPrivateLabel(lhsName) && isPrivateLabel(rhsName))
- return nList[lhs].n_desc > nList[rhs].n_desc;
- return !isPrivateLabel(lhsName) && isPrivateLabel(rhsName);
- }
+ // Put extern weak symbols after other symbols at the same address so
+ // that weak symbol coalescing works correctly. See
+ // SymbolTable::addDefined() for details.
+ if (nList[lhs].n_value == nList[rhs].n_value &&
+ nList[lhs].n_type & N_EXT && nList[rhs].n_type & N_EXT)
+ return !(nList[lhs].n_desc & N_WEAK_DEF) && (nList[rhs].n_desc & N_WEAK_DEF);
return nList[lhs].n_value < nList[rhs].n_value;
});
for (size_t j = 0; j < symbolIndices.size(); ++j) {
@@ -883,17 +880,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
if (!subsectionsViaSymbols || symbolOffset == 0 ||
sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
isec->hasAltEntry = symbolOffset != 0;
- // If we have an private-label symbol that's an alias, and that alias
- // doesn't have any flags of its own, then we can just reuse the aliased
- // symbol. Our sorting step above ensures that any such symbols will
- // appear after the non-private-label ones. See weak-def-alias-ignored.s
- // for the motivation behind this.
- if (symbolOffset == 0 && isPrivateLabel(name) && j != 0 &&
- sym.n_desc == 0)
- symbols[symIndex] = symbols[symbolIndices[j - 1]];
- else
- symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
- symbolSize, forceHidden);
+ symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
+ symbolSize, forceHidden);
continue;
}
auto *concatIsec = cast<ConcatInputSection>(isec);
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 03631239ccba..09b3d8a376e6 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -61,6 +61,38 @@ struct DuplicateSymbolDiag {
SmallVector<DuplicateSymbolDiag> dupSymDiags;
} // namespace
+// Move symbols at \p fromOff in \p fromIsec into \p toIsec, unless that symbol
+// is \p skip.
+static void transplantSymbolsAtOffset(InputSection *fromIsec,
+ InputSection *toIsec, Defined *skip,
+ uint64_t fromOff, uint64_t toOff) {
+ // Ensure the symbols will still be in address order after our insertions.
+ auto insertIt = llvm::upper_bound(toIsec->symbols, toOff,
+ [](uint64_t off, const Symbol *s) {
+ return cast<Defined>(s)->value < off;
+ });
+ llvm::erase_if(fromIsec->symbols, [&](Symbol *s) {
+ auto *d = cast<Defined>(s);
+ if (d->value != fromOff)
+ return false;
+ if (d != skip) {
+ // This repeated insertion will be quadratic unless insertIt is the end
+ // iterator. However, that is typically the case for files that have
+ // .subsections_via_symbols set.
+ insertIt = toIsec->symbols.insert(insertIt, d);
+ d->isec = toIsec;
+ d->value = toOff;
+ // We don't want to have more than one unwindEntry at a given address, so
+ // drop the redundant ones. We We can safely drop the unwindEntries of
+ // the symbols in fromIsec since we will be adding another unwindEntry as
+ // we finish parsing toIsec's file. (We can assume that toIsec has its
+ // own unwindEntry because of the ODR.)
+ d->unwindEntry = nullptr;
+ }
+ return true;
+ });
+}
+
Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef,
@@ -82,18 +114,27 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
defined->referencedDynamically |= isReferencedDynamically;
defined->noDeadStrip |= noDeadStrip;
}
- // FIXME: Handle this for bitcode files.
- if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
+ if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) {
concatIsec->wasCoalesced = true;
+ // Any local symbols that alias the coalesced symbol should be moved
+ // into the prevailing section. Note that we have sorted the symbols
+ // in ObjFile::parseSymbols() such that extern weak symbols appear
+ // last, so we don't need to worry about subsequent symbols being
+ // added to an already-coalesced section.
+ if (defined->isec)
+ transplantSymbolsAtOffset(concatIsec, defined->isec,
+ /*skip=*/nullptr, value, defined->value);
+ }
return defined;
}
if (defined->isWeakDef()) {
- // FIXME: Handle this for bitcode files.
if (auto concatIsec =
dyn_cast_or_null<ConcatInputSection>(defined->isec)) {
concatIsec->wasCoalesced = true;
- concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined));
+ if (isec)
+ transplantSymbolsAtOffset(concatIsec, isec, defined, defined->value,
+ value);
}
} else {
std::string srcLoc1 = defined->getSourceLocation();
@@ -382,7 +423,7 @@ struct UndefinedDiag {
};
MapVector<const Undefined *, UndefinedDiag> undefs;
-}
+} // namespace
void macho::reportPendingDuplicateSymbols() {
for (const auto &duplicate : dupSymDiags) {