diff options
author | Jez Ng <jezng@fb.com> | 2023-03-07 11:09:01 -0800 |
---|---|---|
committer | Jez Ng <jezng@fb.com> | 2023-03-07 14:48:56 -0800 |
commit | ef122753db7fe8e9a0b7bedd46d2f3668a780fcb (patch) | |
tree | 306cbe31d27ed5afedc21b341a306cb6ee891294 /lld/MachO | |
parent | b461398f1ce307ec80708b7eb50f3bc82b76ed3f (diff) | |
download | llvm-ef122753db7fe8e9a0b7bedd46d2f3668a780fcb.tar.gz |
[lld-macho] Warn on method name collisions from category definitions
This implements ld64's checks for duplicate method names in categories &
classes.
In addition, this sets us up for implementing Obj-C category merging.
This diff handles most of the parsing work; what's left is rewriting
those category / class structures.
Numbers for chromium_framework:
base diff difference (95% CI)
sys_time 2.182 ± 0.027 2.200 ± 0.047 [ -0.2% .. +1.8%]
user_time 6.451 ± 0.034 6.479 ± 0.062 [ -0.0% .. +0.9%]
wall_time 6.841 ± 0.048 6.885 ± 0.105 [ -0.1% .. +1.4%]
samples 33 22
Fixes https://github.com/llvm/llvm-project/issues/54912.
Reviewed By: #lld-macho, thevinster, oontvoo
Differential Revision: https://reviews.llvm.org/D142916
Diffstat (limited to 'lld/MachO')
-rw-r--r-- | lld/MachO/Driver.cpp | 2 | ||||
-rw-r--r-- | lld/MachO/InputFiles.cpp | 9 | ||||
-rw-r--r-- | lld/MachO/InputSection.cpp | 17 | ||||
-rw-r--r-- | lld/MachO/InputSection.h | 9 | ||||
-rw-r--r-- | lld/MachO/Layout.h | 74 | ||||
-rw-r--r-- | lld/MachO/ObjC.cpp | 227 | ||||
-rw-r--r-- | lld/MachO/ObjC.h | 3 | ||||
-rw-r--r-- | lld/MachO/Relocations.cpp | 10 | ||||
-rw-r--r-- | lld/MachO/Relocations.h | 2 | ||||
-rw-r--r-- | lld/MachO/UnwindInfoSection.cpp | 64 |
10 files changed, 366 insertions, 51 deletions
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 0f2326b305b1..322d7de30b63 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1920,6 +1920,8 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, if (config->deadStrip) markLive(); + objc::checkCategories(); + // ICF assumes that all literals have been folded already, so we must run // foldIdenticalLiterals before foldIdenticalSections. foldIdenticalLiterals(); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index ed0d98a2ecec..65e06a8557d9 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -1272,13 +1272,10 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader, } } if (personalityAddrOff != 0) { - auto personalityRelocIt = - llvm::find_if(isec->relocs, [=](const macho::Reloc &r) { - return r.offset == personalityAddrOff; - }); - if (personalityRelocIt == isec->relocs.end()) + const auto *personalityReloc = isec->getRelocAt(personalityAddrOff); + if (!personalityReloc) reader.failOn(off, "Failed to locate relocation for personality symbol"); - cie.personalitySymbol = personalityRelocIt->referent.get<macho::Symbol *>(); + cie.personalitySymbol = personalityReloc->referent.get<macho::Symbol *>(); } return cie; } diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 1d8d58477139..ff5a15067adc 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -135,6 +135,14 @@ std::string InputSection::getSourceLocation(uint64_t off) const { return {}; } +const Reloc *InputSection::getRelocAt(uint32_t off) const { + auto it = llvm::find_if( + relocs, [=](const macho::Reloc &r) { return r.offset == off; }); + if (it == relocs.end()) + return nullptr; + return &*it; +} + void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { align = std::max(align, copy->align); copy->live = false; @@ -259,6 +267,15 @@ const StringPiece &CStringInputSection::getStringPiece(uint64_t off) 
const { return const_cast<CStringInputSection *>(this)->getStringPiece(off); } +size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { + if (off >= data.size()) + fatal(toString(this) + ": offset is outside the section"); + + auto it = + partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); + return std::distance(pieces.begin(), it) - 1; +} + uint64_t CStringInputSection::getOffset(uint64_t off) const { const StringPiece &piece = getStringPiece(off); uint64_t addend = off - piece.inSecOff; diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 5a6a205f9047..becb01017d63 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -55,6 +55,8 @@ public: // Return the source line corresponding to an address, or the empty string. // Format: Source.cpp:123 (/path/to/Source.cpp:123) std::string getSourceLocation(uint64_t off) const; + // Return the relocation at \p off, if it exists. This does a linear search. + const Reloc *getRelocAt(uint32_t off) const; // Whether the data at \p off in this InputSection is live. virtual bool isLive(uint64_t off) const = 0; virtual void markLive(uint64_t off) = 0; @@ -218,6 +220,10 @@ public: return toStringRef(data.slice(begin, end - begin)); } + StringRef getStringRefAtOffset(uint64_t off) const { + return getStringRef(getStringPieceIndex(off)); + } + // Returns i'th piece as a CachedHashStringRef. This function is very hot when // string merging is enabled, so we want to inline. 
LLVM_ATTRIBUTE_ALWAYS_INLINE @@ -232,6 +238,9 @@ public: bool deduplicateLiterals = false; std::vector<StringPiece> pieces; + +private: + size_t getStringPieceIndex(uint64_t off) const; }; class WordLiteralInputSection final : public InputSection { diff --git a/lld/MachO/Layout.h b/lld/MachO/Layout.h new file mode 100644 index 000000000000..6a7653f315e3 --- /dev/null +++ b/lld/MachO/Layout.h @@ -0,0 +1,74 @@ +//===- Layout.h -----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Convenience macros for obtaining offsets of members in structs. +// +// Usage: +// +// #define FOR_EACH_FOO_FIELD(DO) \ +// DO(Ptr, bar) \ +// DO(uint32_t, baz) \ +// CREATE_LAYOUT_CLASS(Foo, FOR_EACH_FOO_FIELD) +// #undef FOR_EACH_FOO_FIELD +// +// This will generate +// +// struct FooLayout { +// uint32_t barOffset; +// uint32_t bazOffset; +// uint32_t totalSize; +// +// FooLayout(size_t wordSize) { +// if (wordSize == 8) +// init<uint64_t>(); +// else { +// assert(wordSize == 4); +// init<uint32_t>(); +// } +// } +// +// private: +// template <class Ptr> void init() { +// FOR_EACH_FIELD(_INIT_OFFSET); +// barOffset = offsetof(Layout<Ptr>, bar); +// bazOffset = offsetof(Layout<Ptr>, baz); +// totalSize = sizeof(Layout<Ptr>); +// } +// template <class Ptr> struct Layout { +// Ptr bar; +// uint32_t baz; +// }; +// }; + +#define _OFFSET_FOR_FIELD(_, name) uint32_t name##Offset; +#define _INIT_OFFSET(type, name) name##Offset = offsetof(Layout<Ptr>, name); +#define _LAYOUT_ENTRY(type, name) type name; + +#define CREATE_LAYOUT_CLASS(className, FOR_EACH_FIELD) \ + struct className##Layout { \ + FOR_EACH_FIELD(_OFFSET_FOR_FIELD) \ + uint32_t totalSize; \ + \ + className##Layout(size_t 
wordSize) { \ + if (wordSize == 8) \ + init<uint64_t>(); \ + else { \ + assert(wordSize == 4); \ + init<uint32_t>(); \ + } \ + } \ + \ + private: \ + template <class Ptr> void init() { \ + FOR_EACH_FIELD(_INIT_OFFSET); \ + totalSize = sizeof(Layout<Ptr>); \ + } \ + template <class Ptr> struct Layout { \ + FOR_EACH_FIELD(_LAYOUT_ENTRY) \ + }; \ + } diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index d484c4029f6b..bdb125859b71 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -9,10 +9,12 @@ #include "ObjC.h" #include "InputFiles.h" #include "InputSection.h" +#include "Layout.h" #include "OutputSegment.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -66,3 +68,228 @@ bool macho::hasObjCSection(MemoryBufferRef mb) { return false; } } + +namespace { + +#define FOR_EACH_CATEGORY_FIELD(DO) \ + DO(Ptr, name) \ + DO(Ptr, klass) \ + DO(Ptr, instanceMethods) \ + DO(Ptr, classMethods) \ + DO(Ptr, protocols) \ + DO(Ptr, instanceProps) \ + DO(Ptr, classProps) + +CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); + +#undef FOR_EACH_CATEGORY_FIELD + +#define FOR_EACH_CLASS_FIELD(DO) \ + DO(Ptr, metaClass) \ + DO(Ptr, superClass) \ + DO(Ptr, methodCache) \ + DO(Ptr, vtable) \ + DO(Ptr, roData) + +CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); + +#undef FOR_EACH_CLASS_FIELD + +#define FOR_EACH_RO_CLASS_FIELD(DO) \ + DO(uint32_t, flags) \ + DO(uint32_t, instanceStart) \ + DO(Ptr, instanceSize) \ + DO(Ptr, ivarLayout) \ + DO(Ptr, name) \ + DO(Ptr, baseMethods) \ + DO(Ptr, baseProtocols) \ + DO(Ptr, ivars) \ + DO(Ptr, weakIvarLayout) \ + DO(Ptr, baseProperties) + +CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); + +#undef FOR_EACH_RO_CLASS_FIELD + +#define FOR_EACH_LIST_HEADER(DO) \ + DO(uint32_t, size) \ + DO(uint32_t, count) + +CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); + +#undef FOR_EACH_LIST_HEADER + +#define 
FOR_EACH_METHOD(DO) \ + DO(Ptr, name) \ + DO(Ptr, type) \ + DO(Ptr, impl) + +CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); + +#undef FOR_EACH_METHOD + +enum MethodContainerKind { + MCK_Class, + MCK_Category, +}; + +struct MethodContainer { + MethodContainerKind kind; + const ConcatInputSection *isec; +}; + +enum MethodKind { + MK_Instance, + MK_Static, +}; + +struct ObjcClass { + DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; + DenseMap<CachedHashStringRef, MethodContainer> classMethods; +}; + +} // namespace + +class ObjcCategoryChecker { +public: + ObjcCategoryChecker(); + void parseCategory(const ConcatInputSection *catListIsec); + +private: + void parseClass(const Defined *classSym); + void parseMethods(const ConcatInputSection *methodsIsec, + const Symbol *methodContainer, + const ConcatInputSection *containerIsec, + MethodContainerKind, MethodKind); + + CategoryLayout catLayout; + ClassLayout classLayout; + ROClassLayout roClassLayout; + ListHeaderLayout listHeaderLayout; + MethodLayout methodLayout; + + DenseMap<const Symbol *, ObjcClass> classMap; +}; + +ObjcCategoryChecker::ObjcCategoryChecker() + : catLayout(target->wordSize), classLayout(target->wordSize), + roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), + methodLayout(target->wordSize) {} + +// \p r must point to an offset within a cstring section. 
+static StringRef getReferentString(const Reloc &r) { + if (auto *isec = r.referent.dyn_cast<InputSection *>()) + return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend); + auto *d = cast<Defined>(r.referent.get<Symbol *>()); + return cast<CStringInputSection>(d->isec)->getStringRefAtOffset(d->value + r.addend); +} + +void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, + const Symbol *methodContainerSym, + const ConcatInputSection *containerIsec, + MethodContainerKind mcKind, + MethodKind mKind) { + ObjcClass &klass = classMap[methodContainerSym]; + for (const Reloc &r : methodsIsec->relocs) { + if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != + methodLayout.nameOffset) + continue; + + CachedHashStringRef methodName(getReferentString(r)); + auto &methodMap = + mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; + if (methodMap + .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) + .second) + continue; + + // We have a duplicate; generate a warning message. + const auto &mc = methodMap.lookup(methodName); + const Reloc *nameReloc = nullptr; + if (mc.kind == MCK_Category) { + nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); + } else { + assert(mc.kind == MCK_Class); + const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) + ->getReferentInputSection(); + nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); + } + StringRef containerName = getReferentString(*nameReloc); + StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; + + // We should only ever encounter collisions when parsing category methods + // (since the Class struct is parsed before any of its categories). + assert(mcKind == MCK_Category); + StringRef newCatName = + getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset)); + + StringRef containerType = mc.kind == MCK_Category ? 
"category" : "class"; + warn("method '" + methPrefix + methodName.val() + + "' has conflicting definitions:\n>>> defined in category " + + newCatName + " from " + toString(containerIsec->getFile()) + + "\n>>> defined in " + containerType + " " + containerName + " from " + + toString(mc.isec->getFile())); + } +} + +void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { + auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); + if (!classReloc) + return; + + auto *classSym = classReloc->referent.get<Symbol *>(); + if (auto *d = dyn_cast<Defined>(classSym)) + if (!classMap.count(d)) + parseClass(d); + + if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { + parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), + classSym, catIsec, MCK_Category, MK_Static); + } + + if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { + parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), + classSym, catIsec, MCK_Category, MK_Instance); + } +} + +void ObjcCategoryChecker::parseClass(const Defined *classSym) { + // Given a Class struct, get its corresponding Methods struct + auto getMethodsIsec = + [&](const InputSection *classIsec) -> ConcatInputSection * { + if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { + const auto *roIsec = + cast<ConcatInputSection>(r->getReferentInputSection()); + if (const auto *r = roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { + if (auto *methodsIsec = + cast_or_null<ConcatInputSection>(r->getReferentInputSection())) + return methodsIsec; + } + } + return nullptr; + }; + + const auto *classIsec = cast<ConcatInputSection>(classSym->isec); + + // Parse instance methods. + if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) + parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, + MK_Instance); + + // Class methods are contained in the metaclass. 
+ if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset)) + if (const auto *classMethodsIsec = getMethodsIsec( + cast<ConcatInputSection>(r->getReferentInputSection()))) + parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); +} + +void objc::checkCategories() { + ObjcCategoryChecker checker; + for (const InputSection *isec : inputSections) { + if (isec->getName() == section_names::objcCatList) + for (const Reloc &r : isec->relocs) { + auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); + checker.parseCategory(catIsec); + } + } +} diff --git a/lld/MachO/ObjC.h b/lld/MachO/ObjC.h index 67fa4114db00..560c5cc0bc50 100644 --- a/lld/MachO/ObjC.h +++ b/lld/MachO/ObjC.h @@ -20,6 +20,9 @@ constexpr const char metaclass[] = "_OBJC_METACLASS_$_"; constexpr const char ehtype[] = "_OBJC_EHTYPE_$_"; constexpr const char ivar[] = "_OBJC_IVAR_$_"; +// Check for duplicate method names within related categories / classes. +void checkCategories(); + } // namespace objc bool hasObjCSection(llvm::MemoryBufferRef); diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp index 9e5ac69612cf..4e840c6912cc 100644 --- a/lld/MachO/Relocations.cpp +++ b/lld/MachO/Relocations.cpp @@ -21,6 +21,16 @@ using namespace lld::macho; static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24, "Try to minimize Reloc's size; we create many instances"); +InputSection *Reloc::getReferentInputSection() const { + if (const auto *sym = referent.dyn_cast<Symbol *>()) { + if (const auto *d = dyn_cast<Defined>(sym)) + return d->isec; + return nullptr; + } else { + return referent.get<InputSection *>(); + } +} + bool macho::validateSymbolRelocation(const Symbol *sym, const InputSection *isec, const Reloc &r) { const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h index 023d25a795a0..5f161c8fcbfd 100644 --- a/lld/MachO/Relocations.h +++ b/lld/MachO/Relocations.h @@ -67,6 
+67,8 @@ struct Reloc { int64_t addend, llvm::PointerUnion<Symbol *, InputSection *> referent) : type(type), pcrel(pcrel), length(length), offset(offset), addend(addend), referent(referent) {} + + InputSection *getReferentInputSection() const; }; bool validateSymbolRelocation(const Symbol *, const InputSection *, diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index b7d0d563360a..d480f7ed294a 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -8,6 +8,7 @@ #include "UnwindInfoSection.h" #include "InputSection.h" +#include "Layout.h" #include "OutputSection.h" #include "OutputSegment.h" #include "SymbolTable.h" @@ -88,41 +89,18 @@ using namespace lld::macho; // TODO(gkm): how do we align the 2nd-level pages? -// The offsets of various fields in the on-disk representation of each compact -// unwind entry. -struct CompactUnwindOffsets { - uint32_t functionAddress; - uint32_t functionLength; - uint32_t encoding; - uint32_t personality; - uint32_t lsda; - - CompactUnwindOffsets(size_t wordSize) { - if (wordSize == 8) - init<uint64_t>(); - else { - assert(wordSize == 4); - init<uint32_t>(); - } - } +// The various fields in the on-disk representation of each compact unwind +// entry. 
+#define FOR_EACH_CU_FIELD(DO) \ + DO(Ptr, functionAddress) \ + DO(uint32_t, functionLength) \ + DO(compact_unwind_encoding_t, encoding) \ + DO(Ptr, personality) \ + DO(Ptr, lsda) -private: - template <class Ptr> void init() { - functionAddress = offsetof(Layout<Ptr>, functionAddress); - functionLength = offsetof(Layout<Ptr>, functionLength); - encoding = offsetof(Layout<Ptr>, encoding); - personality = offsetof(Layout<Ptr>, personality); - lsda = offsetof(Layout<Ptr>, lsda); - } +CREATE_LAYOUT_CLASS(CompactUnwind, FOR_EACH_CU_FIELD); - template <class Ptr> struct Layout { - Ptr functionAddress; - uint32_t functionLength; - compact_unwind_encoding_t encoding; - Ptr personality; - Ptr lsda; - }; -}; +#undef FOR_EACH_CU_FIELD // LLD's internal representation of a compact unwind entry. struct CompactUnwindEntry { @@ -148,7 +126,7 @@ struct SecondLevelPage { // lengthy definition of UnwindInfoSection. class UnwindInfoSectionImpl final : public UnwindInfoSection { public: - UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {} + UnwindInfoSectionImpl() : cuLayout(target->wordSize) {} uint64_t getSize() const override { return unwindInfoSize; } void prepare() override; void finalize() override; @@ -162,7 +140,7 @@ private: uint64_t unwindInfoSize = 0; std::vector<decltype(symbols)::value_type> symbolsVec; - CompactUnwindOffsets cuOffsets; + CompactUnwindLayout cuLayout; std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings; EncodingMap commonEncodingIndexes; // The entries here will be in the same order as their originating symbols @@ -261,7 +239,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { // compact unwind entries that references them, and thus appear as section // relocs. There is no need to prepare them. We only prepare relocs for // personality functions. 
- if (r.offset != cuOffsets.personality) + if (r.offset != cuLayout.personalityOffset) continue; if (auto *s = r.referent.dyn_cast<Symbol *>()) { @@ -373,17 +351,13 @@ void UnwindInfoSectionImpl::relocateCompactUnwind( auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) - target->wordSize; cu.functionLength = - support::endian::read32le(buf + cuOffsets.functionLength); - cu.encoding = support::endian::read32le(buf + cuOffsets.encoding); + support::endian::read32le(buf + cuLayout.functionLengthOffset); + cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset); for (const Reloc &r : d->unwindEntry->relocs) { - if (r.offset == cuOffsets.personality) { + if (r.offset == cuLayout.personalityOffset) cu.personality = r.referent.get<Symbol *>(); - } else if (r.offset == cuOffsets.lsda) { - if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) - cu.lsda = cast<Defined>(referentSym)->isec; - else - cu.lsda = r.referent.get<InputSection *>(); - } + else if (r.offset == cuLayout.lsdaOffset) + cu.lsda = r.getReferentInputSection(); } }); } |