//===- ICF.cpp ------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ICF.h" #include "ConcatOutputSection.h" #include "Config.h" #include "InputSection.h" #include "SymbolTable.h" #include "Symbols.h" #include "UnwindInfoSection.h" #include "lld/Common/CommonLinkerContext.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include using namespace llvm; using namespace lld; using namespace lld::macho; static constexpr bool verboseDiagnostics = false; class ICF { public: ICF(std::vector &inputs); void run(); using EqualsFn = bool (ICF::*)(const ConcatInputSection *, const ConcatInputSection *); void segregate(size_t begin, size_t end, EqualsFn); size_t findBoundary(size_t begin, size_t end); void forEachClassRange(size_t begin, size_t end, llvm::function_ref func); void forEachClass(llvm::function_ref func); bool equalsConstant(const ConcatInputSection *ia, const ConcatInputSection *ib); bool equalsVariable(const ConcatInputSection *ia, const ConcatInputSection *ib); // ICF needs a copy of the inputs vector because its equivalence-class // segregation algorithm destroys the proper sequence. std::vector icfInputs; unsigned icfPass = 0; std::atomic icfRepeat{false}; std::atomic equalsConstantCount{0}; std::atomic equalsVariableCount{0}; }; ICF::ICF(std::vector &inputs) { icfInputs.assign(inputs.begin(), inputs.end()); } // ICF = Identical Code Folding // // We only fold __TEXT,__text, so this is really "code" folding, and not // "COMDAT" folding. String and scalar constant literals are deduplicated // elsewhere. // // Summary of segments & sections: // // The __TEXT segment is readonly at the MMU. Some sections are already // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are // synthetic and inherently free of duplicates (__TEXT,__stubs & // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, // because doing so induces many test failures. // // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and // thus ineligible for ICF. // // The __DATA_CONST segment is read/write at the MMU, but is logically const to // the application after dyld applies fixups to pointer data. We currently // fold only the __DATA_CONST,__cfstring section. // // The __DATA segment is read/write at the MMU, and as application-writeable // data, none of its sections are eligible for ICF. // // Please see the large block comment in lld/ELF/ICF.cpp for an explanation // of the segregation algorithm. // // FIXME(gkm): implement keep-unique attributes // FIXME(gkm): implement address-significance tables for MachO object files // Compare "non-moving" parts of two ConcatInputSections, namely everything // except references to other ConcatInputSections. bool ICF::equalsConstant(const ConcatInputSection *ia, const ConcatInputSection *ib) { if (verboseDiagnostics) ++equalsConstantCount; // We can only fold within the same OutputSection. if (ia->parent != ib->parent) return false; if (ia->data.size() != ib->data.size()) return false; if (ia->data != ib->data) return false; if (ia->relocs.size() != ib->relocs.size()) return false; auto f = [](const Reloc &ra, const Reloc &rb) { if (ra.type != rb.type) return false; if (ra.pcrel != rb.pcrel) return false; if (ra.length != rb.length) return false; if (ra.offset != rb.offset) return false; if (ra.referent.is() != rb.referent.is()) return false; InputSection *isecA, *isecB; uint64_t valueA = 0; uint64_t valueB = 0; if (ra.referent.is()) { const auto *sa = ra.referent.get(); const auto *sb = rb.referent.get(); if (sa->kind() != sb->kind()) return false; // ICF runs before Undefineds are treated (and potentially converted into // DylibSymbols). if (isa(sa) || isa(sa)) return sa == sb && ra.addend == rb.addend; assert(isa(sa)); const auto *da = cast(sa); const auto *db = cast(sb); if (!da->isec || !db->isec) { assert(da->isAbsolute() && db->isAbsolute()); return da->value + ra.addend == db->value + rb.addend; } isecA = da->isec; valueA = da->value; isecB = db->isec; valueB = db->value; } else { isecA = ra.referent.get(); isecB = rb.referent.get(); } if (isecA->parent != isecB->parent) return false; // Sections with identical parents should be of the same kind. assert(isecA->kind() == isecB->kind()); // We will compare ConcatInputSection contents in equalsVariable. if (isa(isecA)) return ra.addend == rb.addend; // Else we have two literal sections. References to them are equal iff their // offsets in the output section are equal. if (ra.referent.is()) // For symbol relocs, we compare the contents at the symbol address. We // don't do `getOffset(value + addend)` because value + addend may not be // a valid offset in the literal section. return isecA->getOffset(valueA) == isecB->getOffset(valueB) && ra.addend == rb.addend; else { assert(valueA == 0 && valueB == 0); // For section relocs, we compare the content at the section offset. return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); } }; return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f); } // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not // handled by equalsConstant(). bool ICF::equalsVariable(const ConcatInputSection *ia, const ConcatInputSection *ib) { if (verboseDiagnostics) ++equalsVariableCount; assert(ia->relocs.size() == ib->relocs.size()); auto f = [this](const Reloc &ra, const Reloc &rb) { // We already filtered out mismatching values/addends in equalsConstant. if (ra.referent == rb.referent) return true; const ConcatInputSection *isecA, *isecB; if (ra.referent.is()) { // Matching DylibSymbols are already filtered out by the // identical-referent check above. Non-matching DylibSymbols were filtered // out in equalsConstant(). So we can safely cast to Defined here. const auto *da = cast(ra.referent.get()); const auto *db = cast(rb.referent.get()); if (da->isAbsolute()) return true; isecA = dyn_cast(da->isec); if (!isecA) return true; // literal sections were checked in equalsConstant. isecB = cast(db->isec); } else { const auto *sa = ra.referent.get(); const auto *sb = rb.referent.get(); isecA = dyn_cast(sa); if (!isecA) return true; isecB = cast(sb); } return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; }; if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) return false; // If there are symbols with associated unwind info, check that the unwind // info matches. For simplicity, we only handle the case where there are only // symbols at offset zero within the section (which is typically the case with // .subsections_via_symbols.) auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; }; const auto *itA = llvm::find_if(ia->symbols, hasUnwind); const auto *itB = llvm::find_if(ib->symbols, hasUnwind); if (itA == ia->symbols.end()) return itB == ib->symbols.end(); if (itB == ib->symbols.end()) return false; const Defined *da = *itA; const Defined *db = *itB; if (da->unwindEntry->icfEqClass[icfPass % 2] != db->unwindEntry->icfEqClass[icfPass % 2] || da->value != 0 || db->value != 0) return false; auto isZero = [](Defined *d) { return d->value == 0; }; return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == ia->symbols.end() && std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == ib->symbols.end(); } // Find the first InputSection after BEGIN whose equivalence class differs size_t ICF::findBoundary(size_t begin, size_t end) { uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; for (size_t i = begin + 1; i < end; ++i) if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) return i; return end; } // Invoke FUNC on subranges with matching equivalence class void ICF::forEachClassRange(size_t begin, size_t end, llvm::function_ref func) { while (begin < end) { size_t mid = findBoundary(begin, end); func(begin, mid); begin = mid; } } // Split icfInputs into shards, then parallelize invocation of FUNC on subranges // with matching equivalence class void ICF::forEachClass(llvm::function_ref func) { // Only use threads when the benefits outweigh the overhead. const size_t threadingThreshold = 1024; if (icfInputs.size() < threadingThreshold) { forEachClassRange(0, icfInputs.size(), func); ++icfPass; return; } // Shard into non-overlapping intervals, and call FUNC in parallel. The // sharding must be completed before any calls to FUNC are made so that FUNC // can modify the InputSection in its shard without causing data races. const size_t shards = 256; size_t step = icfInputs.size() / shards; size_t boundaries[shards + 1]; boundaries[0] = 0; boundaries[shards] = icfInputs.size(); parallelFor(1, shards, [&](size_t i) { boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); }); parallelFor(1, shards + 1, [&](size_t i) { if (boundaries[i - 1] < boundaries[i]) { forEachClassRange(boundaries[i - 1], boundaries[i], func); } }); ++icfPass; } void ICF::run() { // Into each origin-section hash, combine all reloc referent section hashes. for (icfPass = 0; icfPass < 2; ++icfPass) { parallelForEach(icfInputs, [&](ConcatInputSection *isec) { uint32_t hash = isec->icfEqClass[icfPass % 2]; for (const Reloc &r : isec->relocs) { if (auto *sym = r.referent.dyn_cast()) { if (auto *defined = dyn_cast(sym)) { if (defined->isec) { if (auto *referentIsec = dyn_cast(defined->isec)) hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; else hash += defined->isec->kind() + defined->isec->getOffset(defined->value); } else { hash += defined->value; } } else { // ICF runs before Undefined diags assert(isa(sym) || isa(sym)); } } } // Set MSB to 1 to avoid collisions with non-hashed classes. isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); }); } llvm::stable_sort( icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { return a->icfEqClass[0] < b->icfEqClass[0]; }); forEachClass([&](size_t begin, size_t end) { segregate(begin, end, &ICF::equalsConstant); }); // Split equivalence groups by comparing relocations until convergence do { icfRepeat = false; forEachClass([&](size_t begin, size_t end) { segregate(begin, end, &ICF::equalsVariable); }); } while (icfRepeat); log("ICF needed " + Twine(icfPass) + " iterations"); if (verboseDiagnostics) { log("equalsConstant() called " + Twine(equalsConstantCount) + " times"); log("equalsVariable() called " + Twine(equalsVariableCount) + " times"); } // Fold sections within equivalence classes forEachClass([&](size_t begin, size_t end) { if (end - begin < 2) return; ConcatInputSection *beginIsec = icfInputs[begin]; for (size_t i = begin + 1; i < end; ++i) beginIsec->foldIdentical(icfInputs[i]); }); } // Split an equivalence class into smaller classes. void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { while (begin < end) { // Divide [begin, end) into two. Let mid be the start index of the // second group. auto bound = std::stable_partition( icfInputs.begin() + begin + 1, icfInputs.begin() + end, [&](ConcatInputSection *isec) { return (this->*equals)(icfInputs[begin], isec); }); size_t mid = bound - icfInputs.begin(); // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an // equivalence class ID because every group ends with a unique index. for (size_t i = begin; i < mid; ++i) icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; // If we created a group, we need to iterate the main loop again. if (mid != end) icfRepeat = true; begin = mid; } } void macho::markSymAsAddrSig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (d->isec) d->isec->keepUnique = true; } void macho::markAddrSigSymbols() { TimeTraceScope timeScope("Mark addrsig symbols"); for (InputFile *file : inputFiles) { ObjFile *obj = dyn_cast(file); if (!obj) continue; Section *addrSigSection = obj->addrSigSection; if (!addrSigSection) continue; assert(addrSigSection->subsections.size() == 1); const InputSection *isec = addrSigSection->subsections[0].isec; for (const Reloc &r : isec->relocs) { if (auto *sym = r.referent.dyn_cast()) markSymAsAddrSig(sym); else error(toString(isec) + ": unexpected section relocation"); } } } void macho::foldIdenticalSections(bool onlyCfStrings) { TimeTraceScope timeScope("Fold Identical Code Sections"); // The ICF equivalence-class segregation algorithm relies on pre-computed // hashes of InputSection::data for the ConcatOutputSection::inputs and all // sections referenced by their relocs. We could recursively traverse the // relocs to find every referenced InputSection, but that precludes easy // parallelization. Therefore, we hash every InputSection here where we have // them all accessible as simple vectors. // If an InputSection is ineligible for ICF, we give it a unique ID to force // it into an unfoldable singleton equivalence class. Begin the unique-ID // space at inputSections.size(), so that it will never intersect with // equivalence-class IDs which begin at 0. Since hashes & unique IDs never // coexist with equivalence-class IDs, this is not necessary, but might help // someone keep the numbers straight in case we ever need to debug the // ICF::segregate() std::vector foldable; uint64_t icfUniqueID = inputSections.size(); for (ConcatInputSection *isec : inputSections) { bool isFoldableWithAddendsRemoved = isCfStringSection(isec) || isClassRefsSection(isec) || isSelRefsSection(isec); // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we // can still fold it. bool hasFoldableFlags = (isSelRefsSection(isec) || sectionType(isec->getFlags()) == MachO::S_REGULAR); // FIXME: consider non-code __text sections as foldable? bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) && (isCodeSection(isec) || isFoldableWithAddendsRemoved || isGccExceptTabSection(isec)) && !isec->keepUnique && !isec->hasAltEntry && !isec->shouldOmitFromOutput() && hasFoldableFlags; if (isFoldable) { foldable.push_back(isec); for (Defined *d : isec->symbols) if (d->unwindEntry) foldable.push_back(d->unwindEntry); // Some sections have embedded addends that foil ICF's hashing / equality // checks. (We can ignore embedded addends when doing ICF because the same // information gets recorded in our Reloc structs.) We therefore create a // mutable copy of the section data and zero out the embedded addends // before performing any hashing / equality checks. if (isFoldableWithAddendsRemoved) { // We have to do this copying serially as the BumpPtrAllocator is not // thread-safe. FIXME: Make a thread-safe allocator. MutableArrayRef copy = isec->data.copy(bAlloc()); for (const Reloc &r : isec->relocs) target->relocateOne(copy.data() + r.offset, r, /*va=*/0, /*relocVA=*/0); isec->data = copy; } } else if (!isEhFrameSection(isec)) { // EH frames are gathered as foldables from unwindEntry above; give a // unique ID to everything else. isec->icfEqClass[0] = ++icfUniqueID; } } parallelForEach(foldable, [](ConcatInputSection *isec) { assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! // Turn-on the top bit to guarantee that valid hashes have no collisions // with the small-integer unique IDs for ICF-ineligible sections isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31); }); // Now that every input section is either hashed or marked as unique, run the // segregation algorithm to detect foldable subsections. ICF(foldable).run(); }