From 35af20d9e036deeed250b73fd3ae86d6455173c5 Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Fri, 31 Mar 2023 17:08:16 +0800 Subject: [BOLT] Parallelize legacy profile merging Merging profiles is quite expensive, but easily paralleizable. 8359 profiles on n2d-standard-128: single-thread: 808s multi-thread: 200s (~75% speed up) Differential Revision: https://reviews.llvm.org/D149014 --- bolt/tools/merge-fdata/merge-fdata.cpp | 69 +++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 22 deletions(-) (limited to 'bolt') diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index 61ca97971dae..0292c32f06f4 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/ThreadPool.h" +#include #include using namespace llvm; @@ -258,31 +260,38 @@ bool isYAML(const StringRef Filename) { void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { errs() << "Using legacy profile format.\n"; std::optional BoltedCollection; - StringMap Entries; - for (const std::string &Filename : Filenames) { + std::mutex BoltedCollectionMutex; + typedef StringMap ProfileTy; + + auto ParseProfile = [&](const std::string &Filename, auto &Profiles) { + const llvm::thread::id tid = llvm::this_thread::get_id(); + if (isYAML(Filename)) report_error(Filename, "cannot mix YAML and legacy formats"); ErrorOr> MB = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = MB.getError()) report_error(Filename, EC); - errs() << "Merging data from " << Filename << "...\n"; StringRef Buf = MB.get()->getBuffer(); - // Check if the string "boltedcollection" is in the first line - if (Buf.startswith("boltedcollection\n")) { - if (!BoltedCollection.value_or(true)) - report_error( - Filename, - "cannot mix profile collected in BOLT and non-BOLT deployments"); - BoltedCollection = true; - Buf = Buf.drop_front(17); - } else { - if (BoltedCollection.value_or(false)) - report_error( - Filename, - "cannot mix profile collected in BOLT and non-BOLT deployments"); - BoltedCollection = false; + + { + std::lock_guard Lock(BoltedCollectionMutex); + // Check if the string "boltedcollection" is in the first line + if (Buf.startswith("boltedcollection\n")) { + if (!BoltedCollection.value_or(true)) + report_error( + Filename, + "cannot mix profile collected in BOLT and non-BOLT deployments"); + BoltedCollection = true; + Buf = Buf.drop_front(17); + } else { + if (BoltedCollection.value_or(false)) + report_error( + Filename, + "cannot mix profile collected in BOLT and non-BOLT deployments"); + BoltedCollection = false; + } } SmallVector Lines; @@ -295,15 +304,31 @@ void mergeLegacyProfiles(const SmallVectorImpl &Filenames) { uint64_t Count; if (Line.substr(Pos + 1, Line.size() - Pos).getAsInteger(10, Count)) report_error(Filename, "Malformed / corrupted profile counter"); - Count += Entries.lookup(Signature); - Entries.insert_or_assign(Signature, Count); + Count += Profiles[tid].lookup(Signature); + Profiles[tid].insert_or_assign(Signature, Count); + } + }; + + // The final reduction has non-trivial cost, make sure each thread has at + // least 4 tasks. + ThreadPoolStrategy S = optimal_concurrency(Filenames.size() / 4); + ThreadPool Pool(S); + DenseMap ParsedProfiles(Pool.getThreadCount()); + for (const auto &Filename : Filenames) + Pool.async(ParseProfile, std::cref(Filename), std::ref(ParsedProfiles)); + Pool.wait(); + + ProfileTy MergedProfile; + for (const auto &[Thread, Profile] : ParsedProfiles) + for (const auto &[Key, Value] : Profile) { + uint64_t Count = MergedProfile.lookup(Key) + Value; + MergedProfile.insert_or_assign(Key, Count); } - } if (BoltedCollection) output() << "boltedcollection\n"; - for (const auto &Entry : Entries) - output() << Entry.getKey() << " " << Entry.getValue() << "\n"; + for (const auto &[Key, Value] : MergedProfile) + output() << Key << " " << Value << "\n"; errs() << "Profile from " << Filenames.size() << " files merged.\n"; } -- cgit v1.2.1