diff options
author | wlei <wlei@fb.com> | 2021-01-21 09:36:32 -0800 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2021-02-19 21:21:11 -0800 |
commit | 87c27020cc6466ae33550f1f1f55d5989afaca2e (patch) | |
tree | e54179c0594869f383e2a5e6fba03b6925e29434 | |
parent | e562ff08f634d814c1cd1e65e3428ca5308d3022 (diff) | |
download | llvm-87c27020cc6466ae33550f1f1f55d5989afaca2e.tar.gz |
[CSSPGO][llvm-profgen] Merge and trim profile for cold context to reduce profile size
This change allows merging and trimming cold context profiles in llvm-profgen to solve the profile size bloat problem. Currently, when a context profile's total sample count is below a threshold (configurable via a switch), it is considered cold and merged into a base context-less profile, which keeps the profile quality at least as good as the baseline (non-CS).
For example, two input profiles:
[main @ foo @ bar]:60
[main @ bar]:50
With threshold = 100, the two profiles will be merged into one with the base context, giving the result:
[bar]:110
Added two switches:
`--csprof-cold-thres=<value>`: Specifies the total-samples threshold below which a context profile is considered cold, with 100 being the default. Any cold context profiles will be merged into the context-less base profile by default.
`--csprof-keep-cold`: Forces profile generation to keep cold profiles instead of dropping them. By default, a profile whose total sample count is still below the threshold after merging will not be written to the output profile.
Results:
Though this has not yet been evaluated with the latest CSSPGO, our internal branch shows it is neutral on performance while significantly reducing the profile size. A detailed evaluation of llvm-profgen with CSSPGO will come later.
Differential Revision: https://reviews.llvm.org/D94111
9 files changed, 142 insertions, 9 deletions
diff --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test index 98767a9b29b7..943832ebef10 100644 --- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test @@ -1,4 +1,4 @@ -; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t ; CHECK:[main:1 @ foo]:44:0 diff --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test index 19928322a66d..c7aa1dea21bb 100644 --- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test @@ -1,4 +1,4 @@ -; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t ; CHECK: [main:2 @ foo]:74:0 diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test new file mode 100644 index 000000000000..e0c65ac44e2b --- /dev/null +++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test @@ -0,0 +1,70 @@ +; Used the data from recursion-compression.test, refer it for the unmerged output +; RUN: llvm-profgen 
--perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8 +; RUN: FileCheck %s --input-file %t + +; Test --csprof-keep-cold +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold +; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD + +; CHECK: [fa]:14:4 +; CHECK-NEXT: 1: 4 +; CHECK-NEXT: 3: 4 +; CHECK-NEXT: 4: 2 +; CHECK-NEXT: 5: 1 +; CHECK-NEXT: 7: 2 fb:2 +; CHECK-NEXT: 8: 1 fa:1 +; CHECK-NEXT: !CFGChecksum: 120515930909 +; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4 +; CHECK-NEXT: 1: 4 +; CHECK-NEXT: 2: 3 +; CHECK-NEXT: 3: 1 +; CHECK-NEXT: 5: 4 fb:4 +; CHECK-NEXT: 6: 1 fa:1 +; CHECK-NEXT: !CFGChecksum: 72617220756 + +; CHECK-KEEP-COLD: [fb]:19:6 +; CHECK-KEEP-COLD-NEXT: 1: 6 +; CHECK-KEEP-COLD-NEXT: 2: 3 +; CHECK-KEEP-COLD-NEXT: 3: 3 +; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4 +; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3 +; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 72617220756 +; CHECK-KEEP-COLD-NEXT:[fa]:14:4 +; CHECK-KEEP-COLD-NEXT: 1: 4 +; CHECK-KEEP-COLD-NEXT: 3: 4 +; CHECK-KEEP-COLD-NEXT: 4: 2 +; CHECK-KEEP-COLD-NEXT: 5: 1 +; CHECK-KEEP-COLD-NEXT: 7: 2 fb:2 +; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1 +; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909 + + +; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling +; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls +; -g test.c -o a.out + +; Copied from recursion-compression.test +#include <stdio.h> + +int fb(int n) { + if(n > 10) return fb(n / 2); + return fa(n - 1); +} + +int fa(int n) { + if(n < 2) return n; + if(n % 2) return fb(n - 1); + return fa(n - 1); +} + +void foo() { + int s, i = 0; + while (i++ < 10000) + s += fa(i); + printf("sum is %d\n", 
s); +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test index 9beecb271fc0..2e60883afa62 100644 --- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test @@ -1,4 +1,4 @@ -; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t ; CHECK:[main:1 @ foo:3 @ bar]:12:3 diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test index 0491a62ff69b..a0e5507c70dd 100644 --- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test @@ -1,4 +1,4 @@ -; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER +; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t ; CHECK: [main:2 @ foo]:75:0 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test index 47e0a51a4261..43f495398bb0 100644 --- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test @@ -1,7 
+1,7 @@ ; Firstly test uncompression(--compress-recursion=0) -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0 ; RUN: FileCheck %s --input-file %t ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0 diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test index 86afe6c632bd..0d4e7dbb1dd4 100644 --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -1,7 +1,7 @@ ; Firstly test uncompression(--compress-recursion=0) -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS -; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s 
--check-prefix=CHECK-UNWINDER +; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER ; RUN: FileCheck %s --input-file %t ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1 diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index f769bd592f87..b2a8d60d5caf 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -29,6 +29,19 @@ static cl::opt<int32_t, true> RecursionCompression( cl::Hidden, cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize)); +static cl::opt<uint64_t> CSProfColdThres( + "csprof-cold-thres", cl::init(100), cl::ZeroOrMore, + cl::desc("Specify the total samples threshold for a context profile to " + "be considered cold, any cold profiles will be merged into " + "context-less base profiles")); + +static cl::opt<bool> CSProfKeepCold( + "csprof-keep-cold", cl::init(false), cl::ZeroOrMore, + cl::desc("This works together with --csprof-cold-thres. 
If the total count " + "of the profile after all merge is done is still smaller than the " + "csprof-cold-thres, it will be trimmed unless csprof-keep-cold " + "flag is specified.")); + using namespace llvm; using namespace sampleprof; @@ -68,6 +81,7 @@ void ProfileGenerator::write() { if (std::error_code EC = WriterOrErr.getError()) exitWithError(EC, OutputFilename); auto Writer = std::move(WriterOrErr.get()); + mergeAndTrimColdProfile(ProfileMap); Writer->write(ProfileMap); } @@ -329,6 +343,49 @@ void CSProfileGenerator::populateInferredFunctionSamples() { } } +void CSProfileGenerator::mergeAndTrimColdProfile( + StringMap<FunctionSamples> &ProfileMap) { + // Nothing to merge if sample threshold is zero + if (!CSProfColdThres) + return; + + // Filter the cold profiles from ProfileMap and move them into a tmp + // container + std::vector<std::pair<StringRef, const FunctionSamples *>> ToRemoveVec; + for (const auto &I : ProfileMap) { + const FunctionSamples &FunctionProfile = I.second; + if (FunctionProfile.getTotalSamples() >= CSProfColdThres) + continue; + ToRemoveVec.emplace_back(I.getKey(), &I.second); + } + + // Remove the code profile from ProfileMap and merge them into BaseProileMap + StringMap<FunctionSamples> BaseProfileMap; + for (const auto &I : ToRemoveVec) { + auto Ret = + BaseProfileMap.try_emplace(I.second->getName(), FunctionSamples()); + FunctionSamples &BaseProfile = Ret.first->second; + BaseProfile.merge(*I.second); + ProfileMap.erase(I.first); + } + + // Merge the base profiles into ProfileMap; + for (const auto &I : BaseProfileMap) { + // Filter the cold base profile + if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres && + ProfileMap.find(I.getKey()) == ProfileMap.end()) + continue; + // Merge the profile if the original profile exists, otherwise just insert + // as a new profile + FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey()); + StringRef TmpName = OrigProfile.getName(); + OrigProfile.merge(I.second); 
+ // Should use the name ref from ProfileMap's key to avoid name being freed + // from BaseProfileMap + OrigProfile.setName(TmpName); + } +} + // Helper function to extract context prefix string stack // Extract context stack for reusing, leaf context stack will // be added compressed while looking up function profile diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h index 14e58fc9c895..9cb04c4de34d 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.h +++ b/llvm/tools/llvm-profgen/ProfileGenerator.h @@ -28,7 +28,10 @@ public: create(const BinarySampleCounterMap &BinarySampleCounters, enum PerfScriptType SampleType); virtual void generateProfile() = 0; - + // Merge and trim profile with cold context before serialization, + // only eligible for CS profile + virtual void + mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap){}; // Use SampleProfileWriter to serialize profile map void write(); @@ -200,6 +203,9 @@ public: protected: // Lookup or create FunctionSamples for the context FunctionSamples &getFunctionProfileForContext(StringRef ContextId); + // Merge cold context profile whose total sample is below threshold + // into base profile. + void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap) override; private: // Helper function for updating body sample for a leaf location in |