summaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
authorspupyrev <spupyrev@fb.com>2023-02-16 10:52:04 -0800
committerspupyrev <spupyrev@fb.com>2023-05-02 14:03:47 -0700
commit3e3a926be8a9787d2786e3e3ca879fac0656a824 (patch)
treebecaf47ce4537756c686e162b460328818efec82 /bolt
parentf305cafc589f08bf576363348aaa98962174fd88 (diff)
downloadllvm-3e3a926be8a9787d2786e3e3ca879fac0656a824.tar.gz
[BOLT][NFC] Add hash computation for basic blocks
Extending yaml profile format with block hashes, which are used for stale profile matching. To avoid duplication of the code, created a new class with a collection of utilities for computing hashes. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D144306
Diffstat (limited to 'bolt')
-rw-r--r--bolt/include/bolt/Core/BinaryBasicBlock.h9
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h3
-rw-r--r--bolt/include/bolt/Core/HashUtilities.h41
-rw-r--r--bolt/include/bolt/Core/MCPlusBuilder.h5
-rw-r--r--bolt/include/bolt/Profile/ProfileYAMLMapping.h1
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp35
-rw-r--r--bolt/lib/Core/CMakeLists.txt1
-rw-r--r--bolt/lib/Core/HashUtilities.cpp135
-rw-r--r--bolt/lib/Passes/IdenticalCodeFolding.cpp69
-rw-r--r--bolt/lib/Profile/YAMLProfileWriter.cpp13
-rw-r--r--bolt/lib/Target/X86/X86MCPlusBuilder.cpp26
11 files changed, 232 insertions, 106 deletions
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 8c044597b5fd..c9694ab29d44 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -144,6 +144,9 @@ private:
/// blocks may contain out of date or incorrect information.
bool IsValid{true};
+ /// Last computed hash value.
+ mutable uint64_t Hash{0};
+
private:
BinaryBasicBlock() = delete;
BinaryBasicBlock(const BinaryBasicBlock &) = delete;
@@ -939,6 +942,9 @@ public:
/// Check if the block has a jump table instruction.
bool hasJumpTable() const { return getJumpTable() != nullptr; }
+ /// Returns the last computed hash value of the block.
+ uint64_t getHash() const { return Hash; }
+
private:
void adjustNumPseudos(const MCInst &Inst, int Sign);
@@ -963,6 +969,9 @@ private:
/// Set the index of this basic block.
void setIndex(unsigned I) { Index = I; }
+ /// Sets the hash value of the basic block.
+ void setHash(uint64_t Value) const { Hash = Value; }
+
template <typename T> void clearList(T &List) {
T TempList;
TempList.swap(List);
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a96243c579eb..a54492c290f0 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2205,6 +2205,9 @@ public:
return std::string();
}) const;
+ /// Compute hash values for each block of the function.
+ void computeBlockHashes() const;
+
void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
/// Return DWARF compile unit for this function.
diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h
new file mode 100644
index 000000000000..8d445ff83756
--- /dev/null
+++ b/bolt/include/bolt/Core/HashUtilities.h
@@ -0,0 +1,41 @@
+//===- bolt/Core/HashUtilities.h - Misc hash utilities --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions for computing hash values over BinaryFunction
+// and BinaryBasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_HASH_UTILITIES_H
+#define BOLT_CORE_HASH_UTILITIES_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryContext.h"
+
+namespace llvm {
+namespace bolt {
+
+uint16_t hash_64_to_16(const uint64_t Hash);
+
+std::string hashInteger(uint64_t Value);
+
+std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol);
+
+std::string hashExpr(BinaryContext &BC, const MCExpr &Expr);
+
+std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand);
+
+using OperandHashFuncTy = function_ref<typename std::string(const MCOperand &)>;
+
+std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
+ OperandHashFuncTy OperandHashFunc);
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e6eb1429d3a1..86756a4434ed 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -466,6 +466,11 @@ public:
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
+ virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
+ llvm_unreachable("not implemented");
+ return 0;
+ }
+
/// Create increment contents of target by 1 for Instrumentation
virtual InstructionListType createInstrIncMemory(const MCSymbol *Target,
MCContext *Ctx,
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index e20d449e0b93..f02f2197c469 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -126,6 +126,7 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
static void mapping(IO &YamlIO, bolt::BinaryBasicBlockProfile &BBP) {
YamlIO.mapRequired("bid", BBP.Index);
YamlIO.mapRequired("insns", BBP.NumInstructions);
+ YamlIO.mapOptional("hash", BBP.Hash, (llvm::yaml::Hex64)0);
YamlIO.mapOptional("exec", BBP.ExecCount, (uint64_t)0);
YamlIO.mapOptional("events", BBP.EventCount, (uint64_t)0);
YamlIO.mapOptional("calls", BBP.CallSites,
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 89ba3627cdd7..47c6b7eddaf8 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -14,6 +14,7 @@
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryDomTree.h"
#include "bolt/Core/DynoStats.h"
+#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/NameShortener.h"
@@ -3604,34 +3605,18 @@ size_t BinaryFunction::computeHash(bool UseDFS,
// The hash is computed by creating a string of all instruction opcodes and
// possibly their operands and then hashing that string with std::hash.
std::string HashString;
- for (const BinaryBasicBlock *BB : Order) {
- for (const MCInst &Inst : *BB) {
- unsigned Opcode = Inst.getOpcode();
-
- if (BC.MIB->isPseudo(Inst))
- continue;
+ for (const BinaryBasicBlock *BB : Order)
+ HashString.append(hashBlock(BC, *BB, OperandHashFunc));
- // Ignore unconditional jumps since we check CFG consistency by processing
- // basic blocks in order and do not rely on branches to be in-sync with
- // CFG. Note that we still use condition code of conditional jumps.
- if (BC.MIB->isUnconditionalBranch(Inst))
- continue;
-
- if (Opcode == 0)
- HashString.push_back(0);
-
- while (Opcode) {
- uint8_t LSB = Opcode & 0xff;
- HashString.push_back(LSB);
- Opcode = Opcode >> 8;
- }
+ return Hash = std::hash<std::string>{}(HashString);
+}
- for (const MCOperand &Op : MCPlus::primeOperands(Inst))
- HashString.append(OperandHashFunc(Op));
- }
+void BinaryFunction::computeBlockHashes() const {
+ for (const BinaryBasicBlock *BB : BasicBlocks) {
+ std::string Hash =
+ hashBlock(BC, *BB, [](const MCOperand &Op) { return std::string(); });
+ BB->setHash(std::hash<std::string>{}(Hash));
}
-
- return Hash = std::hash<std::string>{}(HashString);
}
void BinaryFunction::insertBasicBlocks(
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index e7215b2babd6..7c633e37eeb7 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMBOLTCore
DynoStats.cpp
Exceptions.cpp
FunctionLayout.cpp
+ HashUtilities.cpp
JumpTable.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp
new file mode 100644
index 000000000000..1c980e003f88
--- /dev/null
+++ b/bolt/lib/Core/HashUtilities.cpp
@@ -0,0 +1,135 @@
+//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Computation of hash values over BinaryFunction and BinaryBasicBlock.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/HashUtilities.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Hashing a 64-bit integer to a 16-bit one.
+uint16_t hash_64_to_16(const uint64_t Hash) {
+ uint16_t Res = (uint16_t)(Hash & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 16) & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 32) & 0xFFFF);
+ Res ^= (uint16_t)((Hash >> 48) & 0xFFFF);
+ return Res;
+}
+
+std::string hashInteger(uint64_t Value) {
+ std::string HashString;
+ if (Value == 0)
+ HashString.push_back(0);
+
+ while (Value) {
+ uint8_t LSB = Value & 0xff;
+ HashString.push_back(LSB);
+ Value >>= 8;
+ }
+
+ return HashString;
+}
+
+std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
+ std::string HashString;
+
+ // Ignore function references.
+ if (BC.getFunctionForSymbol(&Symbol))
+ return HashString;
+
+ llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
+ if (!ErrorOrValue)
+ return HashString;
+
+ // Ignore jump table references.
+ if (BC.getJumpTableContainingAddress(*ErrorOrValue))
+ return HashString;
+
+ return HashString.append(hashInteger(*ErrorOrValue));
+}
+
+std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
+ switch (Expr.getKind()) {
+ case MCExpr::Constant:
+ return hashInteger(cast<MCConstantExpr>(Expr).getValue());
+ case MCExpr::SymbolRef:
+ return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
+ case MCExpr::Unary: {
+ const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
+ return hashInteger(UnaryExpr.getOpcode())
+ .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
+ }
+ case MCExpr::Binary: {
+ const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
+ return hashExpr(BC, *BinaryExpr.getLHS())
+ .append(hashInteger(BinaryExpr.getOpcode()))
+ .append(hashExpr(BC, *BinaryExpr.getRHS()));
+ }
+ case MCExpr::Target:
+ return std::string();
+ }
+
+ llvm_unreachable("invalid expression kind");
+}
+
+std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
+ if (Operand.isImm())
+ return hashInteger(Operand.getImm());
+ if (Operand.isReg())
+ return hashInteger(Operand.getReg());
+ if (Operand.isExpr())
+ return hashExpr(BC, *Operand.getExpr());
+
+ return std::string();
+}
+
+std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
+ OperandHashFuncTy OperandHashFunc) {
+ const bool IsX86 = BC.isX86();
+
+ // The hash is computed by creating a string of all instruction opcodes and
+ // possibly their operands and then hashing that string with std::hash.
+ std::string HashString;
+
+ for (const MCInst &Inst : BB) {
+ if (BC.MIB->isPseudo(Inst))
+ continue;
+
+ unsigned Opcode = Inst.getOpcode();
+
+ // Ignore unconditional jumps since we check CFG consistency by processing
+ // basic blocks in order and do not rely on branches to be in-sync with
+ // CFG. Note that we still use condition code of conditional jumps.
+ if (BC.MIB->isUnconditionalBranch(Inst))
+ continue;
+
+ if (IsX86 && BC.MIB->isConditionalBranch(Inst))
+ Opcode = BC.MIB->getShortBranchOpcode(Opcode);
+
+ if (Opcode == 0)
+ HashString.push_back(0);
+
+ while (Opcode) {
+ uint8_t LSB = Opcode & 0xff;
+ HashString.push_back(LSB);
+ Opcode = Opcode >> 8;
+ }
+
+ for (const MCOperand &Op : MCPlus::primeOperands(Inst))
+ HashString.append(OperandHashFunc(Op));
+ }
+ return HashString;
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp
index ccf1749c47bf..e96666b37bd9 100644
--- a/bolt/lib/Passes/IdenticalCodeFolding.cpp
+++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "bolt/Passes/IdenticalCodeFolding.h"
+#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
@@ -337,74 +338,6 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
KeyHash, KeyEqual>
IdenticalBucketsMap;
-static std::string hashInteger(uint64_t Value) {
- std::string HashString;
- if (Value == 0)
- HashString.push_back(0);
-
- while (Value) {
- uint8_t LSB = Value & 0xff;
- HashString.push_back(LSB);
- Value >>= 8;
- }
-
- return HashString;
-}
-
-static std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
- std::string HashString;
-
- // Ignore function references.
- if (BC.getFunctionForSymbol(&Symbol))
- return HashString;
-
- llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
- if (!ErrorOrValue)
- return HashString;
-
- // Ignore jump table references.
- if (BC.getJumpTableContainingAddress(*ErrorOrValue))
- return HashString;
-
- return HashString.append(hashInteger(*ErrorOrValue));
-}
-
-static std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
- switch (Expr.getKind()) {
- case MCExpr::Constant:
- return hashInteger(cast<MCConstantExpr>(Expr).getValue());
- case MCExpr::SymbolRef:
- return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
- case MCExpr::Unary: {
- const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
- return hashInteger(UnaryExpr.getOpcode())
- .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
- }
- case MCExpr::Binary: {
- const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
- return hashExpr(BC, *BinaryExpr.getLHS())
- .append(hashInteger(BinaryExpr.getOpcode()))
- .append(hashExpr(BC, *BinaryExpr.getRHS()));
- }
- case MCExpr::Target:
- return std::string();
- }
-
- llvm_unreachable("invalid expression kind");
-}
-
-static std::string hashInstOperand(BinaryContext &BC,
- const MCOperand &Operand) {
- if (Operand.isImm())
- return hashInteger(Operand.getImm());
- if (Operand.isReg())
- return hashInteger(Operand.getReg());
- if (Operand.isExpr())
- return hashExpr(BC, *Operand.getExpr());
-
- return std::string();
-}
-
namespace llvm {
namespace bolt {
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index bee18a6c1627..01ff9e2c26de 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -28,9 +28,13 @@ void convert(const BinaryFunction &BF,
const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR;
+ // Prepare function and block hashes
+ BF.computeHash(/*UseDFS=*/true);
+ BF.computeBlockHashes();
+
YamlBF.Name = BF.getPrintName();
YamlBF.Id = BF.getFunctionNumber();
- YamlBF.Hash = BF.computeHash(/*UseDFS=*/true);
+ YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
@@ -38,6 +42,7 @@ void convert(const BinaryFunction &BF,
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
YamlBB.NumInstructions = BB->getNumNonPseudos();
+ YamlBB.Hash = BB->getHash();
if (!LBRProfile) {
YamlBB.EventCount = BB->getKnownExecutionCount();
@@ -112,11 +117,15 @@ void convert(const BinaryFunction &BF,
// Include landing pads with non-zero execution count.
if (YamlBB.CallSites.empty() && !BB->isEntryPoint() &&
!(BB->isLandingPad() && BB->getKnownExecutionCount() != 0)) {
+ // Include blocks having successors or predecessors with positive counts.
uint64_t SuccessorExecCount = 0;
for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo :
BB->branch_info())
SuccessorExecCount += BranchInfo.Count;
- if (!SuccessorExecCount)
+ uint64_t PredecessorExecCount = 0;
+ for (auto Pred : BB->predecessors())
+ PredecessorExecCount += Pred->getBranchInfo(*BB).Count;
+ if (!SuccessorExecCount && !PredecessorExecCount)
continue;
}
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index ad80255dcf35..5a45f14db597 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -50,17 +50,6 @@ static cl::opt<bool> X86StripRedundantAddressSize(
namespace {
-unsigned getShortBranchOpcode(unsigned Opcode) {
- switch (Opcode) {
- default:
- return Opcode;
- case X86::JMP_2: return X86::JMP_1;
- case X86::JMP_4: return X86::JMP_1;
- case X86::JCC_2: return X86::JCC_1;
- case X86::JCC_4: return X86::JCC_1;
- }
-}
-
unsigned getShortArithOpcode(unsigned Opcode) {
return X86::getShortOpcodeArith(Opcode);
}
@@ -2867,6 +2856,21 @@ public:
MCPhysReg getX86R11() const override { return X86::R11; }
+ unsigned getShortBranchOpcode(unsigned Opcode) const override {
+ switch (Opcode) {
+ default:
+ return Opcode;
+ case X86::JMP_2:
+ return X86::JMP_1;
+ case X86::JMP_4:
+ return X86::JMP_1;
+ case X86::JCC_2:
+ return X86::JCC_1;
+ case X86::JCC_4:
+ return X86::JCC_1;
+ }
+ }
+
MCPhysReg getIntArgRegister(unsigned ArgNo) const override {
// FIXME: this should depend on the calling convention.
switch (ArgNo) {