summaryrefslogtreecommitdiff
path: root/bolt/include/bolt/Passes/BinaryPasses.h
blob: dace07e903e7bc92f6bd59e24809e45017351745 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
//===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The set of optimization/analysis passes that run on BinaryFunctions.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_BINARY_PASSES_H
#define BOLT_PASSES_BINARY_PASSES_H

#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DynoStats.h"
#include "llvm/Support/CommandLine.h"
#include <atomic>
#include <map>
#include <set>
#include <string>
#include <unordered_set>

namespace llvm {
namespace bolt {

/// An optimization/analysis pass that runs on functions.
class BinaryFunctionPass {
protected:
  bool PrintPass;

  explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {}

  /// Control whether a specific function should be skipped during
  /// optimization.
  virtual bool shouldOptimize(const BinaryFunction &BF) const;

public:
  virtual ~BinaryFunctionPass() = default;

  /// The name of this pass
  virtual const char *getName() const = 0;

  /// Control whether debug info is printed after this pass is completed.
  bool printPass() const { return PrintPass; }

  /// Control whether debug info is printed for an individual function after
  /// this pass is completed (printPass() must have returned true).
  virtual bool shouldPrint(const BinaryFunction &BF) const;

  /// Execute this pass on the given functions.
  virtual void runOnFunctions(BinaryContext &BC) = 0;
};

/// A pass to print program-wide dynostats.
class DynoStatsPrintPass : public BinaryFunctionPass {
protected:
  DynoStats PrevDynoStats;
  std::string Title;

public:
  DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
      : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}

  const char *getName() const override {
    return "print dyno-stats after optimizations";
  }

  bool shouldPrint(const BinaryFunction &BF) const override { return false; }

  void runOnFunctions(BinaryContext &BC) override {
    const DynoStats NewDynoStats =
        getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
    const bool Changed = (NewDynoStats != PrevDynoStats);
    outs() << "BOLT-INFO: program-wide dynostats " << Title
           << (Changed ? "" : " (no change)") << ":\n\n"
           << PrevDynoStats;
    if (Changed) {
      outs() << '\n';
      NewDynoStats.print(outs(), &PrevDynoStats, BC.InstPrinter.get());
    }
    outs() << '\n';
  }
};

/// The pass normalizes CFG by performing the following transformations:
///   * removes empty basic blocks
///   * merges duplicate edges and updates jump instructions
class NormalizeCFG : public BinaryFunctionPass {
  std::atomic<uint64_t> NumBlocksRemoved{0};
  std::atomic<uint64_t> NumDuplicateEdgesMerged{0};

  void runOnFunction(BinaryFunction &BF);

public:
  NormalizeCFG(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "normalize CFG"; }

  void runOnFunctions(BinaryContext &) override;
};

/// Detect and eliminate unreachable basic blocks. We could have those
/// filled with nops and they are used for alignment.
class EliminateUnreachableBlocks : public BinaryFunctionPass {
  std::unordered_set<const BinaryFunction *> Modified;
  std::atomic<unsigned> DeletedBlocks{0};
  std::atomic<uint64_t> DeletedBytes{0};
  void runOnFunction(BinaryFunction &Function);

public:
  EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "eliminate-unreachable"; }
  bool shouldPrint(const BinaryFunction &BF) const override {
    return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
  }
  void runOnFunctions(BinaryContext &) override;
};

// Reorder the basic blocks for each function based on hotness.
class ReorderBasicBlocks : public BinaryFunctionPass {
public:
  /// Choose which strategy should the block layout heuristic prioritize when
  /// facing conflicting goals.
  enum LayoutType : char {
    /// LT_NONE - do not change layout of basic blocks
    LT_NONE = 0, /// no reordering
    /// LT_REVERSE - reverse the order of basic blocks, meant for testing
    /// purposes. The first basic block is left intact and the rest are
    /// put in the reverse order.
    LT_REVERSE,
    /// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
    LT_OPTIMIZE,
    /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
    /// paper (PLDI '90) about block reordering, trying to minimize branch
    /// mispredictions.
    LT_OPTIMIZE_BRANCH,
    /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
    /// that suggests putting frequently executed chains first in the layout.
    LT_OPTIMIZE_CACHE,
    // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation.
    LT_OPTIMIZE_CACHE_PLUS,
    /// Block reordering guided by the extended TSP metric.
    LT_OPTIMIZE_EXT_TSP,
    /// Create clusters and use random order for them.
    LT_OPTIMIZE_SHUFFLE,
  };

private:
  /// Run the specified layout algorithm on the given function. Returns `true`
  /// if the order of blocks was changed.
  bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type,
                            bool MinBranchClusters) const;

public:
  explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  bool shouldOptimize(const BinaryFunction &BF) const override;

  const char *getName() const override { return "reorder-blocks"; }
  bool shouldPrint(const BinaryFunction &BF) const override;
  void runOnFunctions(BinaryContext &BC) override;
};

/// Sync local branches with CFG.
class FixupBranches : public BinaryFunctionPass {
public:
  explicit FixupBranches(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "fix-branches"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Fix the CFI state and exception handling information after all other
/// passes have completed.
class FinalizeFunctions : public BinaryFunctionPass {
public:
  explicit FinalizeFunctions(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "finalize-functions"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Perform any necessary adjustments for functions that do not fit into their
/// original space in non-relocation mode.
class CheckLargeFunctions : public BinaryFunctionPass {
public:
  explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "check-large-functions"; }

  void runOnFunctions(BinaryContext &BC) override;

  bool shouldOptimize(const BinaryFunction &BF) const override;
};

/// Convert and remove all BOLT-related annotations before LLVM code emission.
class LowerAnnotations : public BinaryFunctionPass {
public:
  explicit LowerAnnotations(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "lower-annotations"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Clean the state of the MC representation before sending it to emission
class CleanMCState : public BinaryFunctionPass {
public:
  explicit CleanMCState(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "clean-mc-state"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// An optimization to simplify conditional tail calls by removing
/// unnecessary branches.
///
/// This optimization considers both of the following cases:
///
/// foo: ...
///      jcc L1   original
///      ...
/// L1:  jmp bar  # TAILJMP
///
/// ->
///
/// foo: ...
///      jcc bar  iff jcc L1 is expected
///      ...
///
/// L1 is unreachable
///
/// OR
///
/// foo: ...
///      jcc  L2
/// L1:  jmp  dest  # TAILJMP
/// L2:  ...
///
/// ->
///
/// foo: jncc dest  # TAILJMP
/// L2:  ...
///
/// L1 is unreachable
///
/// For this particular case, the first basic block ends with
/// a conditional branch and has two successors, one fall-through
/// and one for when the condition is true.
/// The target of the conditional is a basic block with a single
/// unconditional branch (i.e. tail call) to another function.
/// We don't care about the contents of the fall-through block.
/// We assume that the target of the conditional branch is the
/// first successor.
class SimplifyConditionalTailCalls : public BinaryFunctionPass {
  uint64_t NumCandidateTailCalls{0};
  uint64_t NumTailCallsPatched{0};
  uint64_t CTCExecCount{0};
  uint64_t CTCTakenCount{0};
  uint64_t NumOrigForwardBranches{0};
  uint64_t NumOrigBackwardBranches{0};
  uint64_t NumDoubleJumps{0};
  uint64_t DeletedBlocks{0};
  uint64_t DeletedBytes{0};
  std::unordered_set<const BinaryFunction *> Modified;
  std::set<const BinaryBasicBlock *> BeenOptimized;

  bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
                           const MCInst &CondBranch, const BinaryBasicBlock *BB,
                           const bool DirectionFlag);

  uint64_t fixTailCalls(BinaryFunction &BF);

public:
  explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override {
    return "simplify-conditional-tail-calls";
  }
  bool shouldPrint(const BinaryFunction &BF) const override {
    return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
  }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Convert instructions to the form with the minimum operand width.
class ShortenInstructions : public BinaryFunctionPass {
  uint64_t shortenInstructions(BinaryFunction &Function);

public:
  explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "shorten-instructions"; }

  void runOnFunctions(BinaryContext &BC) override;
};

/// Perform simple peephole optimizations.
class Peepholes : public BinaryFunctionPass {
public:
  enum PeepholeOpts : char {
    PEEP_NONE = 0x0,
    PEEP_DOUBLE_JUMPS = 0x2,
    PEEP_TAILCALL_TRAPS = 0x4,
    PEEP_USELESS_BRANCHES = 0x8,
    PEEP_ALL = 0xf
  };

private:
  uint64_t NumDoubleJumps{0};
  uint64_t TailCallTraps{0};
  uint64_t NumUselessCondBranches{0};

  /// Add trap instructions immediately after indirect tail calls to prevent
  /// the processor from decoding instructions immediate following the
  /// tailcall.
  void addTailcallTraps(BinaryFunction &Function);

  /// Remove useless duplicate successors.  When the conditional
  /// successor is the same as the unconditional successor, we can
  /// remove the conditional successor and branch instruction.
  void removeUselessCondBranches(BinaryFunction &Function);

public:
  explicit Peepholes(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "peepholes"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// An optimization to simplify loads from read-only sections.The pass converts
/// load instructions with statically computed target address such as:
///
///      mov 0x12f(%rip), %eax
///
/// to their counterparts that use immediate operands instead of memory loads:
///
///     mov $0x4007dc, %eax
///
/// when the target address points somewhere inside a read-only section.
///
class SimplifyRODataLoads : public BinaryFunctionPass {
  uint64_t NumLoadsSimplified{0};
  uint64_t NumDynamicLoadsSimplified{0};
  uint64_t NumLoadsFound{0};
  uint64_t NumDynamicLoadsFound{0};
  std::unordered_set<const BinaryFunction *> Modified;

  bool simplifyRODataLoads(BinaryFunction &BF);

public:
  explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "simplify-read-only-loads"; }
  bool shouldPrint(const BinaryFunction &BF) const override {
    return BinaryFunctionPass::shouldPrint(BF) && Modified.count(&BF) > 0;
  }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Assign output sections to all functions.
class AssignSections : public BinaryFunctionPass {
public:
  explicit AssignSections() : BinaryFunctionPass(false) {}

  const char *getName() const override { return "assign-sections"; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Compute and report to the user the imbalance in flow equations for all
/// CFGs, so we can detect bad quality profile. Prints average and standard
/// deviation of the absolute differences of outgoing flow minus incoming flow
/// for blocks of interest (excluding prologues, epilogues, and BB frequency
/// lower than 100).
class PrintProfileStats : public BinaryFunctionPass {
public:
  explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "profile-stats"; }
  bool shouldPrint(const BinaryFunction &) const override { return false; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Prints a list of the top 100 functions sorted by a set of
/// dyno stats categories.
class PrintProgramStats : public BinaryFunctionPass {
public:
  explicit PrintProgramStats(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "print-stats"; }
  bool shouldPrint(const BinaryFunction &) const override { return false; }
  void runOnFunctions(BinaryContext &BC) override;
};

/// Pass for lowering any instructions that we have raised and that have
/// to be lowered.
class InstructionLowering : public BinaryFunctionPass {
public:
  explicit InstructionLowering(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "inst-lowering"; }

  void runOnFunctions(BinaryContext &BC) override;
};

/// Pass for stripping 'repz' from 'repz retq' sequence of instructions.
class StripRepRet : public BinaryFunctionPass {
public:
  explicit StripRepRet(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "strip-rep-ret"; }

  void runOnFunctions(BinaryContext &BC) override;
};

/// Pass for inlining calls to memcpy using 'rep movsb' on X86.
class InlineMemcpy : public BinaryFunctionPass {
public:
  explicit InlineMemcpy(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "inline-memcpy"; }

  void runOnFunctions(BinaryContext &BC) override;
};

/// Pass for specializing memcpy for a size of 1 byte.
class SpecializeMemcpy1 : public BinaryFunctionPass {
private:
  std::vector<std::string> Spec;

  /// Return indices of the call sites to optimize. Count starts at 1.
  /// Returns an empty set for all call sites in the function.
  std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const;

public:
  explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass,
                             cl::list<std::string> &Spec)
      : BinaryFunctionPass(PrintPass), Spec(Spec) {}

  bool shouldOptimize(const BinaryFunction &BF) const override;

  const char *getName() const override { return "specialize-memcpy"; }

  void runOnFunctions(BinaryContext &BC) override;
};

/// Pass to remove nops in code
class RemoveNops : public BinaryFunctionPass {
  void runOnFunction(BinaryFunction &Function);

public:
  explicit RemoveNops(const cl::opt<bool> &PrintPass)
      : BinaryFunctionPass(PrintPass) {}

  const char *getName() const override { return "remove-nops"; }

  /// Pass entry point
  void runOnFunctions(BinaryContext &BC) override;
};

enum FrameOptimizationType : char {
  FOP_NONE, /// Don't perform FOP.
  FOP_HOT,  /// Perform FOP on hot functions.
  FOP_ALL   /// Perform FOP on all functions.
};

} // namespace bolt
} // namespace llvm

#endif