| author | David L. Jones <dlj@google.com> | 2017-11-15 01:40:05 +0000 |
|---|---|---|
| committer | David L. Jones <dlj@google.com> | 2017-11-15 01:40:05 +0000 |
| commit | d5c2cca72463233df77a065f201db31b140eb44d (patch) | |
| tree | 3f9a978131033302a58b7db7db1ecf2a4622bad2 /lib/CodeGen | |
| parent | ce7676b8db6bac096dad4c4ad62e9e6bb8aa1064 (diff) | |
| parent | dcf64df89bc6d775e266ebd6b0134d135f47a35b (diff) | |
| download | llvm-d5c2cca72463233df77a065f201db31b140eb44d.tar.gz | |
Creating branches/google/testing and tags/google/testing/2017-11-14 from r317716
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@318248 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/CodeGen')
114 files changed, 2843 insertions, 1038 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index d7f91fc1ce3b..1dea746a6ac3 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -28,12 +28,12 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 876cca4bc7a0..9642368a0479 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 8b1376ab363d..973816d56355 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -29,7 +29,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a35fcdaaf9aa..4ebc71769433 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -51,6 +51,8 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -100,8 +102,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index eae79ad101d3..5250f1b17872 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -32,7 +33,6 @@ #include "llvm/Support/SourceMgr.h" 
#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 67bab8c76841..5aa3f4ae1030 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -68,7 +68,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index dd7f7931b06b..1a6cb9679925 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -31,7 +31,7 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 06b5b06c41bf..603d0f7f4700 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -36,7 +36,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 6e4625ba4116..167ca13c19c1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DIE.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> #include <utility> @@ -27,7 +28,6 @@ class DwarfCompileUnit; class DwarfUnit; class LexicalScope; class MCSection; -class MDNode; class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. 
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 5d485f213573..35ce1fec3858 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -33,7 +33,7 @@ #include "llvm/MC/MCWin64EH.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 40cb0c0cdf19..cd6056b674ca 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -52,7 +53,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -296,6 +296,11 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { return HashMachineInstr(*I); } +/// Whether MI should be counted as an instruction when calculating common tail. +static bool countsAsInstruction(const MachineInstr &MI) { + return !(MI.isDebugValue() || MI.isCFIInstruction()); +} + /// ComputeCommonTailLength - Given two machine basic blocks, compute the number /// of instructions they actually have in common together at their end. Return /// iterators for the first shared instruction in each block. @@ -310,26 +315,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; // Skip debugging pseudos; necessary to avoid changing the code. - while (I1->isDebugValue()) { + while (!countsAsInstruction(*I1)) { if (I1==MBB1->begin()) { - while (I2->isDebugValue()) { - if (I2==MBB2->begin()) + while (!countsAsInstruction(*I2)) { + if (I2==MBB2->begin()) { // I1==DBG at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; + } --I2; } ++I2; // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I1; } // I1==first (untested) non-DBG preceding known match - while (I2->isDebugValue()) { + while (!countsAsInstruction(*I2)) { if (I2==MBB2->begin()) { ++I1; // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - return TailLen; + goto SkipTopCFIAndReturn; } --I2; } @@ -368,6 +374,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } ++I1; } + +SkipTopCFIAndReturn: + // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if + // I1 and I2 are non-identical when compared and then one or both of them ends + // up pointing to a CFI instruction after being incremented. For example: + /* + BB1: + ... + INSTRUCTION_A + ADD32ri8 <- last common instruction + ... + BB2: + ... + INSTRUCTION_B + CFI_INSTRUCTION + ADD32ri8 <- last common instruction + ... + */ + // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after + // incrementing the iterators, I1 will point to ADD, however I2 will point to + // the CFI instruction. 
Later on, this leads to BB2 being 'hacked off' at the + // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI + // instruction. + while (I1 != MBB1->end() && I1->isCFIInstruction()) { + ++I1; + } + + while (I2 != MBB2->end() && I2->isCFIInstruction()) { + ++I2; + } + return TailLen; } @@ -454,7 +491,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { - if (I->isDebugValue()) + if (!countsAsInstruction(*I)) continue; if (I->isCall()) Time += 10; @@ -814,12 +851,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBI != MBBIE && "Reached BB end within common tail length!"); (void)MBBIE; - if (MBBI->isDebugValue()) { + if (!countsAsInstruction(*MBBI)) { ++MBBI; continue; } - while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon)) ++MBBICommon; assert(MBBICommon != MBBIECommon && @@ -859,7 +896,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { } for (auto &MI : *MBB) { - if (MI.isDebugValue()) + if (!countsAsInstruction(MI)) continue; DebugLoc DL = MI.getDebugLoc(); for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { @@ -869,7 +906,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { auto &Pos = NextCommonInsts[i]; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); - while (Pos->isDebugValue()) { + while (!countsAsInstruction(*Pos)) { ++Pos; assert(Pos != SameTails[i].getBlock()->end() && "Reached BB end within common tail"); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 2d21fbeea395..73b399e44440 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" @@ -22,7 +23,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp new file mode 100644 index 000000000000..5464ee443e0f --- /dev/null +++ b/lib/CodeGen/CFIInstrInserter.cpp @@ -0,0 +1,319 @@ +//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This pass verifies incoming and outgoing CFA information of basic +/// blocks. CFA information is information about offset and register set by CFI +/// directives, valid at the start and end of a basic block. This pass checks +/// that outgoing information of predecessors matches incoming information of +/// their successors. Then it checks if blocks have correct CFA calculation rule +/// set and inserts additional CFI instruction at their beginnings if they +/// don't. 
CFI instructions are inserted if basic blocks have incorrect offset +/// or register set by previous blocks, as a result of a non-linear layout of +/// blocks in a function. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +namespace { +class CFIInstrInserter : public MachineFunctionPass { + public: + static char ID; + + CFIInstrInserter() : MachineFunctionPass(ID) { + initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + + if (!MF.getMMI().hasDebugInfo() && + !MF.getFunction()->needsUnwindTableEntry()) + return false; + + MBBVector.resize(MF.getNumBlockIDs()); + calculateCFAInfo(MF); +#ifndef NDEBUG + unsigned ErrorNum = verify(MF); + if (ErrorNum) + report_fatal_error("Found " + Twine(ErrorNum) + + " in/out CFI information errors."); +#endif + bool insertedCFI = insertCFIInstrs(MF); + MBBVector.clear(); + return insertedCFI; + } + + private: + struct MBBCFAInfo { + MachineBasicBlock *MBB; + /// Value of cfa offset valid at basic block entry. + int IncomingCFAOffset = -1; + /// Value of cfa offset valid at basic block exit. + int OutgoingCFAOffset = -1; + /// Value of cfa register valid at basic block entry. + unsigned IncomingCFARegister = 0; + /// Value of cfa register valid at basic block exit. + unsigned OutgoingCFARegister = 0; + /// If in/out cfa offset and register values for this block have already + /// been set or not. + bool Processed = false; + }; + + /// Contains cfa offset and register values valid at entry and exit of basic + /// blocks. + SmallVector<struct MBBCFAInfo, 4> MBBVector; + + /// Calculate cfa offset and register values valid at entry and exit for all + /// basic blocks in a function. + void calculateCFAInfo(MachineFunction &MF); + /// Calculate cfa offset and register values valid at basic block exit by + /// checking the block for CFI instructions. Block's incoming CFA info remains + /// the same. + void calculateOutgoingCFAInfo(struct MBBCFAInfo &MBBInfo); + /// Update in/out cfa offset and register values for successors of the basic + /// block. + void updateSuccCFAInfo(struct MBBCFAInfo &MBBInfo); + + /// Check if incoming CFA information of a basic block matches outgoing CFA + /// information of the previous block. If it doesn't, insert CFI instruction + /// at the beginning of the block that corrects the CFA calculation rule for + /// that block. + bool insertCFIInstrs(MachineFunction &MF); + /// Return the cfa offset value that should be set at the beginning of a MBB + /// if needed. The negated value is needed when creating CFI instructions that + /// set absolute offset. 
+ int getCorrectCFAOffset(MachineBasicBlock *MBB) { + return -MBBVector[MBB->getNumber()].IncomingCFAOffset; + } + + void report(const char *msg, MachineBasicBlock &MBB); + /// Go through each MBB in a function and check that outgoing offset and + /// register of its predecessors match incoming offset and register of that + /// MBB, as well as that incoming offset and register of its successors match + /// outgoing offset and register of the MBB. + unsigned verify(MachineFunction &MF); +}; +} + +char CFIInstrInserter::ID = 0; +INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter", + "Check CFA info and insert CFI instructions if needed", false, + false) +FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); } + +void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) { + // Initial CFA offset value i.e. the one valid at the beginning of the + // function. + int InitialOffset = + MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF); + // Initial CFA register value i.e. the one valid at the beginning of the + // function. + unsigned InitialRegister = + MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF); + + // Initialize MBBMap. + for (MachineBasicBlock &MBB : MF) { + struct MBBCFAInfo MBBInfo; + MBBInfo.MBB = &MBB; + MBBInfo.IncomingCFAOffset = InitialOffset; + MBBInfo.OutgoingCFAOffset = InitialOffset; + MBBInfo.IncomingCFARegister = InitialRegister; + MBBInfo.OutgoingCFARegister = InitialRegister; + MBBVector[MBB.getNumber()] = MBBInfo; + } + + // Set in/out cfa info for all blocks in the function. This traversal is based + // on the assumption that the first block in the function is the entry block + // i.e. that it has initial cfa offset and register values as incoming CFA + // information. + for (MachineBasicBlock &MBB : MF) { + if (MBBVector[MBB.getNumber()].Processed) continue; + calculateOutgoingCFAInfo(MBBVector[MBB.getNumber()]); + updateSuccCFAInfo(MBBVector[MBB.getNumber()]); + } +} + +void CFIInstrInserter::calculateOutgoingCFAInfo(struct MBBCFAInfo &MBBInfo) { + // Outgoing cfa offset set by the block. + int SetOffset = MBBInfo.IncomingCFAOffset; + // Outgoing cfa register set by the block. + unsigned SetRegister = MBBInfo.IncomingCFARegister; + const std::vector<MCCFIInstruction> &Instrs = + MBBInfo.MBB->getParent()->getFrameInstructions(); + + // Determine cfa offset and register set by the block. + for (MachineInstr &MI : + make_range(MBBInfo.MBB->instr_begin(), MBBInfo.MBB->instr_end())) { + if (MI.isCFIInstruction()) { + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + if (CFI.getOperation() == MCCFIInstruction::OpDefCfaRegister) { + SetRegister = CFI.getRegister(); + } else if (CFI.getOperation() == MCCFIInstruction::OpDefCfaOffset) { + SetOffset = CFI.getOffset(); + } else if (CFI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) { + SetOffset += CFI.getOffset(); + } else if (CFI.getOperation() == MCCFIInstruction::OpDefCfa) { + SetRegister = CFI.getRegister(); + SetOffset = CFI.getOffset(); + } + } + } + + MBBInfo.Processed = true; + + // Update outgoing CFA info. 
+ MBBInfo.OutgoingCFAOffset = SetOffset; + MBBInfo.OutgoingCFARegister = SetRegister; +} + +void CFIInstrInserter::updateSuccCFAInfo(struct MBBCFAInfo &MBBInfo) { + + for (MachineBasicBlock *Succ : MBBInfo.MBB->successors()) { + struct MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()]; + if (SuccInfo.Processed) continue; + SuccInfo.IncomingCFAOffset = MBBInfo.OutgoingCFAOffset; + SuccInfo.IncomingCFARegister = MBBInfo.OutgoingCFARegister; + calculateOutgoingCFAInfo(SuccInfo); + updateSuccCFAInfo(SuccInfo); + } +} + +bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { + + const struct MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()]; + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + bool InsertedCFIInstr = false; + + for (MachineBasicBlock &MBB : MF) { + // Skip the first MBB in a function + if (MBB.getNumber() == MF.front().getNumber()) continue; + + const struct MBBCFAInfo& MBBInfo = MBBVector[MBB.getNumber()]; + auto MBBI = MBBInfo.MBB->begin(); + DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); + + if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { + // If both outgoing offset and register of a previous block don't match + // incoming offset and register of this block, add a def_cfa instruction + // with the correct offset and register for this block. + if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + // If outgoing offset of a previous block doesn't match incoming offset + // of this block, add a def_cfa_offset instruction with the correct + // offset for this block. + } else { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaOffset( + nullptr, getCorrectCFAOffset(&MBB))); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + InsertedCFIInstr = true; + // If outgoing register of a previous block doesn't match incoming + // register of this block, add a def_cfa_register instruction with the + // correct register for this block. 
+ } else if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MBBInfo.IncomingCFARegister)); + BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + InsertedCFIInstr = true; + } + PrevMBBInfo = &MBBInfo; + } + return InsertedCFIInstr; +} + +void CFIInstrInserter::report(const char *msg, MachineBasicBlock &MBB) { + errs() << '\n'; + errs() << "*** " << msg << " ***\n" + << "- function: " << MBB.getParent()->getName() << "\n"; + errs() << "- basic block: BB#" << MBB.getNumber() << ' ' << MBB.getName() + << " (" << (const void *)&MBB << ')'; + errs() << '\n'; +} + +unsigned CFIInstrInserter::verify(MachineFunction &MF) { + unsigned ErrorNum = 0; + for (MachineBasicBlock &CurrMBB : MF) { + const struct MBBCFAInfo& CurrMBBInfo = MBBVector[CurrMBB.getNumber()]; + for (MachineBasicBlock *Pred : CurrMBB.predecessors()) { + const struct MBBCFAInfo& PredMBBInfo = MBBVector[Pred->getNumber()]; + // Check that outgoing offset values of predecessors match the incoming + // offset value of CurrMBB + if (PredMBBInfo.OutgoingCFAOffset != CurrMBBInfo.IncomingCFAOffset) { + report("The outgoing offset of a predecessor is inconsistent.", + CurrMBB); + errs() << "Predecessor BB#" << Pred->getNumber() + << " has outgoing offset (" << PredMBBInfo.OutgoingCFAOffset + << "), while BB#" << CurrMBB.getNumber() + << " has incoming offset (" << CurrMBBInfo.IncomingCFAOffset + << ").\n"; + ErrorNum++; + } + // Check that outgoing register values of predecessors match the incoming + // register value of CurrMBB + if (PredMBBInfo.OutgoingCFARegister != CurrMBBInfo.IncomingCFARegister) { + report("The outgoing register of a predecessor is inconsistent.", + CurrMBB); + errs() << "Predecessor BB#" << Pred->getNumber() + << " has outgoing register (" << PredMBBInfo.OutgoingCFARegister + << "), while BB#" << CurrMBB.getNumber() + << " has incoming register (" << CurrMBBInfo.IncomingCFARegister + << ").\n"; + ErrorNum++; + } + } + + for (MachineBasicBlock *Succ : CurrMBB.successors()) { + const struct MBBCFAInfo& SuccMBBInfo = MBBVector[Succ->getNumber()]; + // Check that incoming offset values of successors match the outgoing + // offset value of CurrMBB + if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset) { + report("The incoming offset of a successor is inconsistent.", CurrMBB); + errs() << "Successor BB#" << Succ->getNumber() + << " has incoming offset (" << SuccMBBInfo.IncomingCFAOffset + << "), while BB#" << CurrMBB.getNumber() + << " has outgoing offset (" << CurrMBBInfo.OutgoingCFAOffset + << ").\n"; + ErrorNum++; + } + // Check that incoming register values of successors match the outgoing + // register value of CurrMBB + if (SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) { + report("The incoming register of a successor is inconsistent.", + CurrMBB); + errs() << "Successor BB#" << Succ->getNumber() + << " has incoming register (" << SuccMBBInfo.IncomingCFARegister + << "), while BB#" << CurrMBB.getNumber() + << " has outgoing register (" << CurrMBBInfo.OutgoingCFARegister + << ").\n"; + ErrorNum++; + } + } + } + return ErrorNum; +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7ec7fda4e445..1ae5aa80bf99 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMCodeGen BuiltinGCs.cpp CalcSpillWeights.cpp CallingConvLower.cpp + CFIInstrInserter.cpp 
CodeGen.cpp CodeGenPrepare.cpp CountingFunctionInserter.cpp @@ -21,6 +22,7 @@ add_llvm_library(LLVMCodeGen EdgeBundles.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp + ExpandMemCmp.cpp ExpandPostRAPseudos.cpp ExpandReductions.cpp FaultMaps.cpp @@ -113,6 +115,7 @@ add_llvm_library(LLVMCodeGen RegisterPressure.cpp RegisterScavenging.cpp RenameIndependentSubregs.cpp + MIRCanonicalizerPass.cpp RegisterUsageInfo.cpp RegUsageInfoCollector.cpp RegUsageInfoPropagate.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 588f1791ce3c..d4ac5fd040c3 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index f4ccb4889d35..9d10d1b75f52 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); + initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeCountingFunctionInserterPass(Registry); initializeDeadMachineInstructionElimPass(Registry); @@ -30,6 +31,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); + initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); initializeFEntryInserterPass(Registry); initializeFinalizeMachineBundlesPass(Registry); @@ -99,6 +101,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeWinEHPreparePass(Registry); initializeXRayInstrumentationPass(Registry); + initializeMIRCanonicalizerPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 51f2a320b299..d6633a508f5d 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -113,6 +113,12 @@ STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); +STATISTIC(NumMemoryInstsPhiCreated, + "Number of phis created when address " + "computations were sunk to memory instructions"); +STATISTIC(NumMemoryInstsSelectCreated, + "Number of select created when address " + "computations were sunk to memory instructions"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumAndsAdded, @@ -123,12 +129,6 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); -STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); -STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant 
size"); -STATISTIC(NumMemCmpGreaterThanMax, - "Number of memcmp calls with size greater than max size"); -STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); - static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -189,10 +189,18 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); -static cl::opt<unsigned> MemCmpNumLoadsPerBlock( - "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), - cl::desc("The number of loads per basic block for inline expansion of " - "memcmp that is only being compared against zero.")); +static cl::opt<bool> DisableComplexAddrModes( + "disable-complex-addr-modes", cl::Hidden, cl::init(true), + cl::desc("Disables combining addressing modes with different parts " + "in optimizeMemoryInst.")); + +static cl::opt<bool> +AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), + cl::desc("Allow creation of Phis in Address sinking.")); + +static cl::opt<bool> +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), + cl::desc("Allow creation of selects in Address sinking.")); namespace { @@ -1182,6 +1190,7 @@ static bool SinkCast(CastInst *CI) { // If we removed all uses, nuke the cast. if (CI->use_empty()) { + salvageDebugInfo(*CI); CI->eraseFromParent(); MadeChange = true; } @@ -1697,699 +1706,6 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } -namespace { - -// This class provides helper functions to expand a memcmp library call into an -// inline expansion. -class MemCmpExpansion { - struct ResultBlock { - BasicBlock *BB = nullptr; - PHINode *PhiSrc1 = nullptr; - PHINode *PhiSrc2 = nullptr; - - ResultBlock() = default; - }; - - CallInst *const CI; - ResultBlock ResBlock; - const uint64_t Size; - unsigned MaxLoadSize; - uint64_t NumLoadsNonOneByte; - const uint64_t NumLoadsPerBlock; - std::vector<BasicBlock *> LoadCmpBlocks; - BasicBlock *EndBlock; - PHINode *PhiRes; - const bool IsUsedForZeroCmp; - const DataLayout &DL; - IRBuilder<> Builder; - // Represents the decomposition in blocks of the expansion. For example, - // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and - // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. - // TODO(courbet): Involve the target more in this computation. On X86, 7 - // bytes can be done more efficiently with two overlaping 4-byte loads than - // covering the interval with [{4, 0},{2, 4},{1, 6}}. - struct LoadEntry { - LoadEntry(unsigned LoadSize, uint64_t Offset) - : LoadSize(LoadSize), Offset(Offset) { - assert(Offset % LoadSize == 0 && "invalid load entry"); - } - - uint64_t getGEPIndex() const { return Offset / LoadSize; } - - // The size of the load for this block, in bytes. - const unsigned LoadSize; - // The offset of this load WRT the base pointer, in bytes. 
- const uint64_t Offset; - }; - SmallVector<LoadEntry, 8> LoadSequence; - - void createLoadCmpBlocks(); - void createResultBlock(); - void setupResultBlockPHINodes(); - void setupEndBlockPHINodes(); - Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); - void emitLoadCompareBlock(unsigned BlockIndex); - void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, - unsigned &LoadIndex); - void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); - void emitMemCmpResultBlock(); - Value *getMemCmpExpansionZeroCase(); - Value *getMemCmpEqZeroOneBlock(); - Value *getMemCmpOneBlock(); - - public: - MemCmpExpansion(CallInst *CI, uint64_t Size, - const TargetTransformInfo::MemCmpExpansionOptions &Options, - unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - unsigned NumLoadsPerBlock, const DataLayout &DL); - - unsigned getNumBlocks(); - uint64_t getNumLoads() const { return LoadSequence.size(); } - - Value *getMemCmpExpansion(); -}; - -} // end anonymous namespace - -// Initialize the basic block structure required for expansion of memcmp call -// with given maximum load size and memcmp size parameter. -// This structure includes: -// 1. A list of load compare blocks - LoadCmpBlocks. -// 2. An EndBlock, split from original instruction point, which is the block to -// return from. -// 3. ResultBlock, block to branch to for early exit when a -// LoadCmpBlock finds a difference. -MemCmpExpansion::MemCmpExpansion( - CallInst *const CI, uint64_t Size, - const TargetTransformInfo::MemCmpExpansionOptions &Options, - const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) - : CI(CI), - Size(Size), - MaxLoadSize(0), - NumLoadsNonOneByte(0), - NumLoadsPerBlock(NumLoadsPerBlock), - IsUsedForZeroCmp(IsUsedForZeroCmp), - DL(TheDataLayout), - Builder(CI) { - assert(Size > 0 && "zero blocks"); - // Scale the max size down if the target can load more bytes than we need. - size_t LoadSizeIndex = 0; - while (LoadSizeIndex < Options.LoadSizes.size() && - Options.LoadSizes[LoadSizeIndex] > Size) { - ++LoadSizeIndex; - } - this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; - // Compute the decomposition. - uint64_t CurSize = Size; - uint64_t Offset = 0; - while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { - const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; - assert(LoadSize > 0 && "zero load size"); - const uint64_t NumLoadsForThisSize = CurSize / LoadSize; - if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { - // Do not expand if the total number of loads is larger than what the - // target allows. Note that it's important that we exit before completing - // the expansion to avoid using a ton of memory to store the expansion for - // large sizes. - LoadSequence.clear(); - return; - } - if (NumLoadsForThisSize > 0) { - for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { - LoadSequence.push_back({LoadSize, Offset}); - Offset += LoadSize; - } - if (LoadSize > 1) { - ++NumLoadsNonOneByte; - } - CurSize = CurSize % LoadSize; - } - ++LoadSizeIndex; - } - assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); -} - -unsigned MemCmpExpansion::getNumBlocks() { - if (IsUsedForZeroCmp) - return getNumLoads() / NumLoadsPerBlock + - (getNumLoads() % NumLoadsPerBlock != 0 ? 
1 : 0); - return getNumLoads(); -} - -void MemCmpExpansion::createLoadCmpBlocks() { - for (unsigned i = 0; i < getNumBlocks(); i++) { - BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", - EndBlock->getParent(), EndBlock); - LoadCmpBlocks.push_back(BB); - } -} - -void MemCmpExpansion::createResultBlock() { - ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", - EndBlock->getParent(), EndBlock); -} - -// This function creates the IR instructions for loading and comparing 1 byte. -// It loads 1 byte from each source of the memcmp parameters with the given -// GEPIndex. It then subtracts the two loaded values and adds this result to the -// final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, - unsigned GEPIndex) { - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); - } - - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - - PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); - - if (BlockIndex < (LoadCmpBlocks.size() - 1)) { - // Early exit branch if difference found to EndBlock. Otherwise, continue to - // next LoadCmpBlock, - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, - ConstantInt::get(Diff->getType(), 0)); - BranchInst *CmpBr = - BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); - Builder.Insert(CmpBr); - } else { - // The last block has an unconditional branch to EndBlock. - BranchInst *CmpBr = BranchInst::Create(EndBlock); - Builder.Insert(CmpBr); - } -} - -/// Generate an equality comparison for one or more pairs of loaded values. -/// This is used in the case where the memcmp() call is compared equal or not -/// equal to zero. -Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, - unsigned &LoadIndex) { - assert(LoadIndex < getNumLoads() && - "getCompareLoadPairs() called with no remaining loads"); - std::vector<Value *> XorList, OrList; - Value *Diff; - - const unsigned NumLoads = - std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); - - // For a single-block expansion, start inserting before the memcmp call. - if (LoadCmpBlocks.empty()) - Builder.SetInsertPoint(CI); - else - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - - Value *Cmp = nullptr; - // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. The type for the combinations is the largest load - // type. - IntegerType *const MaxLoadType = - NumLoads == 1 ? 
nullptr - : IntegerType::get(CI->getContext(), MaxLoadSize * 8); - for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { - const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; - - IntegerType *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } - - // Get a constant or load a value for each source address. - Value *LoadSrc1 = nullptr; - if (auto *Source1C = dyn_cast<Constant>(Source1)) - LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); - if (!LoadSrc1) - LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - - Value *LoadSrc2 = nullptr; - if (auto *Source2C = dyn_cast<Constant>(Source2)) - LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); - if (!LoadSrc2) - LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (NumLoads != 1) { - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - // If we have multiple loads per block, we need to generate a composite - // comparison using xor+or. - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExt(Diff, MaxLoadType); - XorList.push_back(Diff); - } else { - // If there's only one load per block, we just compare the loaded values. - Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); - } - } - - auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { - std::vector<Value *> OutList; - for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { - Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); - OutList.push_back(Or); - } - if (InList.size() % 2 != 0) - OutList.push_back(InList.back()); - return OutList; - }; - - if (!Cmp) { - // Pairwise OR the XOR results. - OrList = pairWiseOr(XorList); - - // Pairwise OR the OR results until one result left. - while (OrList.size() != 1) { - OrList = pairWiseOr(OrList); - } - Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); - } - - return Cmp; -} - -void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, - unsigned &LoadIndex) { - Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); - - BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[BlockIndex + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, - // continue to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). 
- if (BlockIndex == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); - } -} - -// This function creates the IR intructions for loading and comparing using the -// given LoadSize. It loads the number of bytes specified by LoadSize from each -// source of the memcmp parameters. It then does a subtract to see if there was -// a difference in the loaded values. If a difference is found, it branches -// with an early exit to the ResultBlock for calculating which source was -// larger. Otherwise, it falls through to the either the next LoadCmpBlock or -// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with -// a special case through emitLoadCompareByteBlock. The special handling can -// simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { - // There is one load per block in this case, BlockIndex == LoadIndex. - const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; - - if (CurLoadEntry.LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, - CurLoadEntry.getGEPIndex()); - return; - } - - Type *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); - - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Get the base address using a GEP. - if (CurLoadEntry.Offset != 0) { - Source1 = Builder.CreateGEP( - LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - Source2 = Builder.CreateGEP( - LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); - } - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian()) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } - - // Add the loaded values to the phi nodes for calculating memcmp result only - // if result is not used in a zero equality. - if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); - } - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); - BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) - ? EndBlock - : LoadCmpBlocks[BlockIndex + 1]; - // Early exit branch if difference found to ResultBlock. Otherwise, continue - // to next LoadCmpBlock or EndBlock. 
- BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); - Builder.Insert(CmpBr); - - // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 - // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes). - if (BlockIndex == LoadCmpBlocks.size() - 1) { - Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); - } -} - -// This function populates the ResultBlock with a sequence to calculate the -// memcmp result. It compares the two loaded source values and returns -1 if -// src1 < src2 and 1 if src1 > src2. -void MemCmpExpansion::emitMemCmpResultBlock() { - // Special case: if memcmp result is used in a zero equality, result does not - // need to be calculated and can simply return 1. - if (IsUsedForZeroCmp) { - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); - PhiRes->addIncoming(Res, ResBlock.BB); - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - return; - } - BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); - Builder.SetInsertPoint(ResBlock.BB, InsertPt); - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, - ResBlock.PhiSrc2); - - Value *Res = - Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), - ConstantInt::get(Builder.getInt32Ty(), 1)); - - BranchInst *NewBr = BranchInst::Create(EndBlock); - Builder.Insert(NewBr); - PhiRes->addIncoming(Res, ResBlock.BB); -} - -void MemCmpExpansion::setupResultBlockPHINodes() { - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - Builder.SetInsertPoint(ResBlock.BB); - // Note: this assumes one load per block. - ResBlock.PhiSrc1 = - Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); - ResBlock.PhiSrc2 = - Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); -} - -void MemCmpExpansion::setupEndBlockPHINodes() { - Builder.SetInsertPoint(&EndBlock->front()); - PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); -} - -Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { - unsigned LoadIndex = 0; - // This loop populates each of the LoadCmpBlocks with the IR sequence to - // handle multiple loads per block. - for (unsigned I = 0; I < getNumBlocks(); ++I) { - emitLoadCompareBlockMultipleLoads(I, LoadIndex); - } - - emitMemCmpResultBlock(); - return PhiRes; -} - -/// A memcmp expansion that compares equality with 0 and only has one block of -/// load and compare can bypass the compare, branch, and phi IR that is required -/// in the general case. -Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { - unsigned LoadIndex = 0; - Value *Cmp = getCompareLoadPairs(0, LoadIndex); - assert(LoadIndex == getNumLoads() && "some entries were not consumed"); - return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); -} - -/// A memcmp expansion that only has one block of load and compare can bypass -/// the compare, branch, and phi IR that is required in the general case. -Value *MemCmpExpansion::getMemCmpOneBlock() { - assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); - - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. 
- if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian() && Size != 1) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (Size < 4) { - // The i8 and i16 cases don't need compares. We zext the loaded values and - // subtract them to get the suitable negative, zero, or positive i32 result. - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); - return Builder.CreateSub(LoadSrc1, LoadSrc2); - } - - // The result of memcmp is negative, zero, or positive, so produce that by - // subtracting 2 extended compare bits: sub (ugt, ult). - // If a target prefers to use selects to get -1/0/1, they should be able - // to transform this later. The inverse transform (going from selects to math) - // may not be possible in the DAG because the selects got converted into - // branches before we got there. - Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); - Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); - Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); - Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); - return Builder.CreateSub(ZextUGT, ZextULT); -} - -// This function expands the memcmp call into an inline expansion and returns -// the memcmp result. -Value *MemCmpExpansion::getMemCmpExpansion() { - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { - BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); - setupEndBlockPHINodes(); - createResultBlock(); - - // If return value of memcmp is not used in a zero equality, we need to - // calculate which source was larger. The calculation requires the - // two loaded source values of each load compare block. - // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); - - // Create the number of required load compare basic blocks. - createLoadCmpBlocks(); - - // Update the terminator added by splitBasicBlock to branch to the first - // LoadCmpBlock. - StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); - } - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - if (IsUsedForZeroCmp) - return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() - : getMemCmpExpansionZeroCase(); - - // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); - - for (unsigned I = 0; I < getNumBlocks(); ++I) { - emitLoadCompareBlock(I); - } - - emitMemCmpResultBlock(); - return PhiRes; -} - -// This function checks to see if an expansion of memcmp can be generated. 
-// It checks for constant compare size that is less than the max inline size. -// If an expansion cannot occur, returns false to leave as a library call. -// Otherwise, the library call is replaced with a new IR instruction sequence. -/// We want to transform: -/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) -/// To: -/// loadbb: -/// %0 = bitcast i32* %buffer2 to i8* -/// %1 = bitcast i32* %buffer1 to i8* -/// %2 = bitcast i8* %1 to i64* -/// %3 = bitcast i8* %0 to i64* -/// %4 = load i64, i64* %2 -/// %5 = load i64, i64* %3 -/// %6 = call i64 @llvm.bswap.i64(i64 %4) -/// %7 = call i64 @llvm.bswap.i64(i64 %5) -/// %8 = sub i64 %6, %7 -/// %9 = icmp ne i64 %8, 0 -/// br i1 %9, label %res_block, label %loadbb1 -/// res_block: ; preds = %loadbb2, -/// %loadbb1, %loadbb -/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] -/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] -/// %10 = icmp ult i64 %phi.src1, %phi.src2 -/// %11 = select i1 %10, i32 -1, i32 1 -/// br label %endblock -/// loadbb1: ; preds = %loadbb -/// %12 = bitcast i32* %buffer2 to i8* -/// %13 = bitcast i32* %buffer1 to i8* -/// %14 = bitcast i8* %13 to i32* -/// %15 = bitcast i8* %12 to i32* -/// %16 = getelementptr i32, i32* %14, i32 2 -/// %17 = getelementptr i32, i32* %15, i32 2 -/// %18 = load i32, i32* %16 -/// %19 = load i32, i32* %17 -/// %20 = call i32 @llvm.bswap.i32(i32 %18) -/// %21 = call i32 @llvm.bswap.i32(i32 %19) -/// %22 = zext i32 %20 to i64 -/// %23 = zext i32 %21 to i64 -/// %24 = sub i64 %22, %23 -/// %25 = icmp ne i64 %24, 0 -/// br i1 %25, label %res_block, label %loadbb2 -/// loadbb2: ; preds = %loadbb1 -/// %26 = bitcast i32* %buffer2 to i8* -/// %27 = bitcast i32* %buffer1 to i8* -/// %28 = bitcast i8* %27 to i16* -/// %29 = bitcast i8* %26 to i16* -/// %30 = getelementptr i16, i16* %28, i16 6 -/// %31 = getelementptr i16, i16* %29, i16 6 -/// %32 = load i16, i16* %30 -/// %33 = load i16, i16* %31 -/// %34 = call i16 @llvm.bswap.i16(i16 %32) -/// %35 = call i16 @llvm.bswap.i16(i16 %33) -/// %36 = zext i16 %34 to i64 -/// %37 = zext i16 %35 to i64 -/// %38 = sub i64 %36, %37 -/// %39 = icmp ne i64 %38, 0 -/// br i1 %39, label %res_block, label %loadbb3 -/// loadbb3: ; preds = %loadbb2 -/// %40 = bitcast i32* %buffer2 to i8* -/// %41 = bitcast i32* %buffer1 to i8* -/// %42 = getelementptr i8, i8* %41, i8 14 -/// %43 = getelementptr i8, i8* %40, i8 14 -/// %44 = load i8, i8* %42 -/// %45 = load i8, i8* %43 -/// %46 = zext i8 %44 to i32 -/// %47 = zext i8 %45 to i32 -/// %48 = sub i32 %46, %47 -/// br label %endblock -/// endblock: ; preds = %res_block, -/// %loadbb3 -/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] -/// ret i32 %phi.res -static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL) { - NumMemCmpCalls++; - - // Early exit from expansion if -Oz. - if (CI->getFunction()->optForMinSize()) - return false; - - // Early exit from expansion if size is not a constant. - ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeCast) { - NumMemCmpNotConstant++; - return false; - } - const uint64_t SizeVal = SizeCast->getZExtValue(); - - if (SizeVal == 0) { - return false; - } - - // TTI call to check if target would like to expand memcmp. Also, get the - // available load sizes. 
- const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); - if (!Options) return false; - - const unsigned MaxNumLoads = - TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); - - MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, - IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); - - // Don't expand if this will require more loads than desired by the target. - if (Expansion.getNumLoads() == 0) { - NumMemCmpGreaterThanMax++; - return false; - } - - NumMemCmpInlined++; - - Value *Res = Expansion.getMemCmpExpansion(); - - // Replace call with result of expansion and erase call. - CI->replaceAllUsesWith(Res); - CI->eraseFromParent(); - - return true; -} - bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); @@ -2542,12 +1858,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { return true; } - LibFunc Func; - if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && - Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { - ModifiedDT = true; - return true; - } return false; } @@ -3377,8 +2687,65 @@ private: Value *PromotedOperand) const; }; +/// \brief Keep track of simplification of Phi nodes. +/// Accept the set of all phi nodes and erase phi node from this set +/// if it is simplified. +class SimplificationTracker { + DenseMap<Value *, Value *> Storage; + const SimplifyQuery &SQ; + SmallPtrSetImpl<PHINode *> &AllPhiNodes; + SmallPtrSetImpl<SelectInst *> &AllSelectNodes; + +public: + SimplificationTracker(const SimplifyQuery &sq, + SmallPtrSetImpl<PHINode *> &APN, + SmallPtrSetImpl<SelectInst *> &ASN) + : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} + + Value *Get(Value *V) { + do { + auto SV = Storage.find(V); + if (SV == Storage.end()) + return V; + V = SV->second; + } while (true); + } + + Value *Simplify(Value *Val) { + SmallVector<Value *, 32> WorkList; + SmallPtrSet<Value *, 32> Visited; + WorkList.push_back(Val); + while (!WorkList.empty()) { + auto P = WorkList.pop_back_val(); + if (!Visited.insert(P).second) + continue; + if (auto *PI = dyn_cast<Instruction>(P)) + if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) { + for (auto *U : PI->users()) + WorkList.push_back(cast<Value>(U)); + Put(PI, V); + PI->replaceAllUsesWith(V); + if (auto *PHI = dyn_cast<PHINode>(PI)) + AllPhiNodes.erase(PHI); + if (auto *Select = dyn_cast<SelectInst>(PI)) + AllSelectNodes.erase(Select); + PI->eraseFromParent(); + } + } + return Get(Val); + } + + void Put(Value *From, Value *To) { + Storage.insert({ From, To }); + } +}; + /// \brief A helper class for combining addressing modes. class AddressingModeCombiner { + typedef std::pair<Value *, BasicBlock *> ValueInBB; + typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping; + typedef std::pair<PHINode *, PHINode *> PHIPair; + private: /// The addressing modes we've collected. SmallVector<ExtAddrMode, 16> AddrModes; @@ -3389,7 +2756,19 @@ private: /// Are the AddrModes that we have all just equal to their original values? bool AllAddrModesTrivial = true; + /// Common Type for all different fields in addressing modes. + Type *CommonType; + + /// SimplifyQuery for simplifyInstruction utility. + const SimplifyQuery &SQ; + + /// Original Address. 
+ ValueInBB Original; + public: + AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue) + : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} + /// \brief Get the combined AddrMode const ExtAddrMode &getAddrMode() const { return AddrModes[0]; @@ -3457,12 +2836,356 @@ public: if (AllAddrModesTrivial) return false; - // TODO: Combine multiple AddrModes by inserting a select or phi for the - // field in which the AddrModes differ. - return false; + if (DisableComplexAddrModes) + return false; + + // For now we support only different base registers. + // TODO: enable others. + if (DifferentField != ExtAddrMode::BaseRegField) + return false; + + // Build a map between <original value, basic block where we saw it> to + // value of base register. + FoldAddrToValueMapping Map; + initializeMap(Map); + + Value *CommonValue = findCommon(Map); + if (CommonValue) + AddrModes[0].BaseReg = CommonValue; + return CommonValue != nullptr; + } + +private: + /// \brief Initialize Map with anchor values. For address seen in some BB + /// we set the value of different field saw in this address. + /// If address is not an instruction than basic block is set to null. + /// At the same time we find a common type for different field we will + /// use to create new Phi/Select nodes. Keep it in CommonType field. + void initializeMap(FoldAddrToValueMapping &Map) { + // Keep track of keys where the value is null. We will need to replace it + // with constant null when we know the common type. + SmallVector<ValueInBB, 2> NullValue; + for (auto &AM : AddrModes) { + BasicBlock *BB = nullptr; + if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue)) + BB = I->getParent(); + + // For now we support only base register as different field. + // TODO: Enable others. + Value *DV = AM.BaseReg; + if (DV) { + if (CommonType) + assert(CommonType == DV->getType() && "Different types detected!"); + else + CommonType = DV->getType(); + Map[{ AM.OriginalValue, BB }] = DV; + } else { + NullValue.push_back({ AM.OriginalValue, BB }); + } + } + assert(CommonType && "At least one non-null value must be!"); + for (auto VIBB : NullValue) + Map[VIBB] = Constant::getNullValue(CommonType); + } + + /// \brief We have mapping between value A and basic block where value A + /// seen to other value B where B was a field in addressing mode represented + /// by A. Also we have an original value C representin an address in some + /// basic block. Traversing from C through phi and selects we ended up with + /// A's in a map. This utility function tries to find a value V which is a + /// field in addressing mode C and traversing through phi nodes and selects + /// we will end up in corresponded values B in a map. + /// The utility will create a new Phi/Selects if needed. + // The simple example looks as follows: + // BB1: + // p1 = b1 + 40 + // br cond BB2, BB3 + // BB2: + // p2 = b2 + 40 + // br BB3 + // BB3: + // p = phi [p1, BB1], [p2, BB2] + // v = load p + // Map is + // <p1, BB1> -> b1 + // <p2, BB2> -> b2 + // Request is + // <p, BB3> -> ? + // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 + Value *findCommon(FoldAddrToValueMapping &Map) { + // Tracks of new created Phi nodes. + SmallPtrSet<PHINode *, 32> NewPhiNodes; + // Tracks of new created Select nodes. + SmallPtrSet<SelectInst *, 32> NewSelectNodes; + // Tracks the simplification of new created phi nodes. The reason we use + // this mapping is because we will add new created Phi nodes in AddrToBase. 
+ // Simplification of Phi nodes is recursive, so some Phi node may + // be simplified after we added it to AddrToBase. + // Using this mapping we can find the current value in AddrToBase. + SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); + + // First step, DFS to create PHI nodes for all intermediate blocks. + // Also fill traverse order for the second step. + SmallVector<ValueInBB, 32> TraverseOrder; + InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); + + // Second Step, fill new nodes by merged values and simplify if possible. + FillPlaceholders(Map, TraverseOrder, ST); + + if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + // Now we'd like to match New Phi nodes to existed ones. + unsigned PhiNotMatchedCount = 0; + if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { + DestroyNodes(NewPhiNodes); + DestroyNodes(NewSelectNodes); + return nullptr; + } + + auto *Result = ST.Get(Map.find(Original)->second); + if (Result) { + NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; + NumMemoryInstsSelectCreated += NewSelectNodes.size(); + } + return Result; + } + + /// \brief Destroy nodes from a set. + template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) { + // For safe erasing, replace the Phi with dummy value first. + auto Dummy = UndefValue::get(CommonType); + for (auto I : Instructions) { + I->replaceAllUsesWith(Dummy); + I->eraseFromParent(); + } + } + + /// \brief Try to match PHI node to Candidate. + /// Matcher tracks the matched Phi nodes. + bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, + DenseSet<PHIPair> &Matcher, + SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) { + SmallVector<PHIPair, 8> WorkList; + Matcher.insert({ PHI, Candidate }); + WorkList.push_back({ PHI, Candidate }); + SmallSet<PHIPair, 8> Visited; + while (!WorkList.empty()) { + auto Item = WorkList.pop_back_val(); + if (!Visited.insert(Item).second) + continue; + // We iterate over all incoming values to Phi to compare them. + // If values are different and both of them Phi and the first one is a + // Phi we added (subject to match) and both of them is in the same basic + // block then we can match our pair if values match. So we state that + // these values match and add it to work list to verify that. + for (auto B : Item.first->blocks()) { + Value *FirstValue = Item.first->getIncomingValueForBlock(B); + Value *SecondValue = Item.second->getIncomingValueForBlock(B); + if (FirstValue == SecondValue) + continue; + + PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue); + PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue); + + // One of them is not Phi or + // The first one is not Phi node from the set we'd like to match or + // Phi nodes from different basic blocks then + // we will not be able to match. + if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || + FirstPhi->getParent() != SecondPhi->getParent()) + return false; + + // If we already matched them then continue. + if (Matcher.count({ FirstPhi, SecondPhi })) + continue; + // So the values are different and does not match. So we need them to + // match. + Matcher.insert({ FirstPhi, SecondPhi }); + // But me must check it. + WorkList.push_back({ FirstPhi, SecondPhi }); + } + } + return true; } -}; + /// \brief For the given set of PHI nodes try to find their equivalents. + /// Returns false if this matching fails and creation of new Phi is disabled. 
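As a side note on this new combining path: at the source level it targets shapes like the one sketched below, which corresponds to the BB1/BB2/BB3 example in the findCommon() comment above. The function name and types are illustrative only, not taken from the patch; the point is simply that the two addressing modes differ only in their base pointer, so a phi (or select) of the bases can stand in as a single combined base register.

// Illustrative only: both arms compute an address with the same offset but a
// different base pointer. Previously the differing BaseReg blocked combining;
// with findCommon() a phi/select of b1 and b2 can be used instead.
int use_combined_base(bool c, int *b1, int *b2) {
  int *p = c ? (b1 + 10) : (b2 + 10); // p1 = b1 + 40, p2 = b2 + 40 (bytes)
  return *p;                          // the load can fold "phi(b1, b2) + 40"
}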
+ bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch, + SimplificationTracker &ST, bool AllowNewPhiNodes, + unsigned &PhiNotMatchedCount) { + DenseSet<PHIPair> Matched; + SmallPtrSet<PHINode *, 8> WillNotMatch; + while (PhiNodesToMatch.size()) { + PHINode *PHI = *PhiNodesToMatch.begin(); + + // Add us, if no Phi nodes in the basic block we do not match. + WillNotMatch.clear(); + WillNotMatch.insert(PHI); + + // Traverse all Phis until we found equivalent or fail to do that. + bool IsMatched = false; + for (auto &P : PHI->getParent()->phis()) { + if (&P == PHI) + continue; + if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) + break; + // If it does not match, collect all Phi nodes from matcher. + // if we end up with no match, them all these Phi nodes will not match + // later. + for (auto M : Matched) + WillNotMatch.insert(M.first); + Matched.clear(); + } + if (IsMatched) { + // Replace all matched values and erase them. + for (auto MV : Matched) { + MV.first->replaceAllUsesWith(MV.second); + PhiNodesToMatch.erase(MV.first); + ST.Put(MV.first, MV.second); + MV.first->eraseFromParent(); + } + Matched.clear(); + continue; + } + // If we are not allowed to create new nodes then bail out. + if (!AllowNewPhiNodes) + return false; + // Just remove all seen values in matcher. They will not match anything. + PhiNotMatchedCount += WillNotMatch.size(); + for (auto *P : WillNotMatch) + PhiNodesToMatch.erase(P); + } + return true; + } + /// \brief Fill the placeholder with values from predecessors and simplify it. + void FillPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SimplificationTracker &ST) { + while (!TraverseOrder.empty()) { + auto Current = TraverseOrder.pop_back_val(); + assert(Map.find(Current) != Map.end() && "No node to fill!!!"); + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + Value *V = Map[Current]; + + if (SelectInst *Select = dyn_cast<SelectInst>(V)) { + // CurrentValue also must be Select. + auto *CurrentSelect = cast<SelectInst>(CurrentValue); + auto *TrueValue = CurrentSelect->getTrueValue(); + ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(TrueItem) != Map.end() && "No True Value!"); + Select->setTrueValue(Map[TrueItem]); + auto *FalseValue = CurrentSelect->getFalseValue(); + ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue) + ? CurrentBlock + : nullptr }; + assert(Map.find(FalseItem) != Map.end() && "No False Value!"); + Select->setFalseValue(Map[FalseItem]); + } else { + // Must be a Phi node then. + PHINode *PHI = cast<PHINode>(V); + // Fill the Phi node with values from predecessors. + bool IsDefinedInThisBB = + cast<Instruction>(CurrentValue)->getParent() == CurrentBlock; + auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue); + for (auto B : predecessors(CurrentBlock)) { + Value *PV = IsDefinedInThisBB + ? CurrentPhi->getIncomingValueForBlock(B) + : CurrentValue; + ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr }; + assert(Map.find(item) != Map.end() && "No predecessor Value!"); + PHI->addIncoming(ST.Get(Map[item]), B); + } + } + // Simplify if possible. + Map[Current] = ST.Simplify(V); + } + } + + /// Starting from value recursively iterates over predecessors up to known + /// ending values represented in a map. For each traversed block inserts + /// a placeholder Phi or Select. + /// Reports all new created Phi/Select nodes by adding them to set. 
+ /// Also reports and order in what basic blocks have been traversed. + void InsertPlaceholders(FoldAddrToValueMapping &Map, + SmallVectorImpl<ValueInBB> &TraverseOrder, + SmallPtrSetImpl<PHINode *> &NewPhiNodes, + SmallPtrSetImpl<SelectInst *> &NewSelectNodes) { + SmallVector<ValueInBB, 32> Worklist; + assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) && + "Address must be a Phi or Select node"); + auto *Dummy = UndefValue::get(CommonType); + Worklist.push_back(Original); + while (!Worklist.empty()) { + auto Current = Worklist.pop_back_val(); + // If value is not an instruction it is something global, constant, + // parameter and we can say that this value is observable in any block. + // Set block to null to denote it. + // Also please take into account that it is how we build anchors. + if (!isa<Instruction>(Current.first)) + Current.second = nullptr; + // if it is already visited or it is an ending value then skip it. + if (Map.find(Current) != Map.end()) + continue; + TraverseOrder.push_back(Current); + + Value *CurrentValue = Current.first; + BasicBlock *CurrentBlock = Current.second; + // CurrentValue must be a Phi node or select. All others must be covered + // by anchors. + Instruction *CurrentI = cast<Instruction>(CurrentValue); + bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock; + + unsigned PredCount = + std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock)); + // if Current Value is not defined in this basic block we are interested + // in values in predecessors. + if (!IsDefinedInThisBB) { + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentValue, B }); + continue; + } + // Value is defined in this basic block. + if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) { + // Is it OK to get metadata from OrigSelect?! + // Create a Select placeholder with dummy value. + SelectInst *Select = + SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy, + OrigSelect->getName(), OrigSelect, OrigSelect); + Map[Current] = Select; + NewSelectNodes.insert(Select); + // We are interested in True and False value in this basic block. + Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock }); + Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock }); + } else { + // It must be a Phi node then. + auto *CurrentPhi = cast<PHINode>(CurrentI); + // Create new Phi node for merge of bases. + assert(PredCount && "Unreachable block?!"); + PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi", + &CurrentBlock->front()); + Map[Current] = PHI; + NewPhiNodes.insert(PHI); + + // Add all predecessors in work list. + for (auto B : predecessors(CurrentBlock)) + Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B }); + } + } + } +}; } // end anonymous namespace /// Try adding ScaleReg*Scale to the current addressing mode. @@ -4555,7 +4278,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the graph are compatible. 
bool PhiOrSelectSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; - AddressingModeCombiner AddrModes; + const SimplifyQuery SQ(*DL, TLInfo); + AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() }); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -6715,7 +6439,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); // Leave dbg.values that refer to an alloca alone. These - // instrinsics describe the address of a variable (= the alloca) + // intrinsics describe the address of a variable (= the alloca) // being taken. They should not be moved next to the alloca // (and to the beginning of the scope), but rather stay close to // where said address is used. diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a791c01c48b3..9ef172274c16 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -26,11 +26,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index cf21316ec22d..0123afb4cd82 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -29,12 +29,12 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 91d18e2bcaa6..ef1452087f21 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index ab9a0592e017..2613471714ef 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -34,12 +34,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git 
a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 402afe75b141..cb6c3ae04c72 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -30,10 +30,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index e272d25047e6..28f716ee6c27 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp new file mode 100644 index 000000000000..c5910c18d89b --- /dev/null +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -0,0 +1,828 @@ +//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to partially inline the fast path of well-known library +// functions, such as using square-root instructions for cases where sqrt() +// does not need to set errno. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "expandmemcmp" + +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + +static cl::opt<unsigned> MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + +namespace { + + +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. 
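Before the class body, it may help to see the load-sequence computation in isolation. The standalone sketch below mirrors the greedy loop in the MemCmpExpansion constructor further down; the helper name and the {8, 4, 2, 1} size list are illustrative assumptions, not part of the patch.

#include <cstdint>
#include <utility>
#include <vector>

// Greedily cover Size bytes with the target-provided load sizes (given in
// decreasing order), recording (load size, byte offset) pairs. An empty
// result stands for "too many loads, keep the libcall".
std::vector<std::pair<unsigned, uint64_t>>
computeLoadSequence(uint64_t Size, const std::vector<unsigned> &LoadSizes,
                    unsigned MaxNumLoads) {
  std::vector<std::pair<unsigned, uint64_t>> Seq;
  uint64_t Offset = 0;
  for (unsigned LoadSize : LoadSizes) {
    uint64_t NumLoads = Size / LoadSize;
    if (Seq.size() + NumLoads > MaxNumLoads)
      return {}; // exceeds what the target allows
    for (uint64_t I = 0; I != NumLoads; ++I) {
      Seq.push_back({LoadSize, Offset});
      Offset += LoadSize;
    }
    Size %= LoadSize;
  }
  return Seq;
}

// computeLoadSequence(15, {8, 4, 2, 1}, 8) yields {8,0} {4,8} {2,12} {1,14},
// i.e. the i64/i32/i16/i8 chain shown in the expandMemCmp() comment later in
// this file.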
+class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB = nullptr; + PHINode *PhiSrc1 = nullptr; + PHINode *PhiSrc2 = nullptr; + + ResultBlock() = default; + }; + + CallInst *const CI; + ResultBlock ResBlock; + const uint64_t Size; + unsigned MaxLoadSize; + uint64_t NumLoadsNonOneByte; + const uint64_t NumLoadsPerBlock; + std::vector<BasicBlock *> LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + const bool IsUsedForZeroCmp; + const DataLayout &DL; + IRBuilder<> Builder; + // Represents the decomposition in blocks of the expansion. For example, + // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {32, 1}. + // TODO(courbet): Involve the target more in this computation. On X86, 7 + // bytes can be done more efficiently with two overlaping 4-byte loads than + // covering the interval with [{4, 0},{2, 4},{1, 6}}. + struct LoadEntry { + LoadEntry(unsigned LoadSize, uint64_t Offset) + : LoadSize(LoadSize), Offset(Offset) { + assert(Offset % LoadSize == 0 && "invalid load entry"); + } + + uint64_t getGEPIndex() const { return Offset / LoadSize; } + + // The size of the load for this block, in bytes. + const unsigned LoadSize; + // The offset of this load WRT the base pointer, in bytes. + const uint64_t Offset; + }; + SmallVector<LoadEntry, 8> LoadSequence; + + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); + void emitLoadCompareBlock(unsigned BlockIndex); + void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(); + Value *getMemCmpEqZeroOneBlock(); + Value *getMemCmpOneBlock(); + + public: + MemCmpExpansion(CallInst *CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + unsigned NumLoadsPerBlock, const DataLayout &DL); + + unsigned getNumBlocks(); + uint64_t getNumLoads() const { return LoadSequence.size(); } + + Value *getMemCmpExpansion(); +}; + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion( + CallInst *const CI, uint64_t Size, + const TargetTransformInfo::MemCmpExpansionOptions &Options, + const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, + const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) + : CI(CI), + Size(Size), + MaxLoadSize(0), + NumLoadsNonOneByte(0), + NumLoadsPerBlock(NumLoadsPerBlock), + IsUsedForZeroCmp(IsUsedForZeroCmp), + DL(TheDataLayout), + Builder(CI) { + assert(Size > 0 && "zero blocks"); + // Scale the max size down if the target can load more bytes than we need. + size_t LoadSizeIndex = 0; + while (LoadSizeIndex < Options.LoadSizes.size() && + Options.LoadSizes[LoadSizeIndex] > Size) { + ++LoadSizeIndex; + } + this->MaxLoadSize = Options.LoadSizes[LoadSizeIndex]; + // Compute the decomposition. 
+ uint64_t CurSize = Size; + uint64_t Offset = 0; + while (CurSize && LoadSizeIndex < Options.LoadSizes.size()) { + const unsigned LoadSize = Options.LoadSizes[LoadSizeIndex]; + assert(LoadSize > 0 && "zero load size"); + const uint64_t NumLoadsForThisSize = CurSize / LoadSize; + if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) { + // Do not expand if the total number of loads is larger than what the + // target allows. Note that it's important that we exit before completing + // the expansion to avoid using a ton of memory to store the expansion for + // large sizes. + LoadSequence.clear(); + return; + } + if (NumLoadsForThisSize > 0) { + for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } + CurSize = CurSize % LoadSize; + } + ++LoadSizeIndex; + } + assert(LoadSequence.size() <= MaxNumLoads && "broken invariant"); +} + +unsigned MemCmpExpansion::getNumBlocks() { + if (IsUsedForZeroCmp) + return getNumLoads() / NumLoadsPerBlock + + (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads(); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < getNumBlocks(); i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp parameters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, + unsigned GEPIndex) { + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); + + if (BlockIndex < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock, + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock. 
+ BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, + unsigned &LoadIndex) { + assert(LoadIndex < getNumLoads() && + "getCompareLoadPairs() called with no remaining loads"); + std::vector<Value *> XorList, OrList; + Value *Diff; + + const unsigned NumLoads = + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); + + // For a single-block expansion, start inserting before the memcmp call. + if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + + Value *Cmp = nullptr; + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. The type for the combinations is the largest load + // type. + IntegerType *const MaxLoadType = + NumLoads == 1 ? nullptr + : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { + const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; + + IntegerType *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Get a constant or load a value for each source address. + Value *LoadSrc1 = nullptr; + if (auto *Source1C = dyn_cast<Constant>(Source1)) + LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); + if (!LoadSrc1) + LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + + Value *LoadSrc2 = nullptr; + if (auto *Source2C = dyn_cast<Constant>(Source2)) + LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); + if (!LoadSrc2) + LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExt(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } + } + + auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { + std::vector<Value *> OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. 
+ while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + } + + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex) { + Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); + + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, + // continue to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function creates the IR intructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to the either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { + // There is one load per block in this case, BlockIndex == LoadIndex. + const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; + + if (CurLoadEntry.LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, + CurLoadEntry.getGEPIndex()); + return; + } + + Type *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + } + + // Load LoadSizeType from the base address. 
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian()) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); + } + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[BlockIndex + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, continue + // to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (BlockIndex == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock() { + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. + if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + // Note: this assumes one load per block. + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { + unsigned LoadIndex = 0; + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. 
+ for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlockMultipleLoads(I, LoadIndex); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { + unsigned LoadIndex = 0; + Value *Cmp = getCompareLoadPairs(0, LoadIndex); + assert(LoadIndex == getNumLoads() && "some entries were not consumed"); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + +/// A memcmp expansion that only has one block of load and compare can bypass +/// the compare, branch, and phi IR that is required in the general case. +Value *MemCmpExpansion::getMemCmpOneBlock() { + assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian() && Size != 1) { + Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (Size < 4) { + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); + return Builder.CreateSub(LoadSrc1, LoadSrc2); + } + + // The result of memcmp is negative, zero, or positive, so produce that by + // subtracting 2 extended compare bits: sub (ugt, ult). + // If a target prefers to use selects to get -1/0/1, they should be able + // to transform this later. The inverse transform (going from selects to math) + // may not be possible in the DAG because the selects got converted into + // branches before we got there. + Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); + Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); + Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); + return Builder.CreateSub(ZextUGT, ZextULT); +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion() { + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcpy here, + // we choose to handle this case too to avoid fragmented lowering. 
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + if (IsUsedForZeroCmp) + return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() + : getMemCmpExpansionZeroCase(); + + // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). + if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlock(I); + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced with a new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, %23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 
0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) + return false; + + // Early exit from expansion if size is not a constant. + ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + const uint64_t SizeVal = SizeCast->getZExtValue(); + + if (SizeVal == 0) { + return false; + } + + // TTI call to check if target would like to expand memcmp. Also, get the + // available load sizes. + const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp); + if (!Options) return false; + + const unsigned MaxNumLoads = + TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + + MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, + IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); + + // Don't expand if this will require more loads than desired by the target. + if (Expansion.getNumLoads() == 0) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + Value *Res = Expansion.getMemCmpExpansion(); + + // Replace call with result of expansion and erase call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + + + +class ExpandMemCmpPass : public FunctionPass { +public: + static char ID; + + ExpandMemCmpPass() : FunctionPass(ID) { + initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) { + return false; + } + const TargetLowering* TL = + TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); + + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const TargetTransformInfo *TTI = + &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + auto PA = runImpl(F, TLI, TTI, TL); + return !PA.areAllPreserved(); + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + const TargetLowering* TL); + // Returns true if a change was made. 
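For orientation, the kind of call site this pass rewrites looks roughly like the functions below; this is purely illustrative, and the concrete expansion depends on the load sizes the target reports through enableMemCmpExpansion(). A constant-size memcmp whose result is only compared against zero is the best case: it can become a handful of wide loads, an xor/or combine, and a single compare instead of a library call.

#include <cstring>

// Illustrative call site: constant size, result only used in an equality
// test. On a target that allows 8-byte loads this can become two loads per
// buffer, an xor/or combine, and one icmp, with no call to memcmp().
bool sixteenBytesEqual(const void *A, const void *B) {
  return std::memcmp(A, B, 16) == 0;
}

// A non-constant size (or a function compiled for minimum size) is left as a
// libcall, matching the early exits in expandMemCmp() above.
bool bytesEqual(const void *A, const void *B, unsigned long N) {
  return std::memcmp(A, B, N) == 0;
}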
+ bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL); +}; + +bool ExpandMemCmpPass::runOnBlock( + BasicBlock &BB, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, const TargetLowering* TL, + const DataLayout& DL) { + for (Instruction& I : BB) { + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI) { + continue; + } + LibFunc Func; + if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) { + return true; + } + } + return false; +} + + +PreservedAnalyses ExpandMemCmpPass::runImpl( + Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, + const TargetLowering* TL) { + const DataLayout& DL = F.getParent()->getDataLayout(); + bool MadeChanges = false; + for (auto BBIt = F.begin(); BBIt != F.end();) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { + MadeChanges = true; + // If changes were made, restart the function from the beginning, since + // the structure of the function was changed. + BBIt = F.begin(); + } else { + ++BBIt; + } + } + return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +} // namespace + +char ExpandMemCmpPass::ID = 0; +INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", + "Expand memcmp() to load/stores", false, false) + +FunctionPass *llvm::createExpandMemCmpPass() { + return new ExpandMemCmpPass(); +} diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 4ce86f27a7dd..b73aeb183828 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp index 70dca3b74b2f..abf487a4f198 100644 --- a/lib/CodeGen/ExpandReductions.cpp +++ b/lib/CodeGen/ExpandReductions.cpp @@ -95,7 +95,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { // and it can't be handled by generating this shuffle sequence. // TODO: Implement scalarization of ordered reductions here for targets // without native support. 
- if (!II->getFastMathFlags().unsafeAlgebra()) + if (!II->getFastMathFlags().isFast()) continue; Vec = II->getArgOperand(1); break; diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp index 9781338f9526..07cb7016139f 100644 --- a/lib/CodeGen/FEntryInserter.cpp +++ b/lib/CodeGen/FEntryInserter.cpp @@ -15,10 +15,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 35246545ca91..2064ce7ac7b3 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -18,14 +18,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 8e31ed0a0153..45eb605c3c2c 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -54,7 +54,7 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index fb954f3c3f16..59f34d730d00 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -20,9 +20,9 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <iterator> diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 99f605abe065..a8cfe0b89a0c 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -173,12 +173,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MIRBuilder.setInstr(MI); + int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + int64_t NarrowSize = NarrowTy.getSizeInBits(); + switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_IMPLICIT_DEF: { - int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + // FIXME: add support for when SizeOp0 isn't an exact 
multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { @@ -191,9 +197,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_ADD: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; // Expand in terms of carry-setting/consuming G_ADDE instructions. - int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / - NarrowTy.getSizeInBits(); + int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); @@ -221,9 +230,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (TypeIdx != 1) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize; + int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + // FIXME: add support for when SizeOp1 isn't an exact multiple of + // NarrowSize. + if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -270,12 +282,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_INSERT: { - if (TypeIdx != 0) + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; - int64_t NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + int NumParts = SizeOp0 / NarrowSize; SmallVector<unsigned, 2> SrcRegs, DstRegs; SmallVector<uint64_t, 2> Indexes; @@ -330,9 +342,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_LOAD: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -357,9 +371,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_STORE: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; LLT OffsetTy = LLT::scalar( MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -381,9 +397,11 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } case TargetOpcode::G_CONSTANT: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. 
+ if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext(); @@ -410,9 +428,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // ... // AN = BinOp<Ty/N> BN, CN // A = G_MERGE_VALUES A1, ..., AN - unsigned NarrowSize = NarrowTy.getSizeInBits(); - int NumParts = - MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; // List the registers where the destination will be scattered. SmallVector<unsigned, 2> DstRegs; @@ -854,7 +875,12 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_ADD: { unsigned NarrowSize = NarrowTy.getSizeInBits(); unsigned DstReg = MI.getOperand(0).getReg(); - int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize; + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (Size % NarrowSize != 0) + return UnableToLegalize; MIRBuilder.setInstr(MI); diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index e7a46eadb443..074cfa61a29a 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -28,46 +28,130 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOpcodes.h" #include <algorithm> -#include <cassert> -#include <tuple> -#include <utility> - +#include <map> using namespace llvm; -LegalizerInfo::LegalizerInfo() { - DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar; - - // FIXME: these two can be legalized to the fundamental load/store Jakob - // proposed. Once loads & stores are supported. - DefaultActions[TargetOpcode::G_ANYEXT] = Legal; - DefaultActions[TargetOpcode::G_TRUNC] = Legal; +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // Set defaults. + // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the + // fundamental load/store Jakob proposed. Once loads & stores are supported. 
+ setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}}); - DefaultActions[TargetOpcode::G_INTRINSIC] = Legal; - DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal; + setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); - DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; - DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; - DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; - DefaultActions[TargetOpcode::G_OR] = NarrowScalar; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall); - DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; - DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; - DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar; - DefaultActions[TargetOpcode::G_FNEG] = Lower; + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall); + setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}}); } void LegalizerInfo::computeTables() { - for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { - for (unsigned Idx = 0, End = Actions[Opcode].size(); Idx != End; ++Idx) { - for (auto &Action : Actions[Opcode][Idx]) { - LLT Ty = Action.first; - if (!Ty.isVector()) - continue; - - auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp, - Ty.getElementType())]; - Entry = std::max(Entry, Ty.getNumElements()); + assert(TablesInitialized == false); + + for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) { + const unsigned Opcode = FirstOp + OpcodeIdx; + for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size(); + ++TypeIdx) { + // 0. Collect information specified through the setAction API, i.e. + // for specific bit sizes. 
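The rewritten constructor replaces the old one-default-action-per-opcode table with per-opcode, per-type-index lists of (bit width, action) entries plus a size-change strategy for widths that were not listed. A rough standalone model of that data layout follows; the struct and member names are placeholders for illustration, not the real LegalizerInfo interface:

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

enum class Action { Legal, NarrowScalar, WidenScalar, Lower, Unsupported };
using SizeAndAction = std::pair<uint16_t, Action>;
using SizeAndActionsVec = std::vector<SizeAndAction>;
// A strategy fills in actions for bit widths the target did not list explicitly.
using SizeChangeStrategy = SizeAndActionsVec (*)(const SizeAndActionsVec &);

struct ToyLegalizerInfo {
  // Keyed by (opcode, type index); each entry is sorted by bit width.
  std::map<std::pair<unsigned, unsigned>, SizeAndActionsVec> ScalarActions;
  std::map<std::pair<unsigned, unsigned>, SizeChangeStrategy> Strategies;

  void setScalarAction(unsigned Opcode, unsigned TypeIdx, SizeAndActionsVec V) {
    ScalarActions[{Opcode, TypeIdx}] = std::move(V);
  }
  void setStrategy(unsigned Opcode, unsigned TypeIdx, SizeChangeStrategy S) {
    Strategies[{Opcode, TypeIdx}] = S;
  }
};

int main() {
  constexpr unsigned G_FNEG = 125; // placeholder opcode number, not the real enum value
  ToyLegalizerInfo Info;
  Info.setScalarAction(G_FNEG, 0, {{1, Action::Lower}});
  return 0;
}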
+ // For scalar types: + SizeAndActionsVec ScalarSpecifiedActions; + // For pointer types: + std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions; + // For vector types: + std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions; + for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) { + const LLT Type = LLT2Action.first; + const LegalizeAction Action = LLT2Action.second; + + auto SizeAction = std::make_pair(Type.getSizeInBits(), Action); + if (Type.isPointer()) + AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back( + SizeAction); + else if (Type.isVector()) + ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()] + .push_back(SizeAction); + else + ScalarSpecifiedActions.push_back(SizeAction); + } + + // 1. Handle scalar types + { + // Decide how to handle bit sizes for which no explicit specification + // was given. + SizeChangeStrategy S = &unsupportedForDifferentSizes; + if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() && + ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx]; + std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end()); + checkPartialSizeAndActionsVector(ScalarSpecifiedActions); + setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions)); } + + // 2. Handle pointer types + for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) { + std::sort(PointerSpecifiedActions.second.begin(), + PointerSpecifiedActions.second.end()); + checkPartialSizeAndActionsVector(PointerSpecifiedActions.second); + // For pointer types, we assume that there isn't a meaningfull way + // to change the number of bits used in the pointer. + setPointerAction( + Opcode, TypeIdx, PointerSpecifiedActions.first, + unsupportedForDifferentSizes(PointerSpecifiedActions.second)); + } + + // 3. Handle vector types + SizeAndActionsVec ElementSizesSeen; + for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { + std::sort(VectorSpecifiedActions.second.begin(), + VectorSpecifiedActions.second.end()); + const uint16_t ElementSize = VectorSpecifiedActions.first; + ElementSizesSeen.push_back({ElementSize, Legal}); + checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); + // For vector types, we assume that the best way to adapt the number + // of elements is to the next larger number of elements type for which + // the vector type is legal, unless there is no such type. In that case, + // legalize towards a vector type with a smaller number of elements. 
+ SizeAndActionsVec NumElementsActions; + for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) { + assert(BitsizeAndAction.first % ElementSize == 0); + const uint16_t NumElements = BitsizeAndAction.first / ElementSize; + NumElementsActions.push_back({NumElements, BitsizeAndAction.second}); + } + setVectorNumElementAction( + Opcode, TypeIdx, ElementSize, + moreToWiderTypesAndLessToWidest(NumElementsActions)); + } + std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); + SizeChangeStrategy VectorElementSizeChangeStrategy = + &unsupportedForDifferentSizes; + if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr) + VectorElementSizeChangeStrategy = + VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx]; + setScalarInVectorAction( + Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen)); } } @@ -90,69 +174,24 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) return std::make_pair(Legal, Aspect.Type); - LLT Ty = Aspect.Type; - LegalizeAction Action = findInActions(Aspect); - // LegalizerHelper is not able to handle non-power-of-2 types right now, so do - // not try to legalize them unless they are marked as Legal or Custom. - // FIXME: This is a temporary hack until the general non-power-of-2 - // legalization works. - if (!isPowerOf2_64(Ty.getSizeInBits()) && - !(Action == Legal || Action == Custom)) - return std::make_pair(Unsupported, LLT()); - - if (Action != NotFound) - return findLegalAction(Aspect, Action); - - unsigned Opcode = Aspect.Opcode; - if (!Ty.isVector()) { - auto DefaultAction = DefaultActions.find(Aspect.Opcode); - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal) - return std::make_pair(Legal, Ty); - - if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower) - return std::make_pair(Lower, Ty); - - if (DefaultAction == DefaultActions.end() || - DefaultAction->second != NarrowScalar) - return std::make_pair(Unsupported, LLT()); - return findLegalAction(Aspect, NarrowScalar); - } - - LLT EltTy = Ty.getElementType(); - int NumElts = Ty.getNumElements(); - - auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy)); - if (ScalarAction != ScalarInVectorActions.end() && - ScalarAction->second != Legal) - return findLegalAction(Aspect, ScalarAction->second); - - // The element type is legal in principle, but the number of elements is - // wrong. - auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy)); - if (MaxLegalElts > NumElts) - return findLegalAction(Aspect, MoreElements); - - if (MaxLegalElts == 0) { - // Scalarize if there's no legal vector type, which is just a special case - // of FewerElements. - return std::make_pair(FewerElements, EltTy); - } - - return findLegalAction(Aspect, FewerElements); + if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) + return findScalarLegalAction(Aspect); + assert(Aspect.Type.isVector()); + return findVectorLegalAction(Aspect); } std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { SmallBitVector SeenTypes(8); - const MCInstrDesc &MCID = MI.getDesc(); - const MCOperandInfo *OpInfo = MCID.OpInfo; - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { + const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + // FIXME: probably we'll need to cache the results here somehow? 
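The getAction(MachineInstr&) loop shown next queries the table only once per generic type index: several operands of an instruction can share the same type index, and re-querying would both waste time and report the same legalization action more than once. A standalone model of that deduplication with toy operand types (not MachineOperand/MCOperandInfo):

#include <iostream>
#include <vector>

struct ToyOperand {
  bool IsGenericType;
  unsigned TypeIdx;
};

// Collect each generic type index once, in operand order, mirroring the
// SeenTypes bit-vector check in getAction.
std::vector<unsigned> uniqueTypeIndices(const std::vector<ToyOperand> &Ops) {
  std::vector<bool> Seen; // grows on demand, like the SmallBitVector
  std::vector<unsigned> Result;
  for (const ToyOperand &Op : Ops) {
    if (!Op.IsGenericType)
      continue;
    if (Op.TypeIdx >= Seen.size())
      Seen.resize(Op.TypeIdx + 1, false);
    if (Seen[Op.TypeIdx])
      continue;
    Seen[Op.TypeIdx] = true;
    Result.push_back(Op.TypeIdx);
  }
  return Result;
}

int main() {
  // e.g. G_ADD %dst, %lhs, %rhs: all three operands share type index 0.
  std::vector<ToyOperand> Ops = {{true, 0}, {true, 0}, {true, 0}};
  std::cout << uniqueTypeIndices(Ops).size() << "\n"; // prints 1
}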
+ for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { if (!OpInfo[i].isGenericType()) continue; - // We don't want to repeatedly check the same operand index, that - // could get expensive. + // We must only record actions once for each TypeIdx; otherwise we'd + // try to legalize operands multiple times down the line. unsigned TypeIdx = OpInfo[i].getGenericTypeIndex(); if (SeenTypes[TypeIdx]) continue; @@ -172,38 +211,166 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return std::get<0>(getAction(MI, MRI)) == Legal; } -Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect, - LegalizeAction Action) const { - switch(Action) { - default: - llvm_unreachable("Cannot find legal type"); +bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + return false; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest( + const SizeAndActionsVec &v, LegalizeAction IncreaseAction, + LegalizeAction DecreaseAction) { + SizeAndActionsVec result; + unsigned LargestSizeSoFar = 0; + if (v.size() >= 1 && v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + LargestSizeSoFar = v[i].first; + if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) { + result.push_back({LargestSizeSoFar + 1, IncreaseAction}); + LargestSizeSoFar = v[i].first + 1; + } + } + result.push_back({LargestSizeSoFar + 1, DecreaseAction}); + return result; +} + +LegalizerInfo::SizeAndActionsVec +LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest( + const SizeAndActionsVec &v, LegalizeAction DecreaseAction, + LegalizeAction IncreaseAction) { + SizeAndActionsVec result; + if (v.size() == 0 || v[0].first != 1) + result.push_back({1, IncreaseAction}); + for (size_t i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) { + result.push_back({v[i].first + 1, DecreaseAction}); + } + } + return result; +} + +LegalizerInfo::SizeAndAction +LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) { + assert(Size >= 1); + // Find the last element in Vec that has a bitsize equal to or smaller than + // the requested bit size. + // That is the element just before the first element that is bigger than Size. + auto VecIt = std::upper_bound( + Vec.begin(), Vec.end(), Size, + [](const uint32_t Size, const SizeAndAction lhs) -> bool { + return Size < lhs.first; + }); + assert(VecIt != Vec.begin() && "Does Vec not start with size 1?"); + --VecIt; + int VecIdx = VecIt - Vec.begin(); + + LegalizeAction Action = Vec[VecIdx].second; + switch (Action) { case Legal: case Lower: case Libcall: case Custom: - return Aspect.Type; + return {Size, Action}; + case FewerElements: + // FIXME: is this special case still needed and correct? + // Special case for scalarization: + if (Vec == SizeAndActionsVec({{1, FewerElements}})) + return {1, FewerElements}; + LLVM_FALLTHROUGH; case NarrowScalar: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); - } - case WidenScalar: { - return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT { - return Ty.getSizeInBits() < 8 ? 
LLT::scalar(8) : Ty.doubleScalarSize(); - }); - } - case FewerElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + // The following needs to be a loop, as for now, we do allow needing to + // go over "Unsupported" bit sizes before finding a legalizable bit size. + // e.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). if Size==8, + // we need to iterate over s9, and then to s32 to return (s32, Legal). + // If we want to get rid of the below loop, we should have stronger asserts + // when building the SizeAndActionsVecs, probably not allowing + // "Unsupported" unless at the ends of the vector. + for (int i = VecIdx - 1; i >= 0; --i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case WidenScalar: case MoreElements: { - return findLegalizableSize( - Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + // See above, the following needs to be a loop, at least for now. + for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i) + if (!needsLegalizingToDifferentSize(Vec[i].second) && + Vec[i].second != Unsupported) + return {Vec[i].first, Action}; + llvm_unreachable(""); } + case Unsupported: + return {Size, Unsupported}; + case NotFound: + llvm_unreachable("NotFound"); } + llvm_unreachable("Action has an unknown enum value"); } -bool LegalizerInfo::legalizeCustom(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - return false; +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isScalar() || Aspect.Type.isPointer()); + if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, LLT()}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + if (Aspect.Type.isPointer() && + AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) == + AddrSpace2PointerActions[OpcodeIdx].end()) { + return {NotFound, LLT()}; + } + const SmallVector<SizeAndActionsVec, 1> &Actions = + Aspect.Type.isPointer() + ? AddrSpace2PointerActions[OpcodeIdx] + .find(Aspect.Type.getAddressSpace()) + ->second + : ScalarActions[OpcodeIdx]; + if (Aspect.Idx >= Actions.size()) + return {NotFound, LLT()}; + const SizeAndActionsVec &Vec = Actions[Aspect.Idx]; + // FIXME: speed up this search, e.g. by using a results cache for repeated + // queries? + auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits()); + return {SizeAndAction.second, + Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first) + : LLT::pointer(Aspect.Type.getAddressSpace(), + SizeAndAction.first)}; +} + +std::pair<LegalizerInfo::LegalizeAction, LLT> +LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const { + assert(Aspect.Type.isVector()); + // First legalize the vector element size, then legalize the number of + // lanes in the vector. 
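The findAction helper above is essentially a binary search: each SizeAndActionsVec is sorted by bit width and starts at width 1, so the entry that governs a queried width is the last one at or below it. A self-contained copy of that lookup shape, with toy action names and without the NarrowScalar/WidenScalar follow-up loop:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

enum class Action { Legal, NarrowScalar, WidenScalar, Unsupported };
using SizeAndAction = std::pair<uint32_t, Action>;
using SizeAndActionsVec = std::vector<SizeAndAction>;

// Find the entry governing `Size`: the last element whose bit width is <= Size.
// Vec must be sorted by bit width and begin at width 1, as in the real table.
SizeAndAction findAction(const SizeAndActionsVec &Vec, uint32_t Size) {
  assert(Size >= 1 && !Vec.empty() && Vec.front().first == 1);
  auto It = std::upper_bound(
      Vec.begin(), Vec.end(), Size,
      [](uint32_t S, const SizeAndAction &Entry) { return S < Entry.first; });
  --It; // safe: the width-1 entry is always <= Size
  return *It;
}

int main() {
  // Example table: s1-s7 widen, s8/s16/s32 legal, anything wider narrows.
  SizeAndActionsVec V = {{1, Action::WidenScalar}, {8, Action::Legal},
                         {9, Action::WidenScalar}, {16, Action::Legal},
                         {17, Action::WidenScalar}, {32, Action::Legal},
                         {33, Action::NarrowScalar}};
  assert(findAction(V, 7).second == Action::WidenScalar);
  assert(findAction(V, 16).second == Action::Legal);
  assert(findAction(V, 64).second == Action::NarrowScalar);
  return 0;
}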
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp) + return {NotFound, Aspect.Type}; + const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; + const unsigned TypeIdx = Aspect.Idx; + if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size()) + return {NotFound, Aspect.Type}; + const SizeAndActionsVec &ElemSizeVec = + ScalarInVectorActions[OpcodeIdx][TypeIdx]; + + LLT IntermediateType; + auto ElementSizeAndAction = + findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits()); + IntermediateType = + LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first); + if (ElementSizeAndAction.second != Legal) + return {ElementSizeAndAction.second, IntermediateType}; + + auto i = NumElements2Actions[OpcodeIdx].find( + IntermediateType.getScalarSizeInBits()); + if (i == NumElements2Actions[OpcodeIdx].end()) { + return {NotFound, IntermediateType}; + } + const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx]; + auto NumElementsAndAction = + findAction(NumElementsVec, IntermediateType.getNumElements()); + return {NumElementsAndAction.second, + LLT::vector(NumElementsAndAction.first, + IntermediateType.getScalarSizeInBits())}; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b4fe5f15fddc..230c9ef04c58 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -15,8 +15,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index f117c6094534..3854e9b263d4 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index a9f3d73a294b..9ae0f970f422 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #define DEBUG_TYPE "globalisel-utils" diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 08720d1271f3..6d2a55c65f4d 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -40,7 +41,6 
@@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index bf0f88d49a82..b1cac2a107d6 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -44,6 +44,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/LLVMContext.h" @@ -51,7 +52,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 2e991de62211..a383b72dce64 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -49,7 +50,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index a45b1e39feed..9328e2d900af 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -36,6 +36,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -46,8 +48,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0c81306a9a50..1923a8c25298 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -51,7 +52,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git 
a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 92cca1a54951..61fbfdd64a2e 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -16,10 +16,10 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index a9aec926115a..f9c5652e8a17 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 2eab0376da2f..33ae476bf4a0 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp new file mode 100644 index 000000000000..62596440c73a --- /dev/null +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -0,0 +1,626 @@ +//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The purpose of this pass is to employ a canonical code transformation so +// that code compiled with slightly different IR passes can be diffed more +// effectively than otherwise. This is done by renaming vregs in a given +// LiveRange in a canonical way. This pass also does a pseudo-scheduling to +// move defs closer to their use inorder to reduce diffs caused by slightly +// different schedules. +// +// Basic Usage: +// +// llc -o - -run-pass mir-canonicalizer example.mir +// +// Reorders instructions canonically. +// Renames virtual register operands canonically. +// Strips certain MIR artifacts (optionally). 
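To make the intent stated in that header comment concrete: if two MIR files differ only in how their virtual registers happen to be numbered, renaming each register to its order of first appearance makes the two files textually identical, which is what lets a later diff surface only the real differences. A tiny standalone model of that idea (made-up register numbers, not the actual pass, which allocates real vregs and also reschedules defs):

#include <iostream>
#include <map>
#include <vector>

// Rename registers to sequential ids in order of first appearance.
std::vector<unsigned> canonicalize(const std::vector<unsigned> &Regs) {
  std::map<unsigned, unsigned> RenameMap;
  std::vector<unsigned> Out;
  for (unsigned R : Regs) {
    auto It = RenameMap.find(R);
    if (It == RenameMap.end())
      It = RenameMap.insert({R, static_cast<unsigned>(RenameMap.size())}).first;
    Out.push_back(It->second);
  }
  return Out;
}

int main() {
  // Two "functions" computing the same thing under different vreg numbers.
  std::vector<unsigned> A = {5, 7, 5, 9};
  std::vector<unsigned> B = {2, 4, 2, 8};
  // Both canonicalize to "0 1 0 2", so a diff of the renamed forms is empty.
  for (unsigned R : canonicalize(A)) std::cout << R << ' ';
  std::cout << '\n';
  for (unsigned R : canonicalize(B)) std::cout << R << ' ';
  std::cout << '\n';
}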
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> + +using namespace llvm; + +namespace llvm { +extern char &MIRCanonicalizerID; +} // namespace llvm + +#define DEBUG_TYPE "mir-canonicalizer" + +static cl::opt<unsigned> +CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("Function number to canonicalize.")); + +static cl::opt<unsigned> +CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), + cl::value_desc("N"), + cl::desc("BasicBlock number to canonicalize.")); + +namespace { + +class MIRCanonicalizer : public MachineFunctionPass { +public: + static char ID; + MIRCanonicalizer() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Rename register operands in a canonical ordering."; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; +class TypedVReg { + VRType type; + unsigned reg; + +public: + TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {} + TypedVReg(VRType type) : type(type), reg(~0U) { + assert(type != RSE_Reg && "Expected a non-register type."); + } + + bool isReg() const { return type == RSE_Reg; } + bool isFrameIndex() const { return type == RSE_FrameIndex; } + bool isCandidate() const { return type == RSE_NewCandidate; } + + VRType getType() const { return type; } + unsigned getReg() const { + assert(this->isReg() && "Expected a virtual or physical register."); + return reg; + } +}; + +char MIRCanonicalizer::ID; + +char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID; + +INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer", + "Rename Register Operands Canonically", false, false) + +static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) { + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + std::vector<MachineBasicBlock *> RPOList; + for (auto MBB : RPOT) { + RPOList.push_back(MBB); + } + + return RPOList; +} + +// Set a dummy vreg. We use this vregs register class to generate throw-away +// vregs that are used to skip vreg numbers so that vreg numbers line up. +static unsigned GetDummyVReg(const MachineFunction &MF) { + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + return MO.getReg(); + } + } + } + + return ~0U; +} + +static bool rescheduleCanonically(MachineBasicBlock *MBB) { + + bool Changed = false; + + // Calculates the distance of MI from the begining of its parent BB. + auto getInstrIdx = [](const MachineInstr &MI) { + unsigned i = 0; + for (auto &CurMI : *MI.getParent()) { + if (&CurMI == &MI) + return i; + i++; + } + return ~0U; + }; + + // Pre-Populate vector of instructions to reschedule so that we don't + // clobber the iterator. 
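The rescheduling step that follows moves each def directly in front of its first later use within the block, splicing instructions rather than rebuilding the list. A minimal standalone sketch of that behavior with toy instructions (one def, a list of uses), assuming std::list as a stand-in for the instruction list; the real pass of course works on MachineInstr and checks register classes and blocks:

#include <algorithm>
#include <iostream>
#include <list>
#include <vector>

struct ToyInstr {
  int Def;               // value defined, -1 if none
  std::vector<int> Uses; // values read
};

void rescheduleCanonically(std::list<ToyInstr> &Block) {
  // Snapshot iterators first so splicing does not clobber the walk, just as
  // the real pass pre-populates its Instructions vector.
  std::vector<std::list<ToyInstr>::iterator> Worklist;
  for (auto It = Block.begin(); It != Block.end(); ++It)
    Worklist.push_back(It);

  for (auto DefI : Worklist) {
    if (DefI->Def < 0)
      continue;
    auto UseI = std::find_if(std::next(DefI), Block.end(),
                             [&](const ToyInstr &I) {
      return std::find(I.Uses.begin(), I.Uses.end(), DefI->Def) != I.Uses.end();
    });
    if (UseI != Block.end() && UseI != std::next(DefI))
      Block.splice(UseI, Block, DefI); // pull the def down next to its use
  }
}

int main() {
  // %0 is defined at the top but only read by the last instruction.
  std::list<ToyInstr> Block = {{0, {}}, {1, {}}, {2, {0}}};
  rescheduleCanonically(Block);
  for (const ToyInstr &I : Block)
    std::cout << I.Def << ' '; // prints: 1 0 2
  std::cout << '\n';
}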
+ std::vector<MachineInstr *> Instructions; + for (auto &MI : *MBB) { + Instructions.push_back(&MI); + } + + for (auto *II : Instructions) { + if (II->getNumOperands() == 0) + continue; + + MachineOperand &MO = II->getOperand(0); + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); + + MachineInstr *Def = II; + unsigned Distance = ~0U; + MachineInstr *UseToBringDefCloserTo = nullptr; + MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); + for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) { + MachineInstr *UseInst = UO.getParent(); + + const unsigned DefLoc = getInstrIdx(*Def); + const unsigned UseLoc = getInstrIdx(*UseInst); + const unsigned Delta = (UseLoc - DefLoc); + + if (UseInst->getParent() != Def->getParent()) + continue; + if (DefLoc >= UseLoc) + continue; + + if (Delta < Distance) { + Distance = Delta; + UseToBringDefCloserTo = UseInst; + } + } + + const auto BBE = MBB->instr_end(); + MachineBasicBlock::iterator DefI = BBE; + MachineBasicBlock::iterator UseI = BBE; + + for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) { + + if (DefI != BBE && UseI != BBE) + break; + + if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) + continue; + + if (&*BBI == Def) { + DefI = BBI; + continue; + } + + if (&*BBI == UseToBringDefCloserTo) { + UseI = BBI; + continue; + } + } + + if (DefI == BBE || UseI == BBE) + continue; + + DEBUG({ + dbgs() << "Splicing "; + DefI->dump(); + dbgs() << " right before: "; + UseI->dump(); + }); + + Changed = true; + MBB->splice(UseI, MBB, DefI); + } + + return Changed; +} + +/// Here we find our candidates. What makes an interesting candidate? +/// An candidate for a canonicalization tree root is normally any kind of +/// instruction that causes side effects such as a store to memory or a copy to +/// a physical register or a return instruction. We use these as an expression +/// tree root that we walk inorder to build a canonical walk which should result +/// in canoncal vreg renaming. 
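Restating the candidate criterion from the comment above as code: a candidate is any instruction whose effect escapes the block, such as a store, a branch, a write to a physical register, or a def that is read in another block. A compact standalone filter over toy instruction flags (not MachineInstr) capturing the same predicate:

#include <vector>

struct ToyMI {
  bool MayStore;
  bool IsBranch;
  bool DefsPhysReg;           // writes a physical register
  bool DefHasUseOutsideBlock; // its def is read in another block
};

// Keep only the instructions whose effects escape the block; these become
// the roots of the canonical walks.
std::vector<const ToyMI *> populateCandidates(const std::vector<ToyMI> &Block) {
  std::vector<const ToyMI *> Candidates;
  for (const ToyMI &MI : Block)
    if (MI.MayStore || MI.IsBranch || MI.DefsPhysReg || MI.DefHasUseOutsideBlock)
      Candidates.push_back(&MI);
  return Candidates;
}

int main() {
  std::vector<ToyMI> Block = {{false, false, false, false},
                              {true, false, false, false}}; // the store qualifies
  return populateCandidates(Block).size() == 1 ? 0 : 1;
}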
+static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { + std::vector<MachineInstr *> Candidates; + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + MachineInstr *MI = &*II; + + bool DoesMISideEffect = false; + + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { + const unsigned Dst = MI->getOperand(0).getReg(); + DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst); + + for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { + if (DoesMISideEffect) break; + DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); + } + } + + if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) + continue; + + DEBUG(dbgs() << "Found Candidate: "; MI->dump();); + Candidates.push_back(MI); + } + + return Candidates; +} + +void doCandidateWalk(std::vector<TypedVReg> &VRegs, + std::queue <TypedVReg> &RegQueue, + std::vector<MachineInstr *> &VisitedMIs, + const MachineBasicBlock *MBB) { + + const MachineFunction &MF = *MBB->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + while (!RegQueue.empty()) { + + auto TReg = RegQueue.front(); + RegQueue.pop(); + + if (TReg.isFrameIndex()) { + DEBUG(dbgs() << "Popping frame index.\n";); + VRegs.push_back(TypedVReg(RSE_FrameIndex)); + continue; + } + + assert(TReg.isReg() && "Expected vreg or physreg."); + unsigned Reg = TReg.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DEBUG({ + dbgs() << "Popping vreg "; + MRI.def_begin(Reg)->dump(); + dbgs() << "\n"; + }); + + if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { + return TR.isReg() && TR.getReg() == Reg; + })) { + VRegs.push_back(TypedVReg(Reg)); + } + } else { + DEBUG(dbgs() << "Popping physreg.\n";); + VRegs.push_back(TypedVReg(Reg)); + continue; + } + + for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { + MachineInstr *Def = RI->getParent(); + + if (Def->getParent() != MBB) + continue; + + if (llvm::any_of(VisitedMIs, + [&](const MachineInstr *VMI) { return Def == VMI; })) { + break; + } + + DEBUG({ + dbgs() << "\n========================\n"; + dbgs() << "Visited MI: "; + Def->dump(); + dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; + dbgs() << "\n========================\n"; + }); + VisitedMIs.push_back(Def); + for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { + + MachineOperand &MO = Def->getOperand(I); + if (MO.isFI()) { + DEBUG(dbgs() << "Pushing frame index.\n";); + RegQueue.push(TypedVReg(RSE_FrameIndex)); + } + + if (!MO.isReg()) + continue; + RegQueue.push(TypedVReg(MO.getReg())); + } + } + } +} + +// TODO: Work to remove this in the future. One day when we have named vregs +// we should be able to form the canonical name based on some characteristic +// we see in that point of the expression tree (like if we were to name based +// on some sort of value numbering scheme). 
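doCandidateWalk above is a worklist traversal: starting from a candidate's operands, it walks back through the defining instructions, recording each value the first time it is reached so the visit order is deterministic for a given block. A self-contained model of that walk over a toy def graph, using only the standard library (the real code walks MachineRegisterInfo def chains and tracks frame indexes and physregs as well):

#include <algorithm>
#include <iostream>
#include <map>
#include <queue>
#include <vector>

// Toy def graph: Defs[V] lists the values read by the instruction defining V.
using DefGraph = std::map<int, std::vector<int>>;

// Breadth-first walk from a candidate's operands through the defining
// instructions, recording each value once in the order it is reached.
std::vector<int> candidateWalk(const DefGraph &Defs, const std::vector<int> &Roots) {
  std::vector<int> Order;
  std::queue<int> Worklist;
  for (int R : Roots)
    Worklist.push(R);
  while (!Worklist.empty()) {
    int V = Worklist.front();
    Worklist.pop();
    if (std::find(Order.begin(), Order.end(), V) != Order.end())
      continue; // already visited
    Order.push_back(V);
    auto It = Defs.find(V);
    if (It == Defs.end())
      continue; // defined outside the block, nothing more to chase
    for (int Operand : It->second)
      Worklist.push(Operand);
  }
  return Order;
}

int main() {
  // %3 = add %1, %2 ; %2 = mul %1, %1 ; the candidate stores %3.
  DefGraph Defs = {{3, {1, 2}}, {2, {1, 1}}};
  for (int V : candidateWalk(Defs, {3}))
    std::cout << '%' << V << ' '; // prints: %3 %1 %2
  std::cout << '\n';
}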
+static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + const unsigned VR_GAP = (++VRegGapIndex * 1000); + + DEBUG({ + dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to " + << VR_GAP << "\n"; + }); + + unsigned I = MRI.createVirtualRegister(RC); + const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; + while (I != E) { + I = MRI.createVirtualRegister(RC); + } +} + +static std::map<unsigned, unsigned> +GetVRegRenameMap(const std::vector<TypedVReg> &VRegs, + const std::vector<unsigned> &renamedInOtherBB, + MachineRegisterInfo &MRI, + const TargetRegisterClass *RC) { + std::map<unsigned, unsigned> VRegRenameMap; + unsigned LastRenameReg = MRI.createVirtualRegister(RC); + bool FirstCandidate = true; + + for (auto &vreg : VRegs) { + if (vreg.isFrameIndex()) { + // We skip one vreg for any frame index because there is a good chance + // (especially when comparing SelectionDAG to GlobalISel generated MIR) + // that in the other file we are just getting an incoming vreg that comes + // from a copy from a frame index. So it's safe to skip by one. + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); + continue; + } else if (vreg.isCandidate()) { + + // After the first candidate, for every subsequent candidate, we skip mod + // 10 registers so that the candidates are more likely to start at the + // same vreg number making it more likely that the canonical walk from the + // candidate insruction. We don't need to skip from the first candidate of + // the BasicBlock because we already skip ahead several vregs for each BB. + while (LastRenameReg % 10) { + if (!FirstCandidate) break; + LastRenameReg = MRI.createVirtualRegister(RC); + + DEBUG({ + dbgs() << "Skipping rename for new candidate " << LastRenameReg + << "\n"; + }); + } + FirstCandidate = false; + continue; + } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { + LastRenameReg = MRI.createVirtualRegister(RC); + DEBUG({ + dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; + }); + continue; + } + + auto Reg = vreg.getReg(); + if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { + DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); + continue; + } + + auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + LastRenameReg = Rename; + + if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { + DEBUG(dbgs() << "Mapping vreg ";); + if (MRI.reg_begin(Reg) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); + } else { + DEBUG(dbgs() << Reg;); + } + DEBUG(dbgs() << " to ";); + if (MRI.reg_begin(Rename) != MRI.reg_end()) { + DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); + } else { + DEBUG(dbgs() << Rename;); + } + DEBUG(dbgs() << "\n";); + + VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); + } + } + + return VRegRenameMap; +} + +static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB, + const std::map<unsigned, unsigned> &VRegRenameMap, + MachineRegisterInfo &MRI) { + bool Changed = false; + for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { + + auto VReg = I->first; + auto Rename = I->second; + + RenamedInOtherBB.push_back(Rename); + + std::vector<MachineOperand *> RenameMOs; + for (auto &MO : MRI.reg_operands(VReg)) { + RenameMOs.push_back(&MO); + } + + for (auto *MO : RenameMOs) { + Changed = true; + MO->setReg(Rename); + + if (!MO->isDef()) + 
MO->setIsKill(false); + } + } + + return Changed; +} + +static bool doDefKillClear(MachineBasicBlock *MBB) { + bool Changed = false; + + for (auto &MI : *MBB) { + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (!MO.isDef() && MO.isKill()) { + Changed = true; + MO.setIsKill(false); + } + + if (MO.isDef() && MO.isDead()) { + Changed = true; + MO.setIsDead(false); + } + } + } + + return Changed; +} + +static bool runOnBasicBlock(MachineBasicBlock *MBB, + std::vector<StringRef> &bbNames, + std::vector<unsigned> &renamedInOtherBB, + unsigned &basicBlockNum, unsigned &VRegGapIndex) { + + if (CanonicalizeBasicBlockNumber != ~0U) { + if (CanonicalizeBasicBlockNumber != basicBlockNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); + } + + if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { + DEBUG({ + dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() + << "\n"; + }); + return false; + } + + DEBUG({ + dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + }); + + bool Changed = false; + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + const unsigned DummyVReg = GetDummyVReg(MF); + const TargetRegisterClass *DummyRC = + (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg); + if (!DummyRC) return false; + + bbNames.push_back(MBB->getName()); + DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); + + DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump();); + Changed |= rescheduleCanonically(MBB); + DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); + + std::vector<MachineInstr *> Candidates = populateCandidates(MBB); + std::vector<MachineInstr *> VisitedMIs; + std::copy(Candidates.begin(), Candidates.end(), + std::back_inserter(VisitedMIs)); + + std::vector<TypedVReg> VRegs; + for (auto candidate : Candidates) { + VRegs.push_back(TypedVReg(RSE_NewCandidate)); + + std::queue<TypedVReg> RegQueue; + + // Here we walk the vreg operands of a non-root node along our walk. + // The root nodes are the original candidates (stores normally). + // These are normally not the root nodes (except for the case of copies to + // physical registers). + for (unsigned i = 1; i < candidate->getNumOperands(); i++) { + if (candidate->mayStore() || candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + + DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); + RegQueue.push(TypedVReg(MO.getReg())); + } + + // Here we walk the root candidates. We start from the 0th operand because + // the root is normally a store to a vreg. + for (unsigned i = 0; i < candidate->getNumOperands(); i++) { + + if (!candidate->mayStore() && !candidate->isBranch()) + break; + + MachineOperand &MO = candidate->getOperand(i); + + // TODO: Do we want to only add vregs here? + if (!MO.isReg() && !MO.isFI()) + continue; + + DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); + + RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) : + TypedVReg(RSE_FrameIndex)); + } + + doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); + } + + // If we have populated no vregs to rename then bail. + // The rest of this function does the vreg remaping. + if (VRegs.size() == 0) + return Changed; + + // Skip some vregs, so we can recon where we'll land next. 
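The rename-map construction above hands out fresh numbers in walk order while deliberately wasting some: one number per frame index, and a round-up to the next multiple of 10 at each candidate, so two walks that diverge slightly still tend to line their numbers back up. A simplified standalone model of that numbering policy (plain integers standing in for the vregs that GetVRegRenameMap actually allocates through MachineRegisterInfo):

#include <iostream>
#include <map>
#include <vector>

enum class Kind { Reg, FrameIndex, NewCandidate };
struct WalkEntry { Kind K; unsigned Reg; };

std::map<unsigned, unsigned> buildRenameMap(const std::vector<WalkEntry> &Walk) {
  std::map<unsigned, unsigned> RenameMap;
  unsigned Next = 0;
  for (const WalkEntry &E : Walk) {
    if (E.K == Kind::NewCandidate) {
      Next = (Next + 9) / 10 * 10; // round up to a multiple of 10
    } else if (E.K == Kind::FrameIndex) {
      ++Next;                      // burn one number for the frame index
    } else if (!RenameMap.count(E.Reg)) {
      RenameMap[E.Reg] = Next++;
    }
  }
  return RenameMap;
}

int main() {
  std::vector<WalkEntry> Walk = {{Kind::NewCandidate, 0}, {Kind::Reg, 42},
                                 {Kind::FrameIndex, 0},   {Kind::Reg, 7},
                                 {Kind::NewCandidate, 0}, {Kind::Reg, 13}};
  for (const auto &KV : buildRenameMap(Walk))
    std::cout << '%' << KV.first << " -> %" << KV.second << '\n';
  // %7 -> %2, %13 -> %10, %42 -> %0
}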
+ SkipVRegs(VRegGapIndex, MRI, DummyRC); + + auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC); + Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); + Changed |= doDefKillClear(MBB); + + DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";); + DEBUG(dbgs() << "\n\n================================================\n\n"); + return Changed; +} + +bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { + + static unsigned functionNum = 0; + if (CanonicalizeFunctionNumber != ~0U) { + if (CanonicalizeFunctionNumber != functionNum++) + return false; + DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";); + } + + // we need a valid vreg to create a vreg type for skipping all those + // stray vreg numbers so reach alignment/canonical vreg values. + std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF); + + DEBUG( + dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n"; + dbgs() << "\n\n================================================\n\n"; + dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n"; + for (auto MBB : RPOList) { + dbgs() << MBB->getName() << "\n"; + } + dbgs() << "\n\n================================================\n\n"; + ); + + std::vector<StringRef> BBNames; + std::vector<unsigned> RenamedInOtherBB; + + unsigned GapIdx = 0; + unsigned BBNum = 0; + + bool Changed = false; + + for (auto MBB : RPOList) + Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx); + + return Changed; +} + diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 9c8743a71645..fbba60c43122 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MILexer.h" #include "MIParser.h" +#include "MILexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" @@ -21,8 +21,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -64,7 +65,6 @@ #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index aae48587c5be..a817c62c9857 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,16 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" 
#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -31,19 +33,18 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" @@ -57,9 +58,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -162,8 +162,8 @@ public: void printStackObjectReference(int FrameIndex); void printOffset(int64_t Offset); void printTargetFlags(const MachineOperand &Op); - void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, + void print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); void print(const LLVMContext &Context, const TargetInstrInfo &TII, const MachineMemOperand &Op); @@ -734,7 +734,7 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + print(MI, I, TRI, ShouldPrintRegisterTies, getTypeToPrint(MI, I, PrintedTypes, MRI), /*IsDef=*/true); } @@ -751,7 +751,7 @@ void MIPrinter::print(const MachineInstr &MI) { for (; I < E; ++I) { if (NeedComma) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, + print(MI, I, TRI, ShouldPrintRegisterTies, getTypeToPrint(MI, I, PrintedTypes, MRI)); NeedComma = true; } @@ -923,9 +923,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { return nullptr; } -void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, +void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, + const TargetRegisterInfo *TRI, + bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef) { + const MachineOperand &Op = MI.getOperand(OpIdx); printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: { @@ -959,13 +961,16 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) - OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; + OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(OpIdx) << ")"; if (TypeToPrint.isValid()) OS << '(' << TypeToPrint << ')'; break; } case MachineOperand::MO_Immediate: - OS << Op.getImm(); + if (MI.isOperandSubregIdx(OpIdx)) + OS << "%subreg." 
<< TRI->getSubRegIndexName(Op.getImm()); + else + OS << Op.getImm(); break; case MachineOperand::MO_CImmediate: Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index d5758da0464c..40ffbc465562 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -30,7 +31,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -42,6 +42,8 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) : BB(B), Number(-1), xParent(&MF) { Insts.Parent = this; + if (B) + IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight(); } MachineBasicBlock::~MachineBasicBlock() { @@ -338,6 +340,12 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, } OS << '\n'; } + if (IrrLoopHeaderWeight) { + if (Indexes) OS << '\t'; + OS << " Irreducible loop header weight: " + << IrrLoopHeaderWeight.getValue(); + OS << '\n'; + } } void MachineBasicBlock::printAsOperand(raw_ostream &OS, diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 14cd91206d80..2c336e450569 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -234,6 +234,12 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; } +bool +MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) { + assert(MBFI && "Expected analysis to be available"); + return MBFI->isIrrLoopHeader(MBB); +} + const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { return MBFI ? 
MBFI->getFunction() : nullptr; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index c5991332f088..9fa4c0d5e4fd 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -55,7 +56,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index f0f63715d2f7..be197a48d801 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -35,7 +36,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 9fc990f5c240..5b576b68fcca 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -21,11 +21,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 61f56fffc887..1a39afe655e6 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -23,11 +23,11 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index be8adf75fb7c..75c05e2f0021 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include 
"llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 250a10c7d076..570c410e1fe2 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -58,7 +58,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index bb2dda980e41..2c81218f8f6c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" @@ -58,7 +59,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -320,8 +320,45 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { } case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); - case MachineOperand::MO_CFIIndex: - return getCFIIndex() == Other.getCFIIndex(); + case MachineOperand::MO_CFIIndex: { + const MachineFunction *MF = getParent()->getParent()->getParent(); + const MachineFunction *OtherMF = + Other.getParent()->getParent()->getParent(); + MCCFIInstruction Inst = MF->getFrameInstructions()[getCFIIndex()]; + MCCFIInstruction OtherInst = + OtherMF->getFrameInstructions()[Other.getCFIIndex()]; + MCCFIInstruction::OpType op = Inst.getOperation(); + if (op != OtherInst.getOperation()) return false; + switch (op) { + case MCCFIInstruction::OpDefCfa: + case MCCFIInstruction::OpOffset: + case MCCFIInstruction::OpRelOffset: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + if (Inst.getOffset() != OtherInst.getOffset()) return false; + break; + case MCCFIInstruction::OpRestore: + case MCCFIInstruction::OpUndefined: + case MCCFIInstruction::OpSameValue: + case MCCFIInstruction::OpDefCfaRegister: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + break; + case MCCFIInstruction::OpRegister: + if (Inst.getRegister() != OtherInst.getRegister()) return false; + if (Inst.getRegister2() != OtherInst.getRegister2()) return false; + break; + case MCCFIInstruction::OpDefCfaOffset: + case MCCFIInstruction::OpAdjustCfaOffset: + case MCCFIInstruction::OpGnuArgsSize: + if (Inst.getOffset() != OtherInst.getOffset()) return false; + break; + case MCCFIInstruction::OpRememberState: + case MCCFIInstruction::OpRestoreState: + case MCCFIInstruction::OpEscape: + case MCCFIInstruction::OpWindowSave: + break; + } + return true; + } case MachineOperand::MO_Metadata: return getMetadata() == Other.getMetadata(); case MachineOperand::MO_IntrinsicID: @@ -370,8 +407,13 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); case MachineOperand::MO_MCSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), 
MO.getMCSymbol()); - case MachineOperand::MO_CFIIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_CFIIndex: { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + MCCFIInstruction Inst = MF->getFrameInstructions()[MO.getCFIIndex()]; + return hash_combine(MO.getType(), MO.getTargetFlags(), Inst.getOperation(), + Inst.getRegister(), Inst.getRegister2(), + Inst.getOffset()); + } case MachineOperand::MO_IntrinsicID: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index b5621a09c6ff..eceae9a968b9 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index efb5c3371de2..d1147fea08eb 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" @@ -43,7 +44,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 1bc869e02e64..48b68e3b718b 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -65,11 +65,11 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index c852c2e1564f..d270b8e5d8f5 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -89,6 +89,7 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -102,7 +103,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index be06053f0040..1674aba0c82c 100644 --- 
a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 65d823667676..a3e93de67bbe 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -21,11 +21,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 3e12bdcd689e..900a0a63e96e 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/LaneBitmask.h" @@ -52,7 +53,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index dd6e26d8f8b8..f52e39426640 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" @@ -40,7 +41,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 0bd5c56871ca..f894f4704452 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -51,6 +51,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" @@ -66,7 +67,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" 
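Note on the MachineInstr.cpp hunk above: MO_CFIIndex operands now compare and hash by the MCCFIInstruction they refer to, rather than by the raw index, which is only meaningful inside a single MachineFunction's frame-instruction table. A minimal self-contained sketch of that idea, using a hypothetical CFIDirective stand-in rather than LLVM's real types:

  #include <cstdint>
  #include <functional>

  // Hypothetical stand-in for an MCCFIInstruction-style directive.
  struct CFIDirective {
    enum class Op { DefCfa, Offset, Register, RememberState };
    Op Kind = Op::DefCfa;
    unsigned Reg = 0;   // primary register operand
    unsigned Reg2 = 0;  // second register (only for Op::Register)
    int64_t Offset = 0; // offset operand (only for offset-style ops)
  };

  // Equal iff the directives describe the same operation with the same
  // operands, not merely the same slot in some per-function table.
  bool sameDirective(const CFIDirective &A, const CFIDirective &B) {
    if (A.Kind != B.Kind)
      return false;
    switch (A.Kind) {
    case CFIDirective::Op::DefCfa:
    case CFIDirective::Op::Offset:
      return A.Reg == B.Reg && A.Offset == B.Offset;
    case CFIDirective::Op::Register:
      return A.Reg == B.Reg && A.Reg2 == B.Reg2;
    case CFIDirective::Op::RememberState:
      return true; // no operands to compare
    }
    return false;
  }

  // Hash over the same fields so that equal directives hash alike.
  size_t hashDirective(const CFIDirective &D) {
    size_t H = std::hash<int>()(static_cast<int>(D.Kind));
    H = H * 31 + std::hash<unsigned>()(D.Reg);
    H = H * 31 + std::hash<unsigned>()(D.Reg2);
    H = H * 31 + std::hash<long long>()(static_cast<long long>(D.Offset));
    return H;
  }

Comparing by content rather than index is what allows otherwise identical CFI directives created in different functions to be recognized as identical. The diff continues below.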
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 633a853b2c74..13ddad593829 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -19,10 +19,10 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #define DEBUG_TYPE "machine-scheduler" diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 6430e54a59cd..530040a33324 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c7f0329b3c57..af26c170cb82 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -31,11 +31,11 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index 513e82716564..84dbebfd2b6f 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -16,8 +16,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 7cff85a3ab04..c267018c4d19 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -81,6 +81,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" @@ -88,7 +89,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 4a50d895340a..9770b336da6e 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -31,10 +31,10 @@ #include 
"llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f2249f9e37e0..fd92609613b8 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -34,12 +34,12 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 0118580a626a..feab36d39953 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -13,9 +13,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index d9e9b3360a05..e41ffc244d9a 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -39,6 +39,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -55,8 +57,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetOptions.h" @@ -76,12 +76,6 @@ using namespace llvm; using MBBVector = SmallVector<MachineBasicBlock *, 4>; -static void spillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCXFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks); - namespace { class PEI : public MachineFunctionPass { @@ -125,6 +119,7 @@ private: void calculateCallFrameInfo(MachineFunction &Fn); void calculateSaveRestoreBlocks(MachineFunction &Fn); + void spillCalleeSavedRegs(MachineFunction &MF); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); @@ -197,8 +192,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Handle CSR spilling and restoring, for targets that need it. 
if (Fn.getTarget().usesPhysRegsForPEI()) - spillCalleeSavedRegs(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, SaveBlocks, - RestoreBlocks); + spillCalleeSavedRegs(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -505,11 +499,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } -static void spillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks) { +void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { // We can't list this requirement in getRequiredProperties because some // targets (WebAssembly) use virtual registers past this point, and the pass // pipeline is set up without giving the passes a chance to look at the diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5fa5587457d9..86fd87450521 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 7061c3ff6523..19467ae3b729 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" @@ -41,7 +42,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index e74ac79f0010..23e5f907c88e 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -54,6 +54,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -66,7 +67,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 214c6d2c820d..3aaa5a4738d5 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -27,7 +27,7 @@ #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 956dec39fc38..8e463ff272d2 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ 
b/lib/CodeGen/RegisterClassInfo.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1ef7e41b8ae3..84c2e2548ec0 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -35,17 +35,17 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/Pass.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 844ddb9ed3ff..18fe2d859544 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -28,13 +28,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index bd5ecbd28f29..19e0f30ecfc6 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -35,7 +35,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" using namespace llvm; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 5e95f760aaa2..627bc1946f3a 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,11 +19,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e2cb8cad6e18..b789e2d9c52c 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -15,12 +15,12 @@ #include 
"llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <cassert> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 491c56a7314d..6e245feb7358 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -63,6 +63,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -98,7 +99,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b736037d71dd..696855f80185 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -19,6 +19,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -32,8 +34,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7a123e3e92ef..2760c6cbf867 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,12 +21,12 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ff49134f7b99..356f25850460 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -40,7 +40,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp 
b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b42edf8e751a..0d85bccdeac5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -849,13 +849,14 @@ static void transferDbgValues(SelectionDAG &DAG, SDValue From, SDValue To, break; DIVariable *Var = Dbg->getVariable(); - auto *Fragment = DIExpression::createFragmentExpression( - Dbg->getExpression(), OffsetInBits, To.getValueSizeInBits()); - SDDbgValue *Clone = - DAG.getDbgValue(Var, Fragment, ToNode, To.getResNo(), Dbg->isIndirect(), - Dbg->getDebugLoc(), Dbg->getOrder()); + if (auto Fragment = DIExpression::createFragmentExpression( + Dbg->getExpression(), OffsetInBits, To.getValueSizeInBits())) { + SDDbgValue *Clone = DAG.getDbgValue(Var, *Fragment, ToNode, To.getResNo(), + Dbg->isIndirect(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } Dbg->setIsInvalidated(); - ClonedDVs.push_back(Clone); } for (SDDbgValue *Dbg : ClonedDVs) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5d6c4998ecd5..b55414b51b8b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3844,7 +3844,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } LdOps.push_back(L); - + LdOp = L; LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 137994093277..87dcf6e6950f 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -18,12 +18,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 982029256293..24d9d376de28 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -42,7 +43,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7ddb0dc07fd5..2e1abbe8bb22 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -23,11 +23,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" 
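Note on the LegalizeTypes.cpp hunk above: transferDbgValues is adapted to DIExpression::createFragmentExpression now returning an optional result. A clone of the debug value is emitted only when a fragment expression could actually be built, while the original value is invalidated either way. A rough, self-contained illustration of that control flow with std::optional (hypothetical names, not the LLVM API):

  #include <cstdint>
  #include <optional>
  #include <string>
  #include <vector>

  struct DbgValue {
    std::string Expr;
    bool Invalidated = false;
  };

  // Stand-in for createFragmentExpression: may fail, e.g. when the requested
  // fragment does not fit the variable it describes.
  std::optional<std::string> makeFragment(const std::string &Expr,
                                          uint64_t OffsetInBits,
                                          uint64_t SizeInBits) {
    if (SizeInBits == 0)
      return std::nullopt;
    return Expr + " fragment(" + std::to_string(OffsetInBits) + ", " +
           std::to_string(SizeInBits) + ")";
  }

  void transferOne(DbgValue &Old, uint64_t OffsetInBits, uint64_t SizeInBits,
                   std::vector<DbgValue> &Cloned) {
    // Clone only when a fragment expression could be built ...
    if (auto Frag = makeFragment(Old.Expr, OffsetInBits, SizeInBits))
      Cloned.push_back(DbgValue{*Frag, false});
    // ... but invalidate the old value unconditionally, as in the patch.
    Old.Invalidated = true;
  }

The diff continues below.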
-#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 631cb34717c4..c25315f29837 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,11 +25,11 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <climits> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e5de280508b9..2fb2615b0728 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2893,11 +2893,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - if (unsigned Align = InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2_32(Align)); - break; - } + TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); break; default: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ccc06fa3ee15..f4f8879b5d85 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -55,6 +55,8 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Argument.h" @@ -98,8 +100,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -2585,7 +2585,7 @@ static bool isVectorReductionOp(const User *I) { case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().unsafeAlgebra()) + if (FPOp->getFastMathFlags().isFast()) break; LLVM_FALLTHROUGH; default: @@ -2631,7 +2631,7 @@ static bool isVectorReductionOp(const User *I) { if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = @@ -2725,7 +2725,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + Flags.setUnsafeAlgebra(FMF.isFast()); SDValue BinNodeValue = 
DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -3862,7 +3862,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we -// extract the spalt value and use it as a uniform base. +// extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { @@ -4828,12 +4828,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Ignore inlined function arguments here. - // - // FIXME: Should we be checking DL->inlinedAt() to determine this? - if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) - return false; - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. @@ -4873,11 +4867,13 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( for (unsigned E = I + RegCount; I != E; ++I) { // The vregs are guaranteed to be allocated in sequence. Op = MachineOperand::CreateReg(VMI->second + I, false); - auto *FragmentExpr = DIExpression::createFragmentExpression( + auto FragmentExpr = DIExpression::createFragmentExpression( Expr, Offset, RegisterSize); + if (!FragmentExpr) + continue; FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, FragmentExpr)); + Op->getReg(), Variable, *FragmentExpr)); Offset += RegisterSize; } } @@ -7959,13 +7955,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 1550347f0063..1097fd92eded 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -38,7 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4c4d196427e2..ae5eebd9545e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAGISel.h" #include 
"ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" @@ -45,9 +46,9 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -80,7 +81,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fe553bc986ae..1f6fafb039ed 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1288,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); + + if (unsigned Align = DAG.InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2_32(Align)); + } +} + /// This method can be implemented by targets that want to expose additional /// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 5fb6afee88a8..3230aff5ed82 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -56,15 +56,17 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" @@ -73,8 +75,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 3656832a7f1a..25a1c37b145d 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,9 +10,9 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; diff --git 
a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 1467179b7a3c..7a9a65faacac 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/LaneBitmask.h" @@ -43,7 +44,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 8a47f3d2d6de..69614a55e147 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -28,12 +28,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index bd3a20f936d8..5b76e36382c2 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,14 +28,13 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -603,8 +603,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (PreRegAlloc && MI.isCall()) return false; - if (!MI.isPHI() && !MI.isDebugValue()) - InstrCount += 1; + if (!MI.isPHI() && !MI.isMetaInstruction()) + InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9dd98b4020d2..4d2130f5ebec 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -20,7 +20,7 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" 
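Note on the SelectionDAG.cpp and TargetLowering.cpp hunks above: the frame-index known-bits computation moves behind a new computeKnownBitsForFrameIndex hook so targets can override it, and the default implementation keeps the old rule that an aligned pointer has its low log2(alignment) bits known to be zero. A small stand-alone sketch of that rule (illustrative helper only, not the LLVM API):

  #include <cassert>
  #include <cstdint>

  // For a power-of-two alignment, the low log2(Align) bits of any address
  // with that alignment are zero; Align - 1 is exactly that mask.
  uint64_t knownZeroLowBits(uint64_t Align) {
    assert(Align != 0 && (Align & (Align - 1)) == 0 &&
           "alignment must be a power of two");
    return Align - 1;
  }

  int main() {
    // A stack slot aligned to 16 bytes: the low four bits are known zero,
    // which is what Known.Zero.setLowBits(Log2_32(Align)) records.
    return knownZeroLowBits(16) == 0xF ? 0 : 1;
  }

The diff continues below.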
#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -104,3 +104,12 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } + +int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { + llvm_unreachable("getInitialCFAOffset() not implemented!"); +} + +unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) + const { + llvm_unreachable("getInitialCFARegister() not implemented!"); +}
\ No newline at end of file diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index bac12efd6395..702b91b7f771 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -19,6 +19,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" @@ -26,7 +27,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index ed845e1706f8..99ff4931e2fd 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -15,7 +15,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index c5101b1ecfc2..59e88ba3bdae 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -600,8 +600,14 @@ void TargetPassConfig::addIRPasses() { addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); } - if (getOptLevel() != CodeGenOpt::None && EnableMergeICmps) { - addPass(createMergeICmpsPass()); + if (getOptLevel() != CodeGenOpt::None) { + // The MergeICmpsPass tries to create memcmp calls by grouping sequences of + // loads and compares. ExpandMemCmpPass then tries to expand those calls + // into optimally-sized loads and compares. The transforms are enabled by a + // target lowering hook. 
+ if (EnableMergeICmps) + addPass(createMergeICmpsPass()); + addPass(createExpandMemCmpPass()); } // Run GC lowering passes for builtin collectors diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 55318237e95e..758fdabf5dd8 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index e1db9157f901..659d6f522afc 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -16,13 +16,13 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 29cfd9fb1786..d66272aed9b5 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -14,11 +14,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <string> using namespace llvm; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index efd40b209e9f..18f1485baca4 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -46,6 +46,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Pass.h" @@ -54,7 +55,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index bdd25f29aea4..5288ca672774 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" @@ -37,7 +38,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; static bool eliminateUnreachableBlock(Function &F) { diff 
--git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 65c62b167194..d499c6c1e73d 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -31,12 +31,12 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 2063ab11a74f..fb621cab28b7 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -23,10 +23,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h"
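Note on the TargetPassConfig.cpp hunk above: ExpandMemCmp is now added to the IR pass pipeline whenever optimization is enabled, while MergeICmps stays behind its existing flag, and the added comment describes the hand-off between the two. A rough C++ illustration of the patterns those passes target (example code only, not part of the patch): MergeICmps recognizes chains of loads and compares over adjacent memory and rewrites them as a memcmp, which ExpandMemCmp can then lower into a few wide loads and compares instead of a library call.

  #include <cstring>

  struct Point {
    int x, y, z, w; // 16 contiguous bytes, no padding
  };

  // A chain of adjacent-field compares like this is what MergeICmps looks for ...
  bool equalByFields(const Point &A, const Point &B) {
    return A.x == B.x && A.y == B.y && A.z == B.z && A.w == B.w;
  }

  // ... and this is the memcmp form it produces. ExpandMemCmp can then turn
  // the 16-byte memcmp into, say, two 8-byte loads per operand and compares,
  // with the exact widths chosen via a target hook.
  bool equalByMemcmp(const Point &A, const Point &B) {
    return std::memcmp(&A, &B, sizeof(Point)) == 0;
  }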