author | Florian Hahn <flo@fhahn.com> | 2021-02-19 12:50:41 +0000
committer | Florian Hahn <flo@fhahn.com> | 2021-02-19 12:50:41 +0000
commit | edc92a1c42590a1fb5e852cea6ffbc253e5e0a7f (patch)
tree | f061b5bd84ff666d5f65b9336eea34fd8866bc63
parent | 6329ce75da7a44c40d4406bf48ffffe973ef5fdb (diff)
download | llvm-edc92a1c42590a1fb5e852cea6ffbc253e5e0a7f.tar.gz
[LV] Remove VPCallback.
Now that all state for generated instructions is managed directly in
VPTransformState, VPCallback is no longer needed. This patch updates the
last use of `getOrCreateScalarValue` to instead manage the value
directly in VPTransformState and removes VPCallback.
Reviewed By: gilr
Differential Revision: https://reviews.llvm.org/D95383
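
The shape of the change is easier to see in isolation. Below is a minimal, self-contained sketch of the lookup path that replaces the callback; it is not the actual LLVM API, and names such as `TransformStateModel` and `broadcast` are stand-ins for `VPTransformState`, its per-part map, and `InnerLoopVectorizer::getBroadcastInstrs`. The point it illustrates, taken from the patch, is that a miss in the per-part map is now handled by the state itself (broadcast the live-in IR value and cache it) instead of being routed through `VPCallback` back into `InnerLoopVectorizer` and its `VectorizerValueMap`.

```cpp
// Minimal, self-contained model of the new lookup path. The real classes
// (VPTransformState, VPValue, InnerLoopVectorizer) live in VPlan.h and
// LoopVectorize.cpp and carry far more state; every name here is a stand-in.
#include <map>
#include <string>
#include <utility>

struct TransformStateModel {
  // Per-part generated values, keyed by (defining value, unroll part).
  // Plays the role of VPTransformState's internal per-part output map.
  std::map<std::pair<std::string, unsigned>, std::string> PerPartOutput;

  // Stand-in for InnerLoopVectorizer::getBroadcastInstrs().
  static std::string broadcast(const std::string &LiveIn) {
    return "broadcast(" + LiveIn + ")";
  }

  // Before this patch, a miss here was forwarded through VPCallback back to
  // InnerLoopVectorizer and its VectorizerValueMap. After it, the state
  // broadcasts the live-in IR value itself and caches the result.
  std::string get(const std::string &Def, unsigned Part) {
    auto It = PerPartOutput.find({Def, Part});
    if (It != PerPartOutput.end())
      return It->second;
    std::string B = broadcast(Def);
    PerPartOutput[{Def, Part}] = B;
    return B;
  }
};

int main() {
  TransformStateModel State;
  // The first query creates and caches the broadcast; the second reuses it.
  return State.get("%x", 0) == State.get("%x", 0) ? 0 : 1;
}
```

With the fallback owned by the state, the `VPCallback` interface and the separate `VectorizerValueMap` have no remaining users, which is what allows the deletions in the diff below.
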
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 12
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 312
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp | 4
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.h | 169
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll | 5
5 files changed, 64 insertions, 438 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 19797e6f7858..1f8d5c8aa195 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -222,18 +222,6 @@ class LoopVectorizationPlanner { SmallVector<VPlanPtr, 4> VPlans; - /// This class is used to enable the VPlan to invoke a method of ILV. This is - /// needed until the method is refactored out of ILV and becomes reusable. - struct VPCallbackILV : public VPCallback { - InnerLoopVectorizer &ILV; - - VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} - - Value *getOrCreateVectorValues(Value *V, unsigned Part) override; - Value *getOrCreateScalarValue(Value *V, - const VPIteration &Instance) override; - }; - /// A builder used to construct the current plan. VPBuilder Builder; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 109686c7742c..f89a04172b64 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -461,9 +461,8 @@ public: ProfileSummaryInfo *PSI) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), - Builder(PSE.getSE()->getContext()), - VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM), - BFI(BFI), PSI(PSI) { + Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI), + PSI(PSI) { // Query this against the original loop and save it here because the profile // of the original loop header may change as the transformation happens. OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize( @@ -533,50 +532,7 @@ public: VPValue *Def, VPValue *CastDef, VPTransformState &State); - /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a - /// vector or scalar value on-demand if one is not yet available. When - /// vectorizing a loop, we visit the definition of an instruction before its - /// uses. When visiting the definition, we either vectorize or scalarize the - /// instruction, creating an entry for it in the corresponding map. (In some - /// cases, such as induction variables, we will create both vector and scalar - /// entries.) Then, as we encounter uses of the definition, we derive values - /// for each scalar or vector use unless such a value is already available. - /// For example, if we scalarize a definition and one of its uses is vector, - /// we build the required vector on-demand with an insertelement sequence - /// when visiting the use. Otherwise, if the use is scalar, we can use the - /// existing scalar definition. - /// - /// Return a value in the new loop corresponding to \p V from the original - /// loop at unroll index \p Part. If the value has already been vectorized, - /// the corresponding vector entry in VectorLoopValueMap is returned. If, - /// however, the value has a scalar entry in VectorLoopValueMap, we construct - /// a new vector value on-demand by inserting the scalar values into a vector - /// with an insertelement sequence. If the value has been neither vectorized - /// nor scalarized, it must be loop invariant, so we simply broadcast the - /// value into a vector. 
- Value *getOrCreateVectorValue(Value *V, unsigned Part); - - void setVectorValue(Value *Scalar, unsigned Part, Value *Vector) { - VectorLoopValueMap.setVectorValue(Scalar, Part, Vector); - } - - void resetVectorValue(Value *Scalar, unsigned Part, Value *Vector) { - VectorLoopValueMap.resetVectorValue(Scalar, Part, Vector); - } - - void setScalarValue(Value *Scalar, const VPIteration &Instance, Value *V) { - VectorLoopValueMap.setScalarValue(Scalar, Instance, V); - } - - /// Return a value in the new loop corresponding to \p V from the original - /// loop at unroll and vector indices \p Instance. If the value has been - /// vectorized but not scalarized, the necessary extractelement instruction - /// will be generated. - Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance); - /// Construct the vector value of a scalarized value \p V one lane at a time. - void packScalarIntoVectorValue(Value *V, const VPIteration &Instance); - void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State); @@ -645,7 +601,8 @@ protected: void fixReduction(PHINode *Phi, VPTransformState &State); /// Clear NSW/NUW flags from reduction instructions if necessary. - void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc); + void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, + VPTransformState &State); /// Fixup the LCSSA phi nodes in the unique exit block. This simply /// means we need to add the appropriate incoming value from the middle @@ -660,7 +617,7 @@ protected: /// Shrinks vector element sizes to the smallest bitwidth they can be legally /// represented as. - void truncateToMinimalBitwidths(); + void truncateToMinimalBitwidths(VPTransformState &State); /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. @@ -876,12 +833,6 @@ protected: /// The induction variable of the old basic block. PHINode *OldInduction = nullptr; - /// Maps values from the original loop to their corresponding values in the - /// vectorized loop. A key value can map to either vector values, scalar - /// values or both kinds of values, depending on whether the key was - /// vectorized and scalarized. - VectorizerValueMap VectorLoopValueMap; - /// Store instructions that were predicated. SmallVector<Instruction *, 4> PredicatedInstructions; @@ -2104,7 +2055,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( VecInd->setDebugLoc(EntryVal->getDebugLoc()); Instruction *LastInduction = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { - State.set(Def, EntryVal, LastInduction, Part); + State.set(Def, LastInduction, Part); if (isa<TruncInst>(EntryVal)) addMetadata(LastInduction, EntryVal); @@ -2236,7 +2187,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start, Value *EntryPart = getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step, ID.getInductionOpcode()); - State.set(Def, EntryVal, EntryPart, Part); + State.set(Def, EntryPart, Part); if (Trunc) addMetadata(EntryPart, Trunc); recordVectorLoopValueForInductionCast(ID, EntryVal, EntryPart, CastDef, @@ -2375,7 +2326,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, : VF.getKnownMinValue(); assert((!VF.isScalable() || Lanes == 1) && "Should never scalarize a scalable vector"); - // Compute the scalar steps and save the results in VectorLoopValueMap. + // Compute the scalar steps and save the results in State. 
for (unsigned Part = 0; Part < UF; ++Part) { for (unsigned Lane = 0; Lane < Lanes; ++Lane) { auto *IntStepTy = IntegerType::get(ScalarIVTy->getContext(), @@ -2400,132 +2351,6 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, } } -Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { - assert(V != Induction && "The new induction variable should not be used."); - assert(!V->getType()->isVectorTy() && "Can't widen a vector"); - assert(!V->getType()->isVoidTy() && "Type does not produce a value"); - - // If we have a stride that is replaced by one, do it here. Defer this for - // the VPlan-native path until we start running Legal checks in that path. - if (!EnableVPlanNativePath && Legal->hasStride(V)) - V = ConstantInt::get(V->getType(), 1); - - // If we have a vector mapped to this value, return it. - if (VectorLoopValueMap.hasVectorValue(V, Part)) - return VectorLoopValueMap.getVectorValue(V, Part); - - // If the value has not been vectorized, check if it has been scalarized - // instead. If it has been scalarized, and we actually need the value in - // vector form, we will construct the vector values on demand. - if (VectorLoopValueMap.hasAnyScalarValue(V)) { - Value *ScalarValue = - VectorLoopValueMap.getScalarValue(V, VPIteration(Part, 0)); - - // If we've scalarized a value, that value should be an instruction. - auto *I = cast<Instruction>(V); - - // If we aren't vectorizing, we can just copy the scalar map values over to - // the vector map. - if (VF.isScalar()) { - VectorLoopValueMap.setVectorValue(V, Part, ScalarValue); - return ScalarValue; - } - - // Get the last scalar instruction we generated for V and Part. If the value - // is known to be uniform after vectorization, this corresponds to lane zero - // of the Part unroll iteration. Otherwise, the last instruction is the one - // we created for the last vector lane of the Part unroll iteration. - unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) - ? 0 - : VF.getKnownMinValue() - 1; - assert((!VF.isScalable() || LastLane == 0) && - "Scalable vectorization can't lead to any scalarized values."); - auto *LastInst = cast<Instruction>( - VectorLoopValueMap.getScalarValue(V, VPIteration(Part, LastLane))); - - // Set the insert point after the last scalarized instruction. This ensures - // the insertelement sequence will directly follow the scalar definitions. - auto OldIP = Builder.saveIP(); - auto NewIP = std::next(BasicBlock::iterator(LastInst)); - Builder.SetInsertPoint(&*NewIP); - - // However, if we are vectorizing, we need to construct the vector values. - // If the value is known to be uniform after vectorization, we can just - // broadcast the scalar value corresponding to lane zero for each unroll - // iteration. Otherwise, we construct the vector values using insertelement - // instructions. Since the resulting vectors are stored in - // VectorLoopValueMap, we will only generate the insertelements once. - Value *VectorValue = nullptr; - if (Cost->isUniformAfterVectorization(I, VF)) { - VectorValue = getBroadcastInstrs(ScalarValue); - VectorLoopValueMap.setVectorValue(V, Part, VectorValue); - } else { - // Initialize packing with insertelements to start from poison. 
- assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Poison = PoisonValue::get(VectorType::get(V->getType(), VF)); - VectorLoopValueMap.setVectorValue(V, Part, Poison); - for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) - packScalarIntoVectorValue(V, VPIteration(Part, Lane)); - VectorValue = VectorLoopValueMap.getVectorValue(V, Part); - } - Builder.restoreIP(OldIP); - return VectorValue; - } - - // If this scalar is unknown, assume that it is a constant or that it is - // loop invariant. Broadcast V and save the value for future uses. - Value *B = getBroadcastInstrs(V); - VectorLoopValueMap.setVectorValue(V, Part, B); - return B; -} - -Value * -InnerLoopVectorizer::getOrCreateScalarValue(Value *V, - const VPIteration &Instance) { - // If the value is not an instruction contained in the loop, it should - // already be scalar. - if (OrigLoop->isLoopInvariant(V)) - return V; - - assert(Instance.Lane > 0 - ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF) - : true && "Uniform values only have lane zero"); - - // If the value from the original loop has not been vectorized, it is - // represented by UF x VF scalar values in the new loop. Return the requested - // scalar value. - if (VectorLoopValueMap.hasScalarValue(V, Instance)) - return VectorLoopValueMap.getScalarValue(V, Instance); - - // If the value has not been scalarized, get its entry in VectorLoopValueMap - // for the given unroll part. If this entry is not a vector type (i.e., the - // vectorization factor is one), there is no need to generate an - // extractelement instruction. - auto *U = getOrCreateVectorValue(V, Instance.Part); - if (!U->getType()->isVectorTy()) { - assert(VF.isScalar() && "Value not scalarized has non-vector type"); - return U; - } - - // Otherwise, the value from the original loop has been vectorized and is - // represented by UF vector values. Extract and return the requested scalar - // value from the appropriate vector lane. - return Builder.CreateExtractElement(U, Builder.getInt32(Instance.Lane)); -} - -void InnerLoopVectorizer::packScalarIntoVectorValue( - Value *V, const VPIteration &Instance) { - assert(V != Induction && "The new induction variable should not be used."); - assert(!V->getType()->isVectorTy() && "Can't pack a vector"); - assert(!V->getType()->isVoidTy() && "Type does not produce a value"); - - Value *ScalarInst = VectorLoopValueMap.getScalarValue(V, Instance); - Value *VectorValue = VectorLoopValueMap.getVectorValue(V, Instance.Part); - VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst, - Builder.getInt32(Instance.Lane)); - VectorLoopValueMap.resetVectorValue(V, Instance.Part, VectorValue); -} - void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, VPTransformState &State) { @@ -2715,7 +2540,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (Group->isReverse()) StridedVec = reverseVector(StridedVec); - State.set(VPDefs[J], Member, StridedVec, Part); + State.set(VPDefs[J], StridedVec, Part); } ++J; } @@ -2909,7 +2734,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( NewLI = reverseVector(NewLI); } - State.set(Def, Instr, NewLI, Part); + State.set(Def, NewLI, Part); } } @@ -2953,7 +2778,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, // Place the cloned scalar in the new loop. 
Builder.Insert(Cloned); - State.set(Def, Instr, Cloned, Instance); + State.set(Def, Cloned, Instance); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) @@ -3832,19 +3657,20 @@ static Type *largestIntegerVectorType(Type *T1, Type *T2) { return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2; } -void InnerLoopVectorizer::truncateToMinimalBitwidths() { +void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) { // For every instruction `I` in MinBWs, truncate the operands, create a // truncated version of `I` and reextend its result. InstCombine runs // later and will remove any ext/trunc pairs. SmallPtrSet<Value *, 4> Erased; for (const auto &KV : Cost->getMinimalBitwidths()) { // If the value wasn't vectorized, we must maintain the original scalar - // type. The absence of the value from VectorLoopValueMap indicates that it + // type. The absence of the value from State indicates that it // wasn't vectorized. - if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) + VPValue *Def = State.Plan->getVPValue(KV.first); + if (!State.hasAnyVectorValue(Def)) continue; for (unsigned Part = 0; Part < UF; ++Part) { - Value *I = getOrCreateVectorValue(KV.first, Part); + Value *I = State.get(Def, Part); if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I)) continue; Type *OriginalTy = I->getType(); @@ -3943,24 +3769,25 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { I->replaceAllUsesWith(Res); cast<Instruction>(I)->eraseFromParent(); Erased.insert(I); - VectorLoopValueMap.resetVectorValue(KV.first, Part, Res); + State.reset(Def, Res, Part); } } // We'll have created a bunch of ZExts that are now parentless. Clean up. for (const auto &KV : Cost->getMinimalBitwidths()) { // If the value wasn't vectorized, we must maintain the original scalar - // type. The absence of the value from VectorLoopValueMap indicates that it + // type. The absence of the value from State indicates that it // wasn't vectorized. - if (!VectorLoopValueMap.hasAnyVectorValue(KV.first)) + VPValue *Def = State.Plan->getVPValue(KV.first); + if (!State.hasAnyVectorValue(Def)) continue; for (unsigned Part = 0; Part < UF; ++Part) { - Value *I = getOrCreateVectorValue(KV.first, Part); + Value *I = State.get(Def, Part); ZExtInst *Inst = dyn_cast<ZExtInst>(I); if (Inst && Inst->use_empty()) { Value *NewI = Inst->getOperand(0); Inst->eraseFromParent(); - VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI); + State.reset(Def, NewI, Part); } } } @@ -3970,7 +3797,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. if (VF.isVector()) - truncateToMinimalBitwidths(); + truncateToMinimalBitwidths(State); // Fix widened non-induction PHIs by setting up the PHI operands. if (OrigPHIsToFix.size()) { @@ -4163,7 +3990,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi, : Incoming; PhiPart->replaceAllUsesWith(Shuffle); cast<Instruction>(PhiPart)->eraseFromParent(); - State.reset(PhiDef, Phi, Shuffle, Part); + State.reset(PhiDef, Shuffle, Part); Incoming = PreviousPart; } @@ -4239,7 +4066,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) { Type *VecTy = State.get(LoopExitInstDef, 0)->getType(); // Wrap flags are in general invalid after vectorization, clear them. - clearReductionWrapFlags(RdxDesc); + clearReductionWrapFlags(RdxDesc, State); // Fix the vector-loop phi. 
@@ -4279,7 +4106,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) { assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select"); } assert(Sel && "Reduction exit feeds no select"); - State.reset(LoopExitInstDef, LoopExitInst, Sel, Part); + State.reset(LoopExitInstDef, Sel, Part); // If the target can create a predicated operator for the reduction at no // extra cost in the loop (for example a predicated vadd), it can be @@ -4326,7 +4153,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) { Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); - State.reset(LoopExitInstDef, LoopExitInst, RdxParts[Part], Part); + State.reset(LoopExitInstDef, RdxParts[Part], Part); } } @@ -4401,8 +4228,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) { Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); } -void InnerLoopVectorizer::clearReductionWrapFlags( - RecurrenceDescriptor &RdxDesc) { +void InnerLoopVectorizer::clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc, + VPTransformState &State) { RecurKind RK = RdxDesc.getRecurrenceKind(); if (RK != RecurKind::Add && RK != RecurKind::Mul) return; @@ -4418,7 +4245,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags( Instruction *Cur = Worklist.pop_back_val(); if (isa<OverflowingBinaryOperator>(Cur)) for (unsigned Part = 0; Part < UF; ++Part) { - Value *V = getOrCreateVectorValue(Cur, Part); + Value *V = State.get(State.Plan->getVPValue(Cur), Part); cast<Instruction>(V)->dropPoisonGeneratingFlags(); } @@ -4540,7 +4367,7 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { // The insertion point in Builder may be invalidated by the time we get // here. Force the Builder insertion point to something valid so that we do // not run into issues during insertion point restore in - // getOrCreateVectorValue calls below. + // State::get() calls below. Builder.SetInsertPoint(NewPhi); // The predecessor order is preserved and we can rely on mapping between @@ -4554,7 +4381,7 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { OrigPhi->getIncomingValueForBlock(ScalarBBPredecessors[i]); // Scalar incoming value may need a broadcast - Value *NewIncV = getOrCreateVectorValue(ScIncV, 0); + Value *NewIncV = State.get(State.Plan->getOrAddVPValue(ScIncV), 0); NewPhi->addIncoming(NewIncV, NewPredBB); } } @@ -4587,7 +4414,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, auto *Clone = Builder.Insert(GEP->clone()); for (unsigned Part = 0; Part < UF; ++Part) { Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); - State.set(VPDef, GEP, EntryPart, Part); + State.set(VPDef, EntryPart, Part); addMetadata(EntryPart, GEP); } } else { @@ -4625,7 +4452,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices); assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); - State.set(VPDef, GEP, NewGEP, Part); + State.set(VPDef, NewGEP, Part); addMetadata(NewGEP, GEP); } } @@ -4645,7 +4472,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, ? 
PN->getType() : VectorType::get(PN->getType(), State.VF); Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); - State.set(Def, P, VecPhi, 0); + State.set(Def, VecPhi, 0); OrigPHIsToFix.push_back(P); return; @@ -4699,7 +4526,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, // This is phase one of vectorizing PHIs. Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - State.set(Def, P, EntryPart, Part); + State.set(Def, EntryPart, Part); if (StartV) { // Make sure to add the reduction start value only to the // first unroll part. @@ -4752,7 +4579,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); SclrGep->setName("next.gep"); - State.set(Def, P, SclrGep, VPIteration(Part, Lane)); + State.set(Def, SclrGep, VPIteration(Part, Lane)); } } return; @@ -4800,7 +4627,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Builder.CreateVectorSplat( State.VF.getKnownMinValue(), ScalarStepValue), "vector.gep")); - State.set(Def, P, GEP, Part); + State.set(Def, GEP, Part); } } } @@ -4867,7 +4694,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, VecOp->copyIRFlags(&I); // Use this vector value for all users of the original instruction. - State.set(Def, &I, V, Part); + State.set(Def, V, Part); addMetadata(V, &I); } @@ -4891,7 +4718,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, } else { C = Builder.CreateICmp(Cmp->getPredicate(), A, B); } - State.set(Def, &I, C, Part); + State.set(Def, C, Part); addMetadata(C, &I); } @@ -4920,7 +4747,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, for (unsigned Part = 0; Part < UF; ++Part) { Value *A = State.get(User.getOperand(0), Part); Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - State.set(Def, &I, Cast, Part); + State.set(Def, Cast, Part); addMetadata(Cast, &I); } break; @@ -4997,7 +4824,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, if (isa<FPMathOperator>(V)) V->copyFastMathFlags(CI); - State.set(Def, &I, V, Part); + State.set(Def, V, Part); addMetadata(V, &I); } } @@ -5022,7 +4849,7 @@ void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef, Value *Op0 = State.get(Operands.getOperand(1), Part); Value *Op1 = State.get(Operands.getOperand(2), Part); Value *Sel = Builder.CreateSelect(Cond, Op0, Op1); - State.set(VPDef, &I, Sel, Part); + State.set(VPDef, Sel, Part); addMetadata(Sel, &I); } } @@ -7792,16 +7619,11 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV, // Perform the actual loop transformation. // 1. Create a new empty loop. Unlink the old loop and connect the new one. 
- VPCallbackILV CallbackILV(ILV); - assert(BestVF.hasValue() && "Vectorization Factor is missing"); assert(VPlans.size() == 1 && "Not a single VPlan to execute."); - VPTransformState State{*BestVF, BestUF, - LI, DT, - ILV.Builder, ILV.VectorLoopValueMap, - &ILV, VPlans.front().get(), - CallbackILV}; + VPTransformState State{ + *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; @@ -9016,16 +8838,6 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions( } } -Value* LoopVectorizationPlanner::VPCallbackILV:: -getOrCreateVectorValues(Value *V, unsigned Part) { - return ILV.getOrCreateVectorValue(V, Part); -} - -Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue( - Value *V, const VPIteration &Instance) { - return ILV.getOrCreateScalarValue(V, Instance); -} - void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; @@ -9112,7 +8924,7 @@ void VPBlendRecipe::execute(VPTransformState &State) { } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.set(this, Phi, Entry[Part], Part); + State.set(this, Entry[Part], Part); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -9149,7 +8961,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed, PrevInChain); } - State.set(this, getUnderlyingInstr(), NextInChain, Part); + State.set(this, NextInChain, Part); } } @@ -9165,7 +8977,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Poison = PoisonValue::get( VectorType::get(getUnderlyingValue()->getType(), State.VF)); - State.set(this, getUnderlyingInstr(), Poison, State.Instance->Part); + State.set(this, Poison, State.Instance->Part); } State.ILV->packScalarIntoVectorValue(this, *State.Instance, State); } @@ -9314,31 +9126,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( return CM_ScalarEpilogueAllowed; } -void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V, - const VPIteration &Instance) { - set(Def, V, Instance); - ILV->setScalarValue(IRDef, Instance, V); -} - -void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V, - unsigned Part) { - set(Def, V, Part); - ILV->setVectorValue(IRDef, Part, V); -} - -void VPTransformState::reset(VPValue *Def, Value *IRDef, Value *V, - unsigned Part) { - set(Def, V, Part); - ILV->resetVectorValue(IRDef, Part, V); -} - Value *VPTransformState::get(VPValue *Def, unsigned Part) { // If Values have been set for this Def return the one relevant for \p Part. if (hasVectorValue(Def, Part)) return Data.PerPartOutput[Def][Part]; - if (!hasScalarValue(Def, {Part, 0})) - return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part); + if (!hasScalarValue(Def, {Part, 0})) { + Value *IRV = Def->getLiveInIRValue(); + Value *B = ILV->getBroadcastInstrs(IRV); + set(Def, B, Part); + return B; + } Value *ScalarValue = get(Def, {Part, 0}); // If we aren't vectorizing, we can just copy the scalar map values over @@ -9366,7 +9164,7 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) { // broadcast the scalar value corresponding to lane zero for each unroll // iteration. Otherwise, we construct the vector values using // insertelement instructions. 
Since the resulting vectors are stored in - // VectorLoopValueMap, we will only generate the insertelements once. + // State, we will only generate the insertelements once. Value *VectorValue = nullptr; if (IsUniform) { VectorValue = ILV->getBroadcastInstrs(ScalarValue); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 942208d499f8..50edd32e293e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -306,9 +306,7 @@ void VPBasicBlock::execute(VPTransformState *State) { // branch instruction using the condition value from vector lane 0 and dummy // successors. The successors are fixed later when the successor blocks are // visited. - Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0); - NewCond = State->Builder.CreateExtractElement(NewCond, - State->Builder.getInt32(0)); + Value *NewCond = State->get(CBV, {0, 0}); // Replace the temporary unreachable terminator with the new conditional // branch. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 9a55f1c2555a..e729089023d2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -103,159 +103,14 @@ struct VPIteration { bool isFirstIteration() const { return Part == 0 && Lane == 0; } }; -/// This is a helper struct for maintaining vectorization state. It's used for -/// mapping values from the original loop to their corresponding values in -/// the new loop. Two mappings are maintained: one for vectorized values and -/// one for scalarized values. Vectorized values are represented with UF -/// vector values in the new loop, and scalarized values are represented with -/// UF x VF scalar values in the new loop. UF and VF are the unroll and -/// vectorization factors, respectively. -/// -/// Entries can be added to either map with setVectorValue and setScalarValue, -/// which assert that an entry was not already added before. If an entry is to -/// replace an existing one, call resetVectorValue and resetScalarValue. This is -/// currently needed to modify the mapped values during "fix-up" operations that -/// occur once the first phase of widening is complete. These operations include -/// type truncation and the second phase of recurrence widening. -/// -/// Entries from either map can be retrieved using the getVectorValue and -/// getScalarValue functions, which assert that the desired value exists. -struct VectorizerValueMap { - friend struct VPTransformState; - -private: - /// The unroll factor. Each entry in the vector map contains UF vector values. - unsigned UF; - - /// The vectorization factor. Each entry in the scalar map contains UF x VF - /// scalar values. - ElementCount VF; - - /// The vector and scalar map storage. We use std::map and not DenseMap - /// because insertions to DenseMap invalidate its iterators. - using VectorParts = SmallVector<Value *, 2>; - using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>; - std::map<Value *, VectorParts> VectorMapStorage; - std::map<Value *, ScalarParts> ScalarMapStorage; - -public: - /// Construct an empty map with the given unroll and vectorization factors. - VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {} - - /// \return True if the map has any vector entry for \p Key. - bool hasAnyVectorValue(Value *Key) const { - return VectorMapStorage.count(Key); - } - - /// \return True if the map has a vector entry for \p Key and \p Part. 
- bool hasVectorValue(Value *Key, unsigned Part) const { - assert(Part < UF && "Queried Vector Part is too large."); - if (!hasAnyVectorValue(Key)) - return false; - const VectorParts &Entry = VectorMapStorage.find(Key)->second; - assert(Entry.size() == UF && "VectorParts has wrong dimensions."); - return Entry[Part] != nullptr; - } - - /// \return True if the map has any scalar entry for \p Key. - bool hasAnyScalarValue(Value *Key) const { - return ScalarMapStorage.count(Key); - } - - /// \return True if the map has a scalar entry for \p Key and \p Instance. - bool hasScalarValue(Value *Key, const VPIteration &Instance) const { - assert(Instance.Part < UF && "Queried Scalar Part is too large."); - assert(Instance.Lane < VF.getKnownMinValue() && - "Queried Scalar Lane is too large."); - - if (!hasAnyScalarValue(Key)) - return false; - const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; - assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); - assert(Entry[Instance.Part].size() == VF.getKnownMinValue() && - "ScalarParts has wrong dimensions."); - return Entry[Instance.Part][Instance.Lane] != nullptr; - } - - /// Retrieve the existing vector value that corresponds to \p Key and - /// \p Part. - Value *getVectorValue(Value *Key, unsigned Part) { - assert(hasVectorValue(Key, Part) && "Getting non-existent value."); - return VectorMapStorage[Key][Part]; - } - - /// Retrieve the existing scalar value that corresponds to \p Key and - /// \p Instance. - Value *getScalarValue(Value *Key, const VPIteration &Instance) { - assert(hasScalarValue(Key, Instance) && "Getting non-existent value."); - return ScalarMapStorage[Key][Instance.Part][Instance.Lane]; - } - - /// Set a vector value associated with \p Key and \p Part. Assumes such a - /// value is not already set. If it is, use resetVectorValue() instead. - void setVectorValue(Value *Key, unsigned Part, Value *Vector) { - assert(!hasVectorValue(Key, Part) && "Vector value already set for part"); - if (!VectorMapStorage.count(Key)) { - VectorParts Entry(UF); - VectorMapStorage[Key] = Entry; - } - VectorMapStorage[Key][Part] = Vector; - } - - /// Set a scalar value associated with \p Key and \p Instance. Assumes such a - /// value is not already set. - void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) { - assert(!hasScalarValue(Key, Instance) && "Scalar value already set"); - if (!ScalarMapStorage.count(Key)) { - ScalarParts Entry(UF); - // TODO: Consider storing uniform values only per-part, as they occupy - // lane 0 only, keeping the other VF-1 redundant entries null. - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part].resize(VF.getKnownMinValue(), nullptr); - ScalarMapStorage[Key] = Entry; - } - ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; - } - - /// Reset the vector value associated with \p Key for the given \p Part. - /// This function can be used to update values that have already been - /// vectorized. This is the case for "fix-up" operations including type - /// truncation and the second phase of recurrence vectorization. - void resetVectorValue(Value *Key, unsigned Part, Value *Vector) { - assert(hasVectorValue(Key, Part) && "Vector value not set for part"); - VectorMapStorage[Key][Part] = Vector; - } - - /// Reset the scalar value associated with \p Key for \p Part and \p Lane. - /// This function can be used to update values that have already been - /// scalarized. 
This is the case for "fix-up" operations including scalar phi - /// nodes for scalarized and predicated instructions. - void resetScalarValue(Value *Key, const VPIteration &Instance, - Value *Scalar) { - assert(hasScalarValue(Key, Instance) && - "Scalar value not set for part and lane"); - ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; - } -}; - -/// This class is used to enable the VPlan to invoke a method of ILV. This is -/// needed until the method is refactored out of ILV and becomes reusable. -struct VPCallback { - virtual ~VPCallback() {} - virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0; - virtual Value *getOrCreateScalarValue(Value *V, - const VPIteration &Instance) = 0; -}; - /// VPTransformState holds information passed down when "executing" a VPlan, /// needed for generating the output IR. struct VPTransformState { VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, IRBuilder<> &Builder, - VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV, - VPlan *Plan, VPCallback &Callback) - : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), - ValueMap(ValueMap), ILV(ILV), Plan(Plan), Callback(Callback) {} + InnerLoopVectorizer *ILV, VPlan *Plan) + : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV), + Plan(Plan) {} /// The chosen Vectorization and Unroll Factors of the loop being vectorized. ElementCount VF; @@ -294,6 +149,10 @@ struct VPTransformState { I->second[Part]; } + bool hasAnyVectorValue(VPValue *Def) const { + return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end(); + } + bool hasScalarValue(VPValue *Def, VPIteration Instance) { auto I = Data.PerPartScalars.find(Def); if (I == Data.PerPartScalars.end()) @@ -319,12 +178,6 @@ struct VPTransformState { Iter->second[Part] = V; } - void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part); - void reset(VPValue *Def, Value *IRDef, Value *V, unsigned Part); - - /// Set the generated scalar \p V for \p Def and \p IRDef and the given \p - /// Instance. - void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance); /// Set the generated scalar \p V for \p Def and the given \p Instance. void set(VPValue *Def, Value *V, const VPIteration &Instance) { auto Iter = Data.PerPartScalars.insert({Def, {}}); @@ -384,12 +237,6 @@ struct VPTransformState { /// Hold a reference to the IRBuilder used to generate output IR code. IRBuilder<> &Builder; - /// Hold a reference to the Value state information used when generating the - /// Values of the output IR. - VectorizerValueMap &ValueMap; - - /// Hold a reference to a mapping between VPValues in VPlan and original - /// Values they correspond to. VPValue2ValueTy VPValue2Value; /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF). @@ -403,8 +250,6 @@ struct VPTransformState { /// Pointer to the VPlan code is generated for. VPlan *Plan; - - VPCallback &Callback; }; /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. 
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
index 5a11cc531c2c..452281cdb19e 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
@@ -18,16 +18,13 @@
 ; CHECK-LABEL: vector.ph:
 ; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
 ; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> poison, i1 %[[ZeroTripChk]], i32 0
-; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> poison, <4 x i32> zeroinitializer
 
 ; CHECK-LABEL: vector.body:
 ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
 ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
 ; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, <4 x i64> %[[VecInd]]
 ; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[CSplat]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK: %[[ZCmpExtr:.*]] = extractelement <4 x i1> %[[ZSplat]], i32 0
-; CHECK: br i1 %[[ZCmpExtr]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
+; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
 
 ; CHECK: [[InnerForPh]]:
 ; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)