diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 70 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll | 10 |
2 files changed, 40 insertions, 40 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 7ffa30b54b5f..86ba0be5d8f7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1821,9 +1821,9 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, /// Rebuild a new instruction just like 'I' but with the new operands given. /// In the event of type mismatch, the type of the operands is correct. -static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { - // We don't want to use the IRBuilder here because we want the replacement - // instructions to appear next to 'I', not the builder's insertion point. +static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps, + IRBuilderBase &Builder) { + Builder.SetInsertPoint(I); switch (I->getOpcode()) { case Instruction::Add: case Instruction::FAdd: @@ -1845,28 +1845,29 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { case Instruction::Xor: { BinaryOperator *BO = cast<BinaryOperator>(I); assert(NewOps.size() == 2 && "binary operator with #ops != 2"); - BinaryOperator *New = - BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), - NewOps[0], NewOps[1], "", BO); - if (isa<OverflowingBinaryOperator>(BO)) { - New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); - New->setHasNoSignedWrap(BO->hasNoSignedWrap()); - } - if (isa<PossiblyExactOperator>(BO)) { - New->setIsExact(BO->isExact()); + Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(), + NewOps[0], NewOps[1]); + if (auto *NewI = dyn_cast<Instruction>(New)) { + if (isa<OverflowingBinaryOperator>(BO)) { + NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); + NewI->setHasNoSignedWrap(BO->hasNoSignedWrap()); + } + if (isa<PossiblyExactOperator>(BO)) { + NewI->setIsExact(BO->isExact()); + } + if (isa<FPMathOperator>(BO)) + NewI->copyFastMathFlags(I); } - if (isa<FPMathOperator>(BO)) - New->copyFastMathFlags(I); return New; } case Instruction::ICmp: assert(NewOps.size() == 2 && "icmp with #ops != 2"); - return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), - NewOps[0], NewOps[1]); + return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0], + NewOps[1]); case Instruction::FCmp: assert(NewOps.size() == 2 && "fcmp with #ops != 2"); - return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), - NewOps[0], NewOps[1]); + return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0], + NewOps[1]); case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -1882,27 +1883,26 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { I->getType()->getScalarType(), cast<VectorType>(NewOps[0]->getType())->getElementCount()); assert(NewOps.size() == 1 && "cast with #ops != 1"); - return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, - "", I); + return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0], + DestTy); } case Instruction::GetElementPtr: { Value *Ptr = NewOps[0]; ArrayRef<Value*> Idx = NewOps.slice(1); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); - GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); - return GEP; + return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(), + Ptr, Idx, "", + cast<GEPOperator>(I)->isInBounds()); } } llvm_unreachable("failed to rebuild vector instructions"); } -static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { +static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask, + IRBuilderBase &Builder) { // Mask.size() does not need to be equal to the number of vector elements. assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); Type *EltTy = V->getType()->getScalarType(); - Type *I32Ty = IntegerType::getInt32Ty(V->getContext()); if (match(V, m_Undef())) return UndefValue::get(FixedVectorType::get(EltTy, Mask.size())); @@ -1956,15 +1956,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { // as well. E.g. GetElementPtr may have scalar operands even if the // return value is a vector, so we need to examine the operand type. if (I->getOperand(i)->getType()->isVectorTy()) - V = evaluateInDifferentElementOrder(I->getOperand(i), Mask); + V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder); else V = I->getOperand(i); NewOps.push_back(V); NeedsRebuild |= (V != I->getOperand(i)); } - if (NeedsRebuild) { - return buildNew(I, NewOps); - } + if (NeedsRebuild) + return buildNew(I, NewOps, Builder); return I; } case Instruction::InsertElement: { @@ -1985,11 +1984,12 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { // If element is not in Mask, no need to handle the operand 1 (element to // be inserted). Just evaluate values in operand 0 according to Mask. if (!Found) - return evaluateInDifferentElementOrder(I->getOperand(0), Mask); + return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder); - Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask); - return InsertElementInst::Create(V, I->getOperand(1), - ConstantInt::get(I32Ty, Index), "", I); + Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask, + Builder); + Builder.SetInsertPoint(I); + return Builder.CreateInsertElement(V, I->getOperand(1), Index); } } llvm_unreachable("failed to reorder elements of vector instruction!"); @@ -2859,7 +2859,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { return I; if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) { - Value *V = evaluateInDifferentElementOrder(LHS, Mask); + Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder); return replaceInstUsesWith(SVI, V); } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll index 609dac52038e..11b8a4485924 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s +; RUN: opt -S -passes=instcombine -instcombine-infinite-loop-threshold=2 -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s ; -------------------------------------------------------------------- ; llvm.amdgcn.buffer.load @@ -3029,8 +3029,8 @@ define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[DATA]], i64 0 -; CHECK-NEXT: ret <2 x float> [[TMP1]] +; CHECK-NEXT: [[SHUF:%.*]] = insertelement <2 x float> undef, float [[DATA]], i64 0 +; CHECK-NEXT: ret <2 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> @@ -3070,8 +3070,8 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x float> undef, float [[DATA]], i64 0 -; CHECK-NEXT: ret <3 x float> [[TMP1]] +; CHECK-NEXT: [[SHUF:%.*]] = insertelement <3 x float> undef, float [[DATA]], i64 0 +; CHECK-NEXT: ret <3 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |