author     CaprYang <capryang@gmail.com>       2023-05-17 21:39:36 +0100
committer  Matt Arsenault <arsenm2@gmail.com>  2023-05-17 23:40:06 +0100
commit     44096e6904e10bb313fef2f6aaff25c25d1325f7
tree       946f887609dcfd38080a4fae4c93c9039d2a9fad
parent     dd61b63b5ca1c8a15013e2154bf3b30243df66bd
[InferAddressSpaces] Handle vectors of pointers and support the masked gather/scatter intrinsics
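
Previously the pass bailed out on vectors of pointers: GEPs and addrspacecasts with vector result types were skipped when collecting flat address expressions, and icmp over pointer vectors carried a FIXME. This patch generalizes the pointer-type checks to isPtrOrPtrVectorTy(), adds getPtrOrVecOfPtrsWithNewAS() so scalar and vector pointer types are rewritten uniformly, and rewrites the pointer operand of llvm.masked.gather (operand 0) and llvm.masked.scatter (operand 1) into the inferred address space.

Before/after for the gather case, as exercised by the new masked-gather-scatter.ll test below (scatter is handled analogously):

    ; before: flat gather through an addrspacecast
    %out.1 = addrspacecast ptr addrspace(1) %out to ptr
    %ptrs = getelementptr inbounds i32, ptr %out.1, <4 x i64> %index
    %value = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)

    ; after: the cast folds away and the gather is rewritten to addrspace(1)
    %ptrs = getelementptr inbounds i32, ptr addrspace(1) %out, <4 x i64> %index
    %value = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)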
 llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp                |  79
 llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll           |   3
 llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll |  36
 llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll    | 115
 4 files changed, 204 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index b6713730bfa9..a82be5b973cf 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -256,6 +256,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
+static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
+ assert(Ty->isPtrOrPtrVectorTy());
+ PointerType *NPT = PointerType::get(Ty->getContext(), NewAddrSpace);
+ return Ty->getWithNewType(NPT);
+}
+
// Check whether that's a no-op pointer bitcast using a pair of
// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
// different address spaces.
@@ -301,14 +307,14 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
switch (Op->getOpcode()) {
case Instruction::PHI:
- assert(Op->getType()->isPointerTy());
+ assert(Op->getType()->isPtrOrPtrVectorTy());
return true;
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return true;
case Instruction::Select:
- return Op->getType()->isPointerTy();
+ return Op->getType()->isPtrOrPtrVectorTy();
case Instruction::Call: {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
return II && II->getIntrinsicID() == Intrinsic::ptrmask;
@@ -373,6 +379,24 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::ptrmask:
// This is handled as an address expression, not as a use that accesses memory.
return false;
+ case Intrinsic::masked_gather: {
+ Type *RetTy = II->getType();
+ Type *NewPtrTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ case Intrinsic::masked_scatter: {
+ Type *ValueTy = II->getOperand(0)->getType();
+ Type *NewPtrTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {ValueTy, NewPtrTy});
+ II->setArgOperand(1, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
default: {
Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
if (!Rewrite)
@@ -394,6 +418,14 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_gather:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
+ PostorderStack, Visited);
+ break;
+ case Intrinsic::masked_scatter:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
+ PostorderStack, Visited);
+ break;
default:
SmallVector<int, 2> OpIndexes;
if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
@@ -412,7 +444,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
- assert(V->getType()->isPointerTy());
+ assert(V->getType()->isPtrOrPtrVectorTy());
// Generic addressing expressions may be hidden in nested constant
// expressions.
@@ -460,8 +492,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// addressing calculations may also be faster.
for (Instruction &I : instructions(F)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- if (!GEP->getType()->isVectorTy())
- PushPtrOperand(GEP->getPointerOperand());
+ PushPtrOperand(GEP->getPointerOperand());
} else if (auto *LI = dyn_cast<LoadInst>(&I))
PushPtrOperand(LI->getPointerOperand());
else if (auto *SI = dyn_cast<StoreInst>(&I))
@@ -480,14 +511,12 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
- // FIXME: Handle vectors of pointers
- if (Cmp->getOperand(0)->getType()->isPointerTy()) {
+ if (Cmp->getOperand(0)->getType()->isPtrOrPtrVectorTy()) {
PushPtrOperand(Cmp->getOperand(0));
PushPtrOperand(Cmp->getOperand(1));
}
} else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (!ASC->getType()->isVectorTy())
- PushPtrOperand(ASC->getPointerOperand());
+ PushPtrOperand(ASC->getPointerOperand());
} else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
PushPtrOperand(
@@ -529,8 +558,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
SmallVectorImpl<const Use *> *UndefUsesToFix) {
Value *Operand = OperandUse.get();
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(Operand->getType()), NewAddrSpace);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAddrSpace);
if (Constant *C = dyn_cast<Constant>(Operand))
return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
@@ -543,8 +571,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
if (I != PredicatedAS.end()) {
// Insert an addrspacecast on that operand before the user.
unsigned NewAS = I->second;
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(Operand->getType()), NewAS);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
NewI->insertBefore(Inst);
NewI->setDebugLoc(Inst->getDebugLoc());
@@ -572,8 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
- Type *NewPtrType = PointerType::getWithSamePointeeType(
- cast<PointerType>(I->getType()), NewAddrSpace);
+ Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);
if (I->getOpcode() == Instruction::AddrSpaceCast) {
Value *Src = I->getOperand(0);
@@ -607,8 +633,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
if (AS != UninitializedAddressSpace) {
// For the assumed address space, insert an `addrspacecast` to make that
// explicit.
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(I->getType()), AS);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(I->getType(), AS);
auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
NewI->insertAfter(I);
return NewI;
@@ -617,7 +642,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Computes the converted pointer operands.
SmallVector<Value *, 4> NewPointerOperands;
for (const Use &OperandUse : I->operands()) {
- if (!OperandUse.get()->getType()->isPointerTy())
+ if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
NewPointerOperands.push_back(nullptr);
else
NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
@@ -629,7 +654,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
case Instruction::BitCast:
return new BitCastInst(NewPointerOperands[0], NewPtrType);
case Instruction::PHI: {
- assert(I->getType()->isPointerTy());
+ assert(I->getType()->isPtrOrPtrVectorTy());
PHINode *PHI = cast<PHINode>(I);
PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
@@ -648,7 +673,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
return NewGEP;
}
case Instruction::Select:
- assert(I->getType()->isPointerTy());
+ assert(I->getType()->isPtrOrPtrVectorTy());
return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
NewPointerOperands[2], "", nullptr, I);
case Instruction::IntToPtr: {
@@ -674,10 +699,10 @@ static Value *cloneConstantExprWithNewAddressSpace(
ConstantExpr *CE, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
const TargetTransformInfo *TTI) {
- Type *TargetType = CE->getType()->isPointerTy()
- ? PointerType::getWithSamePointeeType(
- cast<PointerType>(CE->getType()), NewAddrSpace)
- : CE->getType();
+ Type *TargetType =
+ CE->getType()->isPtrOrPtrVectorTy()
+ ? getPtrOrVecOfPtrsWithNewAS(CE->getType(), NewAddrSpace)
+ : CE->getType();
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
// Because CE is flat, the source address space must be specific.
@@ -1226,9 +1251,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (ASC->getDestAddressSpace() == NewAS) {
- if (!cast<PointerType>(ASC->getType())
- ->hasSameElementTypeAs(
- cast<PointerType>(NewV->getType()))) {
+ if (!cast<PointerType>(ASC->getType()->getScalarType())
+ ->hasSameElementTypeAs(
+ cast<PointerType>(NewV->getType()->getScalarType()))) {
BasicBlock::iterator InsertPos;
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
InsertPos = std::next(NewVInst->getIterator());
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
index b9e0c1334908..0c8d7a26a513 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
@@ -147,9 +147,8 @@ define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(ptr addrspace(3) %gro
ret i1 %cmp
}
-; TODO: Should be handled
; CHECK-LABEL: @icmp_flat_flat_from_group_vector(
-; CHECK: %cmp = icmp eq <2 x ptr> %cast0, %cast1
+; CHECK: %cmp = icmp eq <2 x ptr addrspace(3)> %group.ptr.0, %group.ptr.1
define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x ptr addrspace(3)> %group.ptr.0, <2 x ptr addrspace(3)> %group.ptr.1) #0 {
%cast0 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.0 to <2 x ptr>
%cast1 = addrspacecast <2 x ptr addrspace(3)> %group.ptr.1 to <2 x ptr>
diff --git a/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll
new file mode 100644
index 000000000000..9e051ae63bfc
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/masked-gather-scatter.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck %s
+
+define <4 x i32> @masked_gather_inferas(ptr addrspace(1) %out, <4 x i64> %index) {
+; CHECK-LABEL: define <4 x i32> @masked_gather_inferas
+; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]]
+; CHECK-NEXT: [[VALUE:%.*]] = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p1(<4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT: ret <4 x i32> [[VALUE]]
+;
+entry:
+ %out.1 = addrspacecast ptr addrspace(1) %out to ptr
+ %ptrs = getelementptr inbounds i32, ptr %out.1, <4 x i64> %index
+ %value = tail call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+ ret <4 x i32> %value
+}
+
+define void @masked_scatter_inferas(ptr addrspace(1) %out, <4 x i64> %index, <4 x i32> %value) {
+; CHECK-LABEL: define void @masked_scatter_inferas
+; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], <4 x i64> [[INDEX:%.*]], <4 x i32> [[VALUE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT]], <4 x i64> [[INDEX]]
+; CHECK-NEXT: tail call void @llvm.masked.scatter.v4i32.v4p1(<4 x i32> [[VALUE]], <4 x ptr addrspace(1)> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+;
+entry:
+ %out.1 = addrspacecast ptr addrspace(1) %out to ptr
+ %ptrs = getelementptr inbounds i32, ptr %out.1, <4 x i64> %index
+ tail call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %value, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret void
+}
+
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i32>)
+
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <4 x i1>)
diff --git a/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll b/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll
new file mode 100644
index 000000000000..f489bbe32347
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/vector-of-pointers.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck %s
+
+define void @double_ascast(<4 x ptr addrspace(3)> %input) {
+; CHECK-LABEL: define void @double_ascast
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[INPUT]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+ %tmp1 = addrspacecast <4 x ptr> %tmp0 to <4 x ptr addrspace(3)>
+ call void @use(<4 x ptr addrspace(3)> %tmp1)
+ ret void
+}
+
+define void @double_gep(ptr addrspace(3) %input, <4 x i64> %i, i64 %j) {
+; CHECK-LABEL: define void @double_gep
+; CHECK-SAME: (ptr addrspace(3) [[INPUT:%.*]], <4 x i64> [[I:%.*]], i64 [[J:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(3) [[INPUT]], <4 x i64> [[I]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, <4 x ptr addrspace(3)> [[TMP1]], i64 [[J]]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp0 = addrspacecast ptr addrspace(3) %input to ptr
+ %tmp1 = getelementptr float, ptr %tmp0, <4 x i64> %i
+ %tmp2 = getelementptr float, <4 x ptr> %tmp1, i64 %j
+ %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+ call void @use(<4 x ptr addrspace(3)> %tmp3)
+ ret void
+}
+
+define void @inferas_phi(<4 x ptr addrspace(3)> %input, i1 %cond) {
+; CHECK-LABEL: define void @inferas_phi
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND]], label [[INC:%.*]], label [[END:%.*]]
+; CHECK: inc:
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr addrspace(3)> [[INPUT]], i64 1
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x ptr addrspace(3)> [ [[INPUT]], [[ENTRY:%.*]] ], [ [[TMP1]], [[INC]] ]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+ br i1 %cond, label %inc, label %end
+
+inc:
+ %tmp1 = getelementptr float, <4 x ptr> %tmp0, i64 1
+ br label %end
+
+end:
+ %tmp2 = phi <4 x ptr> [ %tmp0, %entry ], [ %tmp1, %inc ]
+ %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+ call void @use(<4 x ptr addrspace(3)> %tmp3)
+ ret void
+}
+
+define void @inferas_ptr2int2ptr(<4 x ptr addrspace(3)> %input) {
+; CHECK-LABEL: define void @inferas_ptr2int2ptr
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[INPUT:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[INPUT]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tmp0 = addrspacecast <4 x ptr addrspace(3)> %input to <4 x ptr>
+ %tmp1 = ptrtoint <4 x ptr> %tmp0 to <4 x i64>
+ %tmp2 = inttoptr <4 x i64> %tmp1 to <4 x ptr>
+ %tmp3 = addrspacecast <4 x ptr> %tmp2 to <4 x ptr addrspace(3)>
+ call void @use(<4 x ptr addrspace(3)> %tmp3)
+ ret void
+}
+
+define void @inferas_loop(<4 x ptr addrspace(3)> %begin, <4 x ptr addrspace(3)> %end) {
+; CHECK-LABEL: define void @inferas_loop
+; CHECK-SAME: (<4 x ptr addrspace(3)> [[BEGIN:%.*]], <4 x ptr addrspace(3)> [[END:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[NOW:%.*]] = phi <4 x ptr addrspace(3)> [ [[BEGIN]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: call void @use(<4 x ptr addrspace(3)> [[NOW]])
+; CHECK-NEXT: [[NEXT]] = getelementptr float, <4 x ptr addrspace(3)> [[NOW]], i64 1
+; CHECK-NEXT: [[VEQ:%.*]] = icmp eq <4 x ptr addrspace(3)> [[NEXT]], [[END]]
+; CHECK-NEXT: [[MASK:%.*]] = bitcast <4 x i1> [[VEQ]] to i4
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i4 [[MASK]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %begin0 = addrspacecast <4 x ptr addrspace(3)> %begin to <4 x ptr>
+ %end0 = addrspacecast <4 x ptr addrspace(3)> %end to <4 x ptr>
+ br label %loop
+
+loop:
+ %now = phi <4 x ptr> [ %begin0, %entry ], [ %next, %loop ]
+ %now3 = addrspacecast <4 x ptr> %now to <4 x ptr addrspace(3)>
+ call void @use(<4 x ptr addrspace(3)> %now3)
+ %next = getelementptr float, <4 x ptr> %now, i64 1
+ %veq = icmp eq <4 x ptr> %next, %end0
+ %mask = bitcast <4 x i1> %veq to i4
+ %cond = icmp eq i4 %mask, 0
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+declare void @use(<4 x ptr addrspace(3)>)