diff options
Diffstat (limited to 'test/Transforms')
29 files changed, 2884 insertions, 396 deletions
diff --git a/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll b/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll new file mode 100644 index 000000000000..d1d854d8f457 --- /dev/null +++ b/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll @@ -0,0 +1,339 @@ +; RUN: opt < %s -callsite-splitting -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +;CHECK-LABEL: @test_eq_eq +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_eq(i32* %a, i32 %v) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_ne_eq +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_ne_eq(i32* %a, i32 %v) { +Header: + %tobool1 = icmp ne i32* %a, null + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, 
label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_ne_ne +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_ne_ne(i32* %a, i32 %v) { +Header: + %tobool1 = icmp ne i32* %a, null + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp ne i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_eq_eq_untaken +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_eq_untaken(i32* %a, i32 %v) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB, label %Tail + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_ne_eq_untaken +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 
@callee(i32* nonnull %a, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_ne_eq_untaken(i32* %a, i32 %v) { +Header: + %tobool1 = icmp ne i32* %a, null + br i1 %tobool1, label %TBB, label %Tail + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_ne_ne_untaken +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_ne_ne_untaken(i32* %a, i32 %v) { +Header: + %tobool1 = icmp ne i32* %a, null + br i1 %tobool1, label %TBB, label %Tail + +TBB: + %cmp = icmp ne i32 %v, 1 + br i1 %cmp, label %End, label %Tail + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_nonconst_const_phi +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_nonconst_const_phi(i32* %a, i32* %b, i32 %v) { +Header: + %tobool1 = 
icmp eq i32* %a, %b + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_nonconst_nonconst_phi +;CHECK-LABEL: Tail.predBB1.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: Tail.predBB2.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 2) +;CHECK-LABEL: Tail +;CHECK: %p = phi i32 [ 1, %Tail.predBB1.split ], [ 2, %Tail.predBB2.split ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +;CHECK: ret i32 %[[MERGED]] +define i32 @test_nonconst_nonconst_phi(i32* %a, i32* %b, i32 %v, i32 %v2) { +Header: + %tobool1 = icmp eq i32* %a, %b + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, %v2 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_nonconst_nonconst_phi_noncost +;CHECK-NOT: Tail.predBB1.split: +;CHECK-NOT: Tail.predBB2.split: +;CHECK-LABEL: Tail: +;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) +;CHECK: ret i32 %r +define i32 @test_nonconst_nonconst_phi_noncost(i32* %a, i32* %b, i32 %v, i32 %v2) { +Header: + %tobool1 = icmp eq i32* %a, %b + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, %v2 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[%v,%Header], [%v2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_fisrtnonphi +;CHECK-NOT: Tail.predBB1.split: +;CHECK-NOT: Tail.predBB2.split: +;CHECK-LABEL: Tail: +;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) +;CHECK: ret i32 %r +define i32 @test_fisrtnonphi(i32* %a, i32 %v) { +Header: + %tobool1 = icmp eq i32* %a, 
null + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + store i32 %v, i32* %a + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_3preds_constphi +;CHECK-NOT: Tail.predBB1.split: +;CHECK-NOT: Tail.predBB2.split: +;CHECK-LABEL: Tail: +;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) +;CHECK: ret i32 %r +define i32 @test_3preds_constphi(i32* %a, i32 %v, i1 %c1, i1 %c2, i1 %c3) { +Header: + br i1 %c1, label %Tail, label %TBB1 + +TBB1: + br i1 %c2, label %Tail, label %TBB2 + +TBB2: + br i1 %c3, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB1], [3, %TBB2] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +;CHECK-LABEL: @test_indirectbr_phi +;CHECK-NOT: Tail.predBB1.split: +;CHECK-NOT: Tail.predBB2.split: +;CHECK-LABEL: Tail: +;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) +;CHECK: ret i32 %r +define i32 @test_indirectbr_phi(i8* %address, i32* %a, i32* %b, i32 %v) { +Header: + %indirect.goto.dest = select i1 undef, i8* blockaddress(@test_indirectbr_phi, %End), i8* %address + indirectbr i8* %indirect.goto.dest, [label %TBB, label %Tail] + +TBB: + %indirect.goto.dest2 = select i1 undef, i8* blockaddress(@test_indirectbr_phi, %End), i8* %address + indirectbr i8* %indirect.goto.dest2, [label %Tail, label %End] + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +define i32 @callee(i32* %a, i32 %v, i32 %p) { +entry: + %c = icmp ne i32* %a, null + br i1 %c, label %BB1, label %BB2 + +BB1: + call void @dummy(i32* %a, i32 %p) + br label %End + +BB2: + call void @dummy2(i32 %v, i32 %p) + br label %End + +End: + ret i32 %p +} + +declare void @dummy(i32*, i32) +declare void @dummy2(i32, i32) diff --git a/test/Transforms/CallSiteSplitting/callsite-split.ll 
b/test/Transforms/CallSiteSplitting/callsite-split.ll new file mode 100644 index 000000000000..419fa738563c --- /dev/null +++ b/test/Transforms/CallSiteSplitting/callsite-split.ll @@ -0,0 +1,119 @@ +; RUN: opt < %s -callsite-splitting -inline -instcombine -jump-threading -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting),cgscc(inline),function(instcombine,jump-threading)' -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +%struct.bitmap = type { i32, %struct.bitmap* } + +;CHECK-LABEL: @caller +;CHECK-LABEL: NextCond: +;CHECK: br {{.*}} label %callee.exit +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @callee(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) +;CHECK-LABEL: callee.exit: +;CHECK: call void @dummy2(%struct.bitmap* %a_elt) + +define void @caller(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, null + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, null + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %p = phi i1 [0, %Top], [%c, %NextCond] + call void @callee(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p) + br label %End + +End: + ret void +} + +define void @callee(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c) { +entry: + %tobool = icmp ne %struct.bitmap* %a_elt, null + %tobool1 = icmp ne %struct.bitmap* %b_elt, null + %or.cond = and i1 %tobool, %tobool1 + br i1 %or.cond, label %Cond, label %Big + +Cond: + %cmp = icmp eq %struct.bitmap* %dst_elt, %a_elt + br i1 %cmp, label %Small, label %Big + +Small: + call void @dummy2(%struct.bitmap* %a_elt) + br label %End + +Big: + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, 
%struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + br label %End + +End: + ret void +} + +declare void @dummy2(%struct.bitmap*) +declare void @dummy1(%struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*) + + +;CHECK-LABEL: @caller2 +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @dummy4() +;CHECK-LABEL: CallSiteBB.predBB2.split: +;CHECK: call void @dummy3() +;CheCK-LABEL: CallSiteBB: +;CHECK: %phi.call = phi i1 [ false, %CallSiteBB.predBB1.split ], [ true, %CallSiteBB.predBB2.split ] +;CHECK: call void @foo(i1 %phi.call) +define void @caller2(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, %struct.bitmap* %c_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, %b_elt + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, %c_elt + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %phi = phi i1 [0, %Top],[1, %NextCond] + %u = call i1 @callee2(i1 
%phi) + call void @foo(i1 %u) + br label %End + +End: + ret void +} + +define i1 @callee2(i1 %b) { +entry: + br i1 %b, label %BB1, label %BB2 + +BB1: + call void @dummy3() + br label %End + +BB2: + call void @dummy4() + br label %End + +End: + ret i1 %b +} + +declare void @dummy3() +declare void @dummy4() +declare void @foo(i1) diff --git a/test/Transforms/CodeExtractor/PartialInlineNoInline.ll b/test/Transforms/CodeExtractor/PartialInlineNoInline.ll new file mode 100644 index 000000000000..6c0b83298d23 --- /dev/null +++ b/test/Transforms/CodeExtractor/PartialInlineNoInline.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -partial-inliner -S -stats -pass-remarks=partial-inlining 2>&1 | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S -stats -pass-remarks=partial-inlining 2>&1 | FileCheck %s + +@stat = external global i32, align 4 + +define i32 @inline_fail(i32 %count, ...) { +entry: + %vargs = alloca i8*, align 8 + %vargs1 = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs1) + %stat1 = load i32, i32* @stat, align 4 + %cmp = icmp slt i32 %stat1, 0 + br i1 %cmp, label %bb2, label %bb1 + +bb1: ; preds = %entry + %vg1 = add nsw i32 %stat1, 1 + store i32 %vg1, i32* @stat, align 4 + %va1 = va_arg i8** %vargs, i32 + call void @foo(i32 %count, i32 %va1) #2 + br label %bb2 + +bb2: ; preds = %bb1, %entry + %res = phi i32 [ 1, %bb1 ], [ 0, %entry ] + call void @llvm.va_end(i8* %vargs1) + ret i32 %res +} + +define i32 @caller(i32 %arg) { +bb: + %res = tail call i32 (i32, ...) @inline_fail(i32 %arg, i32 %arg) + ret i32 %res +} + +declare void @foo(i32, i32) +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) + +; Check that no remarks have been emitted, inline_fail has not been partial +; inlined, no code has been extracted and the partial-inlining counter +; has not been incremented. + +; CHECK-NOT: remark +; CHECK: tail call i32 (i32, ...) 
@inline_fail(i32 %arg, i32 %arg) +; CHECK-NOT: inline_fail.1_bb1 +; CHECK-NOT: partial-inlining diff --git a/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll new file mode 100644 index 000000000000..06a513543c45 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -0,0 +1,18 @@ +; RUN: opt -S -codegenprepare -mtriple=thumbv7m -disable-complex-addr-modes=false -addr-sink-new-select=true < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +; Select between two geps with different base, same constant offset +define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_select_twogep_base +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll new file mode 100644 index 000000000000..2bacbdd7f400 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -0,0 +1,475 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=true -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-YES +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-phis=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Can we sink for different base if there is no 
phi for base? +define i32 @test1(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test1 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +define i32 @test2(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test2 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? +define i32 @test3(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test3 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if both addresses are in the same block? 
+define i32 @test4(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test4 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base? +; Both addresses are in the same block. +define i32 @test5(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test5 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %b = phi i64* [%b1, %entry], [%b2, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; Can we sink for different base if there is phi for base but not valid one? +; Both addresses are in the same block. +define i32 @test6(i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test6 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %b = phi i64* [%b2, %entry], [%b1, %if.then] + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %v = load i32, i32* %c, align 4 + ret i32 %v +} + +; case with a loop. No phi node. 
+define i32 @test7(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test7 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node. +define i32 @test8(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test8 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: getelementptr i8, {{.+}} 40 + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; case with a loop. There is phi node but it does not fit. 
+define i32 @test9(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test9 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi + %iv = phi i32 [0, %entry], [%iv.inc, %fallthrough] + %c3 = phi i32* [%c1, %entry], [%c, %fallthrough] + %b3 = phi i64* [%b1, %entry], [%b2, %fallthrough] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b = phi i64* [%b3, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %v +} + +; Case through a loop. No phi node. +define i32 @test10(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test10 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK-YES: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Case through a loop. There is a phi. 
+define i32 @test11(i32 %N, i1 %cond, i64* %b1, i64* %b2) { +; CHECK-LABEL: @test11 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK: phi +; CHECK: phi +; CHECK: br + %c = phi i32* [%c1, %entry], [%c2, %if.then] + %b = phi i64* [%b1, %entry], [%b2, %if.then] + br label %loop + +loop: + %iv = phi i32 [0, %fallthrough], [%iv.inc, %loop] + %iv.inc = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.inc, %N + br i1 %cmp, label %loop, label %exit + +exit: +; CHECK: sunkaddr + %v = load volatile i32, i32* %c, align 4 + ret i32 %v +} + +; Complex case with address value from previous iteration. +define i32 @test12(i32 %N, i1 %cond, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test12 +entry: + %a1 = getelementptr inbounds i64, i64* %b1, i64 5 + %c1 = bitcast i64* %a1 to i32* + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: br + %iv = phi i32 [0, %entry], [%iv.inc, %backedge] + %c3 = phi i32* [%c1, %entry], [%c, %backedge] + %b4 = phi i64* [%b1, %entry], [%b5, %backedge] + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %a2 = getelementptr inbounds i64, i64* %b2, i64 5 + %c2 = bitcast i64* %a2 to i32* + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO: phi +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %c = phi i32* [%c3, %loop], [%c2, %if.then] + %b6 = phi i64* [%b4, %loop], [%b2, %if.then] + %v = load volatile i32, i32* %c, align 4 + %a4 = getelementptr inbounds i64, i64* %b4, i64 5 + %c4 = bitcast i64* %a4 to i32* + %cmp = icmp slt i32 %iv, 20 + br i1 %cmp, label %backedge, label %if.then.2 + +if.then.2: + br label %backedge + +backedge: + %b5 = phi i64* [%b4, %fallthrough], 
[%b6, %if.then.2] + %iv.inc = add i32 %iv, 1 + %cmp2 = icmp slt i32 %iv.inc, %N + br i1 %cmp2, label %loop, label %exit + +exit: + ret i32 %v +} + +%struct.S = type {i32, i32} +; Case with index +define i32 @test13(i1 %cond, %struct.S* %b1, %struct.S* %b2, i64 %Index) { +; CHECK-LABEL: @test13 +entry: + %a1 = getelementptr inbounds %struct.S, %struct.S* %b1, i64 %Index, i32 1 + br i1 %cond, label %if.then, label %fallthrough + +if.then: + %i2 = mul i64 %Index, 2 + %a2 = getelementptr inbounds %struct.S, %struct.S* %b2, i64 %Index, i32 1 + br label %fallthrough + +fallthrough: +; CHECK-YES: sunk_phi +; CHECK-NO-LABEL: fallthrough: +; CHECK-NO-NEXT: phi +; CHECK-NO-NEXT: load + %a = phi i32* [%a1, %entry], [%a2, %if.then] + %v = load i32, i32* %a, align 4 + ret i32 %v +} + +; Select of Select case. +define i64 @test14(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test14 +entry: +; CHECK-LABEL: entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + %s1 = select i1 %c1, i64* %g1, i64* %g2 + %s2 = select i1 %c2, i64* %s1, i64* %g3 +; CHECK: sunkaddr + %v = load i64 , i64* %s2, align 8 + ret i64 %v +} + +; Select of Phi case. +define i64 @test15(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test15 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p1 = phi i64* [%g1, %entry], [%g2, %if.then] + %s1 = select i1 %c2, i64* %p1, i64* %g3 +; CHECK-YES: sunkaddr +; CHECK-NO: phi +; CHECK-NO-NEXT: select +; CHECK-NO-NEXT: load + %v = load i64 , i64* %s1, align 8 + ret i64 %v +} + +; Select of Phi case. 
Phi exists +define i64 @test16(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test16 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p = phi i64* [%b1, %entry], [%b2, %if.then] + %p1 = phi i64* [%g1, %entry], [%g2, %if.then] + %s1 = select i1 %c2, i64* %p1, i64* %g3 +; CHECK: sunkaddr + %v = load i64 , i64* %s1, align 8 + ret i64 %v +} + +; Phi of Select case. +define i64 @test17(i1 %c1, i1 %c2, i64* %b1, i64* %b2, i64* %b3) { +; CHECK-LABEL: @test17 +entry: + %g1 = getelementptr inbounds i64, i64* %b1, i64 5 + %g2 = getelementptr inbounds i64, i64* %b2, i64 5 + %g3 = getelementptr inbounds i64, i64* %b3, i64 5 + %s1 = select i1 %c2, i64* %g1, i64* %g2 + br i1 %c1, label %if.then, label %fallthrough + +if.then: + br label %fallthrough + +fallthrough: +; CHECK-LABEL: fallthrough: + %p1 = phi i64* [%s1, %entry], [%g3, %if.then] +; CHECK-YES: sunkaddr +; CHECK-NO: phi +; CHECK-NO-NEXT: load + %v = load i64 , i64* %p1, align 8 + ret i64 %v +} diff --git a/test/Transforms/ExpandMemCmp/X86/lit.local.cfg b/test/Transforms/ExpandMemCmp/X86/lit.local.cfg new file mode 100644 index 000000000000..e71f3cc4c41e --- /dev/null +++ b/test/Transforms/ExpandMemCmp/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/ExpandMemCmp/X86/memcmp.ll index a4f635c956df..1abfb20f3696 100644 --- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll +++ b/test/Transforms/ExpandMemCmp/X86/memcmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | 
FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: opt -S -expandmemcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32 +; RUN: opt -S -expandmemcmp -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64 declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) @@ -23,30 +23,33 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp3( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16* -; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) -; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) -; ALL-NEXT: [[TMP6:%.*]] = icmp eq i16 [[TMP4]], [[TMP5]] -; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: res_block: -; ALL-NEXT: [[TMP7:%.*]] = icmp ult i16 [[TMP4]], [[TMP5]] -; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i16 [ [[TMP7:%.*]], [[LOADBB]] ] +; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i16 [ [[TMP8:%.*]], [[LOADBB]] ] +; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[PHI_SRC1]], [[PHI_SRC2]] +; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16* +; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16* +; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]] +; ALL-NEXT: 
[[TMP6:%.*]] = load i16, i16* [[TMP4]] +; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]]) +; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]]) +; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]] +; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 2 -; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 2 -; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]] +; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 2 +; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 2 ; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]] -; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]] ; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]] +; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 +; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] ; ALL-NEXT: br label [[ENDBLOCK]] ; ALL: endblock: -; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; ALL-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3) @@ -74,30 +77,33 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp5( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], 
label [[RES_BLOCK:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: res_block: -; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] -; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ] +; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ] +; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]] +; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]] +; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) +; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) +; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] +; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4 -; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4 -; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]] +; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 4 ; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]] -; ALL-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]] ; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; ALL-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]] +; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 +; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] ; ALL-NEXT: br label [[ENDBLOCK]] ; ALL: endblock: -; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; ALL-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 
@memcmp(i8* %x, i8* %y, i64 5) @@ -106,36 +112,37 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp6( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; ALL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; ALL-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: res_block: -; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; ALL-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; ALL-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] +; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]] +; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]] +; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) +; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) +; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] +; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; ALL: loadbb1: -; 
ALL-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16* -; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16* -; ALL-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2 +; ALL-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16* +; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16* ; ALL-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2 -; ALL-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]] +; ALL-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2 ; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] -; ALL-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]]) +; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]] ; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; ALL-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32 +; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) ; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32 -; ALL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]] -; ALL-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; ALL-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32 +; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]] +; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; ALL: endblock: -; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; ALL-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6) @@ -153,34 +160,35 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp8( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) -; X32-NEXT: 
[[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: -; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ] -; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] -; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]] +; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X32-NEXT: br label [[ENDBLOCK:%.*]] +; X32: loadbb: +; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32* +; X32-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]] +; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]] +; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]]) +; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]]) +; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] +; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1 +; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32* +; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32* ; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1 -; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] +; X32-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 1 ; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; 
X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) +; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]] ; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]] -; X32-NEXT: br i1 [[TMP17]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) +; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]] +; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X32-NEXT: ret i32 [[PHI_RES]] ; ; X64-LABEL: @cmp8( @@ -207,30 +215,33 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp9( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = 
bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 8 -; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 8 -; X64-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]] +; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i8 8 ; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]] -; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32 +; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]] ; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32 -; X64-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]] +; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32 +; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]] ; X64-NEXT: br label [[ENDBLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9) @@ -243,36 +254,37 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp10( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], 
[[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 4 +; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i16* +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i16* ; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4 -; X64-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]] +; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 4 ; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]]) +; X64-NEXT: [[TMP15:%.*]] = load i16, i16* 
[[TMP13]] ; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i64 +; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]]) ; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64 +; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] +; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10) @@ -294,36 +306,37 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp12( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ] +; 
X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2 +; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i32* +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i32* ; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2 -; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]] +; X64-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP11]], i32 2 ; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] -; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]]) +; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]] ; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]]) -; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64 +; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]]) ; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64 -; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]] -; X64-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64 +; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]] +; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: 
endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12) @@ -363,34 +376,35 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp16( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 +; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ] +; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ] +; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] +; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1 ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]] +; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]] +; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]]) +; X64-NEXT: 
[[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]]) +; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]] +; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i64* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i64* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 1 +; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[X]] to i64* +; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[Y]] to i64* ; X64-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1 -; X64-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP11]] +; X64-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[TMP11]], i64 1 ; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] -; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]]) +; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]] ; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP15]], [[TMP16]] -; X64-NEXT: br i1 [[TMP17]], label [[ENDBLOCK]], label [[RES_BLOCK]] +; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]]) +; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]] +; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]] ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) @@ -417,22 +431,23 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq3( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16* -; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = icmp ne i16 
[[TMP2]], [[TMP3]] -; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: res_block: ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16* +; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]] +; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]] +; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]] +; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 2 -; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 2 -; ALL-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]] +; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 2 +; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 2 ; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]] -; ALL-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]] -; ALL-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]] +; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] +; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; ALL: endblock: ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -465,22 +480,23 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq5( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: 
res_block: ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] +; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 4 -; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 4 -; ALL-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]] +; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 4 +; ALL-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 4 ; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]] -; ALL-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]] -; ALL-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; ALL-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]] +; ALL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] +; ALL-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; ALL: endblock: ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -495,24 +511,25 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; ALL-LABEL: @cmp_eq6( -; ALL-NEXT: loadbb: -; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; ALL-NEXT: br label [[LOADBB:%.*]] ; ALL: res_block: ; ALL-NEXT: br label [[ENDBLOCK:%.*]] +; ALL: loadbb: +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32* +; ALL-NEXT: 
[[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] +; ALL-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; ALL: loadbb1: -; ALL-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16* -; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16* -; ALL-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 2 +; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* +; ALL-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* ; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2 -; ALL-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]] +; ALL-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 2 ; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] -; ALL-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]] -; ALL-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; ALL-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] +; ALL-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] +; ALL-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; ALL: endblock: ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -540,24 +557,25 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) { define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-LABEL: @cmp_eq8( -; X32-NEXT: loadbb: -; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32* -; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32* -; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]] -; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] -; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]] -; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X32-NEXT: br label [[LOADBB:%.*]] ; X32: res_block: ; X32-NEXT: br label [[ENDBLOCK:%.*]] +; X32: loadbb: +; X32-NEXT: [[TMP1:%.*]] = 
bitcast i8* [[X:%.*]] to i32* +; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32* +; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] +; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]] +; X32-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]] +; X32-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X32: loadbb1: -; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1 +; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32* +; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32* ; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1 -; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] +; X32-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 1 ; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] +; X32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] +; X32-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X32: endblock: ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -589,22 +607,23 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CONV]] ; ; X64-LABEL: @cmp_eq9( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP1:%.*]] = 
bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] +; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 8 -; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 8 -; X64-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]] +; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i8 8 +; X64-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i8 8 ; X64-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]] -; X64-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]] -; X64-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]] +; X64-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]] +; X64-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -625,24 +644,25 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CONV]] ; ; X64-LABEL: @cmp_eq10( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64-NEXT: [[TMP4:%.*]] = load i64, i64* 
[[TMP2]] +; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 4 +; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i16* +; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i16* ; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4 -; X64-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i16, i16* [[TMP7]], i16 4 ; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]] +; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 @@ -676,24 +696,25 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; X32-NEXT: ret i32 [[CONV]] ; ; X64-LABEL: @cmp_eq12( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] +; X64-NEXT: br label [[LOADBB:%.*]] ; X64: res_block: ; X64-NEXT: br label [[ENDBLOCK:%.*]] +; X64: loadbb: +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] +; X64-NEXT: [[TMP4:%.*]] = load i64, i64* 
[[TMP2]] +; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2 +; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[X]] to i32* +; X64-NEXT: [[TMP7:%.*]] = bitcast i8* [[Y]] to i32* ; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2 -; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]] +; X64-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 2 ; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]] +; X64-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]] +; X64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]] +; X64-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64: endblock: ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 diff --git a/test/Transforms/IRCE/add-metadata-pre-post-loops.ll b/test/Transforms/IRCE/add-metadata-pre-post-loops.ll index 488d4b479bab..0225af903ef4 100644 --- a/test/Transforms/IRCE/add-metadata-pre-post-loops.ll +++ b/test/Transforms/IRCE/add-metadata-pre-post-loops.ll @@ -38,7 +38,7 @@ exit: ; preds = %in.bounds, %entry define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %offset) { ; CHECK-LABEL: @single_access_with_preloop( ; CHECK-LABEL: in.bounds.preloop -; CHECK: br i1 %14, label %loop.preloop, label %preloop.exit.selector, !llvm.loop !8, !irce.loop.clone !7 +; CHECK: br i1 [[COND:%[^ ]+]], label %loop.preloop, label %preloop.exit.selector, !llvm.loop !8, !irce.loop.clone !7 ; CHECK-LABEL: in.bounds.postloop ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit.loopexit, !llvm.loop 
!9, !irce.loop.clone !7 entry: diff --git a/test/Transforms/IndVarSimplify/scev-phi-debug-info.ll b/test/Transforms/IndVarSimplify/scev-phi-debug-info.ll new file mode 100644 index 000000000000..dc6aae8d8aa6 --- /dev/null +++ b/test/Transforms/IndVarSimplify/scev-phi-debug-info.ll @@ -0,0 +1,71 @@ +; RUN: opt %s -indvars -S -o - | FileCheck %s +source_filename = "/Data/llvm/test/Transforms/IndVarSimplify/scev-phi-debug-info.ll" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.status = type { i32, i8* } + +@status = internal unnamed_addr global [32 x %struct.status] zeroinitializer, align 16, !dbg !0 + +define void @f0() local_unnamed_addr !dbg !20 { +entry: + tail call void @llvm.dbg.value(metadata i32 0, metadata !23, metadata !DIExpression()), !dbg !24 + br label %for.cond, !dbg !24 + +for.cond: ; preds = %for.body, %entry + ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + ; CHECK: call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !23, metadata !DIExpression()), !dbg !24 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @llvm.dbg.value(metadata i32 %i.0, metadata !23, metadata !DIExpression()), !dbg !24 + %cmp = icmp slt i32 %i.0, 32, !dbg !24 + br i1 %cmp, label %for.body, label %for.end, !dbg !24 + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64, !dbg !24 + %value = getelementptr inbounds [32 x %struct.status], [32 x %struct.status]* @status, i64 0, i64 %idxprom, i32 0, !dbg !24 + store i32 42, i32* %value, align 16, !dbg !24 + tail call void @use(i32 %i.0), !dbg !24 + %inc = add nsw i32 %i.0, 1, !dbg !24 + tail call void @llvm.dbg.value(metadata i32 %inc, metadata !23, metadata !DIExpression()), !dbg !24 + br label %for.cond, !dbg !24 + +for.end: ; preds = %for.cond + ret void, !dbg !24 +} + +declare void @use(i32) + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, 
metadata) #0 + +attributes #0 = { nounwind readnone speculatable } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!16, !17, !18} +!llvm.ident = !{!19} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "status", scope: !2, file: !3, line: 5, type: !6, isLocal: true, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0 (trunk 316001) (llvm/trunk 316171)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "x.c", directory: "/home/davide/work/llvm/build-release/bin") +!4 = !{} +!5 = !{!0} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 4096, elements: !14) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "status", file: !3, line: 2, size: 128, elements: !8) +!8 = !{!9, !11} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !7, file: !3, line: 3, baseType: !10, size: 32) +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_member, name: "p", scope: !7, file: !3, line: 4, baseType: !12, size: 64, offset: 64) +!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64) +!13 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!14 = !{!15} +!15 = !DISubrange(count: 32) +!16 = !{i32 2, !"Dwarf Version", i32 4} +!17 = !{i32 2, !"Debug Info Version", i32 3} +!18 = !{i32 1, !"wchar_size", i32 4} +!19 = !{!"clang version 6.0.0 (trunk 316001) (llvm/trunk 316171)"} +!20 = distinct !DISubprogram(name: "f0", scope: !3, file: !3, line: 6, type: !21, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: true, unit: !2, variables: !22) +!21 = !DISubroutineType(types: !4) +!22 = !{!23} +!23 = !DILocalVariable(name: "i", scope: !20, file: !3, line: 8, type: !10) +!24 = !DILocation(line: 9, scope: !20) diff --git 
a/test/Transforms/InstCombine/debuginfo_add.ll b/test/Transforms/InstCombine/debuginfo_add.ll new file mode 100644 index 000000000000..0d194cc65c7a --- /dev/null +++ b/test/Transforms/InstCombine/debuginfo_add.ll @@ -0,0 +1,108 @@ +; RUN: opt -instcombine %s -o - -S | FileCheck %s +; typedef struct v *v_t; +; struct v { +; unsigned long long p; +; }; +; +; void f(v_t object, unsigned long long *start) { +; unsigned head_size; +; unsigned long long orig_start; +; unsigned long long offset; +; orig_start = *start; +; for (offset = orig_start - (unsigned long long)(1 << 12); head_size; +; offset -= (unsigned long long)(1 << 12), head_size -= (1 << 12)) +; use(offset, (object)); +; } +source_filename = "test.i" +target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" +target triple = "thumbv7s-apple-ios5.0.0" + +%struct.vm_object = type { i64 } + +; Function Attrs: nounwind ssp +define void @f(%struct.vm_object* %object, i64* nocapture readonly %start) local_unnamed_addr #0 !dbg !11 { +entry: + tail call void @llvm.dbg.value(metadata %struct.vm_object* %object, metadata !21, metadata !DIExpression()), !dbg !27 + tail call void @llvm.dbg.value(metadata i64* %start, metadata !22, metadata !DIExpression()), !dbg !28 + %0 = load i64, i64* %start, align 4, !dbg !29 + tail call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression()), !dbg !30 + %offset.08 = add i64 %0, -4096 + tail call void @llvm.dbg.value(metadata i64 %offset.08, metadata !26, metadata !DIExpression()), !dbg !31 + ; CHECK: call void @llvm.dbg.value(metadata i64 %0, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 undef, metadata !23, metadata !DIExpression()), !dbg !32 + br i1 undef, label %for.end, label %for.body.lr.ph, !dbg !32 + +for.body.lr.ph: ; preds = %entry + br label %for.body, !dbg !32 + +for.body: ; preds = %for.body.lr.ph, %for.body + %offset.010 = 
phi i64 [ %offset.08, %for.body.lr.ph ], [ %offset.0, %for.body ] + %head_size.09 = phi i32 [ undef, %for.body.lr.ph ], [ %sub2, %for.body ] + tail call void @llvm.dbg.value(metadata i32 %head_size.09, metadata !23, metadata !DIExpression()), !dbg !31 + %call = tail call i32 bitcast (i32 (...)* @use to i32 (i64, %struct.vm_object*)*)(i64 %offset.010, %struct.vm_object* %object) #3, !dbg !34 + %sub2 = add i32 %head_size.09, -4096, !dbg !37 + %offset.0 = add i64 %offset.010, -4096 + tail call void @llvm.dbg.value(metadata i64 %offset.0, metadata !26, metadata !DIExpression()), !dbg !30 + ; CHECK: call void @llvm.dbg.value(metadata i64 %offset.010, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg !29 + tail call void @llvm.dbg.value(metadata i32 %sub2, metadata !23, metadata !DIExpression()), !dbg !31 + %tobool = icmp eq i32 %sub2, 0, !dbg !32 + br i1 %tobool, label %for.end, label %for.body, !dbg !32, !llvm.loop !38 + +for.end: ; preds = %for.body, %entry + ret void, !dbg !40 +} + +declare i32 @use(...) 
local_unnamed_addr + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nounwind ssp } +attributes #2 = { nounwind readnone speculatable } +attributes #3 = { nobuiltin } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5, !6, !7, !8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 (trunk 317434) (llvm/trunk 317437)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3) +!1 = !DIFile(filename: "test.i", directory: "/Data/radar/31209283") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!5 = !{i32 2, !"Dwarf Version", i32 2} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = !{i32 1, !"wchar_size", i32 4} +!8 = !{i32 1, !"min_enum_size", i32 4} +!9 = !{i32 7, !"PIC Level", i32 2} +!10 = !{!"clang version 6.0.0 (trunk 317434) (llvm/trunk 317437)"} +!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !12, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20) +!12 = !DISubroutineType(types: !13) +!13 = !{null, !14, !19} +!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "v_t", file: !1, line: 1, baseType: !15) +!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 32) +!16 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "v", file: !1, line: 2, size: 64, elements: !17) +!17 = !{!18} +!18 = !DIDerivedType(tag: DW_TAG_member, name: "p", scope: !16, file: !1, line: 3, baseType: !4, size: 64) +!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 32) +!20 = !{!21, !22, !23, !25, !26} +!21 = !DILocalVariable(name: "object", arg: 1, scope: !11, file: !1, line: 6, type: !14) +!22 = !DILocalVariable(name: "start", arg: 2, scope: !11, file: !1, line: 6, type: !19) +!23 = !DILocalVariable(name: 
"head_size", scope: !11, file: !1, line: 7, type: !24) +!24 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!25 = !DILocalVariable(name: "orig_start", scope: !11, file: !1, line: 8, type: !4) +!26 = !DILocalVariable(name: "offset", scope: !11, file: !1, line: 9, type: !4) +!27 = !DILocation(line: 6, column: 20, scope: !11) +!28 = !DILocation(line: 6, column: 48, scope: !11) +!29 = !DILocation(line: 8, column: 22, scope: !11) +!30 = !DILocation(line: 7, column: 12, scope: !11) +!31 = !DILocation(line: 10, column: 16, scope: !11) +!32 = !DILocation(line: 11, column: 5, scope: !33) +!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 11, column: 5) +!34 = !DILocation(line: 13, column: 7, scope: !35) +!35 = distinct !DILexicalBlock(scope: !36, file: !1, line: 12, column: 75) +!36 = distinct !DILexicalBlock(scope: !33, file: !1, line: 11, column: 5) +!37 = !DILocation(line: 12, column: 61, scope: !36) +!38 = distinct !{!38, !32, !39} +!39 = !DILocation(line: 14, column: 3, scope: !33) +!40 = !DILocation(line: 15, column: 1, scope: !11) diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll index cbb3d614db23..ba52023e0dbf 100644 --- a/test/Transforms/InstCombine/shift.ll +++ b/test/Transforms/InstCombine/shift.ll @@ -1332,3 +1332,263 @@ define i7 @test65(i7 %a, i7 %b) { %y = and i7 %x, 1 ; this extracts the lsb which should be 0 because we shifted an even number of bits and all even bits of the shift input are 0. 
ret i7 %y } + +define i32 @shl_select_add_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_add_true( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = add i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_add_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_add_false( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = add i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_and_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_and_true( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_and_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_and_false( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_and_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_and_true( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 7 + %2 = select i1 
%cond, i32 %1, i32 %x + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_and_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_and_false( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_and_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_and_true( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -1073741821 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 2147483655 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = ashr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_and_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_and_false( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -1073741821 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = and i32 %x, 2147483655 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = ashr i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_or_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_or_true( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_or_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_or_false( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 
[[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_or_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_or_true( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_or_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_or_false( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_or_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_or_true( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = ashr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_or_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_or_false( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = or i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = ashr i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_xor_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_xor_true( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 14 +; CHECK-NEXT: 
[[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @shl_select_xor_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @shl_select_xor_false( +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 14 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = shl i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_xor_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_xor_true( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @lshr_select_xor_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @lshr_select_xor_false( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = lshr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_xor_true(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_xor_true( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %1, i32 %x + %3 = ashr i32 %2, 1 + ret i32 %3 +} + +define i32 @ashr_select_xor_false(i32 %x, i1 %cond) { +; CHECK-LABEL: @ashr_select_xor_false( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 1 +; 
CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = xor i32 %x, 7 + %2 = select i1 %cond, i32 %x, i32 %1 + %3 = ashr i32 %2, 1 + ret i32 %3 +} diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index 6e9e8d4b7b6f..b28eea0bc2aa 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -392,6 +392,288 @@ lab60: indirectbr i8* undef, [label %lab21, label %lab19] } -declare void @f(i32*) +; Check if LICM can sink a sinkable instruction to the exit blocks through +; a non-trivially replaceable PHI node. +; +; CHECK-LABEL: @test14 +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; CHECK-NOT: sub +; +; CHECK-LABEL: Out12.split.loop.exit: +; CHECK: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn, %ContLoop ] +; CHECK: %[[MUL:.*]] = mul i32 %N, %[[LCSSAPHI]] +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12.split.loop.exit1: +; CHECK: %[[LCSSAPHI2:.*]] = phi i32 [ %N_addr.0.pn, %Loop ] +; CHECK: %[[MUL2:.*]] = mul i32 %N, %[[LCSSAPHI2]] +; CHECK: %[[SUB:.*]] = sub i32 %[[MUL2]], %N +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12: +; CHECK: phi i32 [ %[[MUL]], %Out12.split.loop.exit ], [ %[[SUB]], %Out12.split.loop.exit1 ] +define i32 @test14(i32 %N, i32 %N2, i1 %C) { +Entry: + br label %Loop +Loop: + %N_addr.0.pn = phi i32 [ %dec, %ContLoop ], [ %N, %Entry ] + %sink.mul = mul i32 %N, %N_addr.0.pn + %sink.sub = sub i32 %sink.mul, %N + %dec = add i32 %N_addr.0.pn, -1 + br i1 %C, label %ContLoop, label %Out12 +ContLoop: + %tmp.1 = icmp ne i32 %N_addr.0.pn, 1 + br i1 %tmp.1, label %Loop, label %Out12 +Out12: + %tmp = phi i32 [%sink.mul, %ContLoop], [%sink.sub, %Loop] + ret i32 %tmp +} + +; In this test, splitting predecessors is not really required because the +; operations of sinkable instructions (sub and mul) are the same. 
In this case, we +; can sink the same sinkable operations and modify the PHI to pass the operands +; to the shared operations. As of now, we split predecessors of non-trivially +; replaceable PHIs by default in LICM because all incoming edges of a +; non-trivially replaceable PHI in LCSSA are critical. +; +; CHECK-LABEL: @test15 +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; CHECK-NOT: sub +; +; CHECK-LABEL: Out12.split.loop.exit: +; CHECK: %[[LCSSAPHI:.*]] = phi i32 [ %N_addr.0.pn, %ContLoop ] +; CHECK: %[[MUL:.*]] = mul i32 %N, %[[LCSSAPHI]] +; CHECK: %[[SUB:.*]] = sub i32 %[[MUL]], %N2 +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12.split.loop.exit1: +; CHECK: %[[LCSSAPHI2:.*]] = phi i32 [ %N_addr.0.pn, %Loop ] +; CHECK: %[[MUL2:.*]] = mul i32 %N, %[[LCSSAPHI2]] +; CHECK: %[[SUB2:.*]] = sub i32 %[[MUL2]], %N +; CHECK: br label %Out12 +; +; CHECK-LABEL: Out12: +; CHECK: phi i32 [ %[[SUB]], %Out12.split.loop.exit ], [ %[[SUB2]], %Out12.split.loop.exit1 ] +define i32 @test15(i32 %N, i32 %N2, i1 %C) { +Entry: + br label %Loop +Loop: + %N_addr.0.pn = phi i32 [ %dec, %ContLoop ], [ %N, %Entry ] + %sink.mul = mul i32 %N, %N_addr.0.pn + %sink.sub = sub i32 %sink.mul, %N + %sink.sub2 = sub i32 %sink.mul, %N2 + %dec = add i32 %N_addr.0.pn, -1 + br i1 %C, label %ContLoop, label %Out12 +ContLoop: + %tmp.1 = icmp ne i32 %N_addr.0.pn, 1 + br i1 %tmp.1, label %Loop, label %Out12 +Out12: + %tmp = phi i32 [%sink.sub2, %ContLoop], [%sink.sub, %Loop] + ret i32 %tmp +} + +; Sink through a non-trivially replaceable PHI node which uses the same sinkable +; instruction multiple times. 
+; +; CHECK-LABEL: @test16 +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; +; CHECK-LABEL: Out.split.loop.exit: +; CHECK: %[[PHI:.*]] = phi i32 [ %l2, %ContLoop ] +; CHECK: br label %Out +; +; CHECK-LABEL: Out.split.loop.exit1: +; CHECK: %[[SINKABLE:.*]] = mul i32 %l2.lcssa, %t.le +; CHECK: br label %Out +; +; CHECK-LABEL: Out: +; CHECK: %idx = phi i32 [ %[[PHI]], %Out.split.loop.exit ], [ %[[SINKABLE]], %Out.split.loop.exit1 ] +define i32 @test16(i1 %c, i8** %P, i32* %P2, i64 %V) { +entry: + br label %loop.ph +loop.ph: + br label %Loop +Loop: + %iv = phi i64 [ 0, %loop.ph ], [ %next, %ContLoop ] + %l2 = call i32 @getv() + %t = trunc i64 %iv to i32 + %sinkable = mul i32 %l2, %t + switch i32 %l2, label %ContLoop [ + i32 32, label %Out + i32 46, label %Out + i32 95, label %Out + ] +ContLoop: + %next = add nuw i64 %iv, 1 + %c1 = call i1 @getc() + br i1 %c1, label %Loop, label %Out +Out: + %idx = phi i32 [ %l2, %ContLoop ], [ %sinkable, %Loop ], [ %sinkable, %Loop ], [ %sinkable, %Loop ] + ret i32 %idx +} + +; Sink a sinkable instruction through multiple non-trivially replaceable PHIs in +; different exit blocks. 
+; +; CHECK-LABEL: @test17 +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; +; CHECK-LABEL:OutA.split.loop.exit{{.*}}: +; CHECK: %[[OP1:.*]] = phi i32 [ %N_addr.0.pn, %ContLoop1 ] +; CHECK: %[[SINKABLE:.*]] = mul i32 %N, %[[OP1]] +; CHECK: br label %OutA +; +; CHECK-LABEL:OutA: +; CHECK: phi i32{{.*}}[ %[[SINKABLE]], %OutA.split.loop.exit{{.*}} ] +; +; CHECK-LABEL:OutB.split.loop.exit{{.*}}: +; CHECK: %[[OP2:.*]] = phi i32 [ %N_addr.0.pn, %ContLoop2 ] +; CHECK: %[[SINKABLE2:.*]] = mul i32 %N, %[[OP2]] +; CHECK: br label %OutB +; +; CHECK-LABEL:OutB: +; CHECK: phi i32 {{.*}}[ %[[SINKABLE2]], %OutB.split.loop.exit{{.*}} ] +define i32 @test17(i32 %N, i32 %N2) { +Entry: + br label %Loop +Loop: + %N_addr.0.pn = phi i32 [ %dec, %ContLoop3 ], [ %N, %Entry ] + %sink.mul = mul i32 %N, %N_addr.0.pn + %c0 = call i1 @getc() + br i1 %c0 , label %ContLoop1, label %OutA +ContLoop1: + %c1 = call i1 @getc() + br i1 %c1, label %ContLoop2, label %OutA + +ContLoop2: + %c2 = call i1 @getc() + br i1 %c2, label %ContLoop3, label %OutB +ContLoop3: + %c3 = call i1 @getc() + %dec = add i32 %N_addr.0.pn, -1 + br i1 %c3, label %Loop, label %OutB +OutA: + %tmp1 = phi i32 [%sink.mul, %ContLoop1], [%N2, %Loop] + br label %Out12 +OutB: + %tmp2 = phi i32 [%sink.mul, %ContLoop2], [%dec, %ContLoop3] + br label %Out12 +Out12: + %tmp = phi i32 [%tmp1, %OutA], [%tmp2, %OutB] + ret i32 %tmp +} + + +; Sink a sinkable instruction through both trivially and non-trivially replacable PHIs. 
+; +; CHECK-LABEL: @test18 +; CHECK-LABEL: Loop: +; CHECK-NOT: mul +; CHECK-NOT: sub +; +; CHECK-LABEL:Out12.split.loop.exit: +; CHECK: %[[OP:.*]] = phi i32 [ %iv, %ContLoop ] +; CHECK: %[[DEC:.*]] = phi i32 [ %dec, %ContLoop ] +; CHECK: %[[SINKMUL:.*]] = mul i32 %N, %[[OP]] +; CHECK: %[[SINKSUB:.*]] = sub i32 %[[SINKMUL]], %N2 +; CHECK: br label %Out12 +; +; CHECK-LABEL:Out12.split.loop.exit1: +; CHECK: %[[OP2:.*]] = phi i32 [ %iv, %Loop ] +; CHECK: %[[SINKMUL2:.*]] = mul i32 %N, %[[OP2]] +; CHECK: %[[SINKSUB2:.*]] = sub i32 %[[SINKMUL2]], %N2 +; CHECK: br label %Out12 +; +; CHECK-LABEL:Out12: +; CHECK: %tmp1 = phi i32 [ %[[SINKSUB]], %Out12.split.loop.exit ], [ %[[SINKSUB2]], %Out12.split.loop.exit1 ] +; CHECK: %tmp2 = phi i32 [ %[[DEC]], %Out12.split.loop.exit ], [ %[[SINKSUB2]], %Out12.split.loop.exit1 ] +; CHECK: %add = add i32 %tmp1, %tmp2 +define i32 @test18(i32 %N, i32 %N2) { +Entry: + br label %Loop +Loop: + %iv = phi i32 [ %dec, %ContLoop ], [ %N, %Entry ] + %sink.mul = mul i32 %N, %iv + %sink.sub = sub i32 %sink.mul, %N2 + %c0 = call i1 @getc() + br i1 %c0, label %ContLoop, label %Out12 +ContLoop: + %dec = add i32 %iv, -1 + %c1 = call i1 @getc() + br i1 %c1, label %Loop, label %Out12 +Out12: + %tmp1 = phi i32 [%sink.sub, %ContLoop], [%sink.sub, %Loop] + %tmp2 = phi i32 [%dec, %ContLoop], [%sink.sub, %Loop] + %add = add i32 %tmp1, %tmp2 + ret i32 %add +} + +; Do not sink an instruction through a non-trivially replacable PHI, to avoid +; assert while splitting predecessors, if the terminator of predecessor is an +; indirectbr. 
+; CHECK-LABEL: @test19 +; CHECK-LABEL: L0: +; CHECK: %sinkable = mul +; CHECK: %sinkable2 = add + +define i32 @test19(i1 %cond, i1 %cond2, i8* %address, i32 %v1) nounwind { +entry: + br label %L0 +L0: + %indirect.goto.dest = select i1 %cond, i8* blockaddress(@test19, %exit), i8* %address + %v2 = call i32 @getv() + %sinkable = mul i32 %v1, %v2 + %sinkable2 = add i32 %v1, %v2 + indirectbr i8* %indirect.goto.dest, [label %L1, label %exit] + +L1: + %indirect.goto.dest2 = select i1 %cond2, i8* blockaddress(@test19, %exit), i8* %address + indirectbr i8* %indirect.goto.dest2, [label %L0, label %exit] + +exit: + %r = phi i32 [%sinkable, %L0], [%sinkable2, %L1] + ret i32 %r +} + +; Do not sink through a non-trivially replacable PHI if splitting predecessors +; not allowed in SplitBlockPredecessors(). +; +; CHECK-LABEL: @test20 +; CHECK-LABEL: while.cond +; CHECK: %sinkable = mul +; CHECK: %sinkable2 = add +define void @test20(i32* %s, i1 %b, i32 %v1, i32 %v2) personality i32 (...)* @__CxxFrameHandler3 { +entry: + br label %while.cond +while.cond: + %v = call i32 @getv() + %sinkable = mul i32 %v, %v2 + %sinkable2 = add i32 %v, %v2 + br i1 %b, label %try.cont, label %while.body +while.body: + invoke void @may_throw() + to label %while.body2 unwind label %catch.dispatch +while.body2: + invoke void @may_throw2() + to label %while.cond unwind label %catch.dispatch +catch.dispatch: + %.lcssa1 = phi i32 [ %sinkable, %while.body ], [ %sinkable2, %while.body2 ] + %cp = cleanuppad within none [] + store i32 %.lcssa1, i32* %s + cleanupret from %cp unwind to caller +try.cont: + ret void +} + +declare void @may_throw() +declare void @may_throw2() +declare i32 @__CxxFrameHandler3(...) 
+declare i32 @getv() +declare i1 @getc() +declare void @f(i32*) declare void @g() diff --git a/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll b/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll new file mode 100644 index 000000000000..3c283dcb6e53 --- /dev/null +++ b/test/Transforms/LoadStoreVectorizer/X86/merge-tbaa.ll @@ -0,0 +1,46 @@ +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | \ +; RUN: FileCheck %s +; +; The GPU Load & Store Vectorizer may merge differently-typed accesses into a +; single instruction. This test checks that we merge TBAA tags for such +; accesses correctly. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; struct S { +; float f; +; int i; +; }; +%struct.S = type { float, i32 } + +; float foo(S *p) { +; p->f -= 1; +; p->i -= 1; +; return p->f; +; } +define float @foo(%struct.S* %p) { +entry: +; CHECK-LABEL: foo +; CHECK: load <2 x i32>, {{.*}}, !tbaa [[TAG_char:!.*]] +; CHECK: store <2 x i32> {{.*}}, !tbaa [[TAG_char]] + %f = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0 + %0 = load float, float* %f, align 4, !tbaa !2 + %sub = fadd float %0, -1.000000e+00 + store float %sub, float* %f, align 4, !tbaa !2 + %i = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1 + %1 = load i32, i32* %i, align 4, !tbaa !8 + %sub1 = add nsw i32 %1, -1 + store i32 %sub1, i32* %i, align 4, !tbaa !8 + ret float %sub +} + +!2 = !{!3, !4, i64 0} +!3 = !{!"_ZTS1S", !4, i64 0, !7, i64 4} +!4 = !{!"float", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C++ TBAA"} +!7 = !{!"int", !5, i64 0} +!8 = !{!3, !7, i64 4} + +; CHECK-DAG: [[TYPE_char:!.*]] = !{!"omnipotent char", {{.*}}, i64 0} +; CHECK-DAG: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0} diff --git a/test/Transforms/LoopPredication/widened.ll b/test/Transforms/LoopPredication/widened.ll new file mode 100644 index 000000000000..33c4e2706133 --- /dev/null +++ b/test/Transforms/LoopPredication/widened.ll 
@@ -0,0 +1,138 @@ +; RUN: opt -S -loop-predication -loop-predication-enable-iv-truncation=true < %s 2>&1 | FileCheck %s +declare void @llvm.experimental.guard(i1, ...) + +declare i32 @length(i8*) + +declare i16 @short_length(i8*) +; Consider range check of type i16 and i32, while IV is of type i64 +; We can loop predicate this because the IV range is within i16 and within i32. +define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i8* %arrB) { +; CHECK-LABEL: iv_wider_type_rc_two_narrow_types +entry: +; CHECK-LABEL: entry: +; CHECK: [[idxB:[^ ]+]] = sub i16 %lengthB, %offB +; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i16 16, [[idxB]] +; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i16 %offB, %lengthB +; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]] +; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 %lengthA, %offA +; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 16, [[idxA]] +; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA +; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]] + %lengthA = call i32 @length(i8* %arrA) + %lengthB = call i16 @short_length(i8* %arrB) + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: [[invariant_check:[^ ]+]] = and i1 [[WideChkB]], [[WideChkA]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[invariant_check]], i32 9) + %iv = phi i64 [0, %entry ], [ %iv.next, %loop ] + %iv.trunc.32 = trunc i64 %iv to i32 + %iv.trunc.16 = trunc i64 %iv to i16 + %indexA = add i32 %iv.trunc.32, %offA + %indexB = add i16 %iv.trunc.16, %offB + %rcA = icmp ult i32 %indexA, %lengthA + %rcB = icmp ult i16 %indexB, %lengthB + %wide.chk = and i1 %rcA, %rcB + call void (i1, ...) 
@llvm.experimental.guard(i1 %wide.chk, i32 9) [ "deopt"() ] + %indexA.ext = zext i32 %indexA to i64 + %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext + %eltA = load i8, i8* %addrA + %indexB.ext = zext i16 %indexB to i64 + %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext + store i8 %eltA, i8* %addrB + %iv.next = add nuw nsw i64 %iv, 1 + %latch.check = icmp ult i64 %iv.next, 16 + br i1 %latch.check, label %loop, label %exit + +exit: + ret i64 %iv +} + + +; Consider an IV of type long and an array access into int array. +; IV is of type i64 while the range check operands are of type i32 and i64. +define i64 @iv_rc_different_types(i32 %offA, i32 %offB, i8* %arrA, i8* %arrB, i64 %max) +{ +; CHECK-LABEL: iv_rc_different_types +entry: +; CHECK-LABEL: entry: +; CHECK: [[lenB:[^ ]+]] = add i32 %lengthB, -1 +; CHECK-NEXT: [[idxB:[^ ]+]] = sub i32 [[lenB]], %offB +; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i32 15, [[idxB]] +; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i32 %offB, %lengthB +; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]] +; CHECK-NEXT: [[maxMinusOne:[^ ]+]] = add i64 %max, -1 +; CHECK-NEXT: [[limit_checkMax:[^ ]+]] = icmp ule i64 15, [[maxMinusOne]] +; CHECK-NEXT: [[first_iteration_checkMax:[^ ]+]] = icmp ult i64 0, %max +; CHECK-NEXT: [[WideChkMax:[^ ]+]] = and i1 [[first_iteration_checkMax]], [[limit_checkMax]] +; CHECK-NEXT: [[lenA:[^ ]+]] = add i32 %lengthA, -1 +; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 [[lenA]], %offA +; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 15, [[idxA]] +; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA +; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]] + %lengthA = call i32 @length(i8* %arrA) + %lengthB = call i32 @length(i8* %arrB) + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: [[BandMax:[^ ]+]] = and i1 [[WideChkB]], [[WideChkMax]] +; CHECK: [[ABandMax:[^ ]+]] = and 
i1 [[BandMax]], [[WideChkA]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[ABandMax]], i32 9) + %iv = phi i64 [0, %entry ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + %indexA = add i32 %iv.trunc, %offA + %indexB = add i32 %iv.trunc, %offB + %rcA = icmp ult i32 %indexA, %lengthA + %rcIV = icmp ult i64 %iv, %max + %wide.chk = and i1 %rcA, %rcIV + %rcB = icmp ult i32 %indexB, %lengthB + %wide.chk.final = and i1 %wide.chk, %rcB + call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk.final, i32 9) [ "deopt"() ] + %indexA.ext = zext i32 %indexA to i64 + %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext + %eltA = load i8, i8* %addrA + %indexB.ext = zext i32 %indexB to i64 + %addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext + %eltB = load i8, i8* %addrB + %result = xor i8 %eltA, %eltB + store i8 %result, i8* %addrA + %iv.next = add nuw nsw i64 %iv, 1 + %latch.check = icmp ult i64 %iv, 15 + br i1 %latch.check, label %loop, label %exit + +exit: + ret i64 %iv +} + +; cannot narrow the IV to the range type, because we lose information. +; for (i64 i= 5; i>= 2; i++) +; this loop wraps around after reaching 2^64. +define i64 @iv_rc_different_type(i32 %offA, i8* %arrA) { +; CHECK-LABEL: iv_rc_different_type +entry: + %lengthA = call i32 @length(i8* %arrA) + br label %loop + +loop: +; CHECK-LABEL: loop: +; CHECK: %rcA = icmp ult i32 %indexA, %lengthA +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9) + %iv = phi i64 [ 5, %entry ], [ %iv.next, %loop ] + %iv.trunc.32 = trunc i64 %iv to i32 + %indexA = add i32 %iv.trunc.32, %offA + %rcA = icmp ult i32 %indexA, %lengthA + call void (i1, ...) 
@llvm.experimental.guard(i1 %rcA, i32 9) [ "deopt"() ] + %indexA.ext = zext i32 %indexA to i64 + %addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext + %eltA = load i8, i8* %addrA + %res = add i8 %eltA, 2 + store i8 %eltA, i8* %addrA + %iv.next = add i64 %iv, 1 + %latch.check = icmp sge i64 %iv.next, 2 + br i1 %latch.check, label %loop, label %exit + +exit: + ret i64 %iv +} diff --git a/test/Transforms/LoopVectorize/pr34681.ll b/test/Transforms/LoopVectorize/pr34681.ll new file mode 100644 index 000000000000..e93265e2ed5c --- /dev/null +++ b/test/Transforms/LoopVectorize/pr34681.ll @@ -0,0 +1,122 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Check the scenario where we have an unknown Stride, which happens to also be +; the loop iteration count, so if we specialize the loop for the Stride==1 case, +; this also implies that the loop will iterate no more than a single iteration, +; as in the following example: +; +; unsigned int N; +; int tmp = 0; +; for(unsigned int k=0;k<N;k++) { +; tmp+=(int)B[k*N+j]; +; } +; +; We check here that the following runtime scev guard for Stride==1 is NOT generated: +; vector.scevcheck: +; %ident.check = icmp ne i32 %N, 1 +; %0 = or i1 false, %ident.check +; br i1 %0, label %scalar.ph, label %vector.ph +; Instead the loop is vectorized with an unknown stride. 
+ +; CHECK-LABEL: @foo1 +; CHECK: for.body.lr.ph +; CHECK-NOT: %ident.check = icmp ne i32 %N, 1 +; CHECK-NOT: %[[TEST:[0-9]+]] = or i1 false, %ident.check +; CHECK-NOT: br i1 %[[TEST]], label %scalar.ph, label %vector.ph +; CHECK: vector.ph +; CHECK: vector.body +; CHECK: <4 x i32> +; CHECK: middle.block +; CHECK: scalar.ph + + +define i32 @foo1(i32 %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j) { +entry: + %cmp8 = icmp eq i32 %N, 0 + br i1 %cmp8, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %tmp.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add1, %for.body ] + %k.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %mul = mul i32 %k.09, %N + %add = add i32 %mul, %j + %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %add1 = add nsw i32 %tmp.010, %conv + %inc = add nuw i32 %k.09, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + %add1.lcssa = phi i32 [ %add1, %for.body ] + br label %for.end + +for.end: + %tmp.0.lcssa = phi i32 [ 0, %entry ], [ %add1.lcssa, %for.end.loopexit ] + ret i32 %tmp.0.lcssa +} + + +; Check the same, but also where the Stride and the loop iteration count +; are not of the same data type. 
+; +; unsigned short N; +; int tmp = 0; +; for(unsigned int k=0;k<N;k++) { +; tmp+=(int)B[k*N+j]; +; } +; +; We check here that the following runtime scev guard for Stride==1 is NOT generated: +; vector.scevcheck: +; %ident.check = icmp ne i16 %N, 1 +; %0 = or i1 false, %ident.check +; br i1 %0, label %scalar.ph, label %vector.ph + + +; CHECK-LABEL: @foo2 +; CHECK: for.body.lr.ph +; CHECK-NOT: %ident.check = icmp ne i16 %N, 1 +; CHECK-NOT: %[[TEST:[0-9]+]] = or i1 false, %ident.check +; CHECK-NOT: br i1 %[[TEST]], label %scalar.ph, label %vector.ph +; CHECK: vector.ph +; CHECK: vector.body +; CHECK: <4 x i32> +; CHECK: middle.block +; CHECK: scalar.ph + +define i32 @foo2(i16 zeroext %N, i16* nocapture readnone %A, i16* nocapture readonly %B, i32 %i, i32 %j) { +entry: + %conv = zext i16 %N to i32 + %cmp11 = icmp eq i16 %N, 0 + br i1 %cmp11, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %tmp.013 = phi i32 [ 0, %for.body.lr.ph ], [ %add4, %for.body ] + %k.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %mul = mul nuw i32 %k.012, %conv + %add = add i32 %mul, %j + %arrayidx = getelementptr inbounds i16, i16* %B, i32 %add + %0 = load i16, i16* %arrayidx, align 2 + %conv3 = sext i16 %0 to i32 + %add4 = add nsw i32 %tmp.013, %conv3 + %inc = add nuw nsw i32 %k.012, 1 + %exitcond = icmp eq i32 %inc, %conv + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: + %add4.lcssa = phi i32 [ %add4, %for.body ] + br label %for.end + +for.end: + %tmp.0.lcssa = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.end.loopexit ] + ret i32 %tmp.0.lcssa +} diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll index a9d319e5a2dd..774b6f268599 100644 --- a/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/test/Transforms/LoopVectorize/version-mem-access.ll @@ -65,7 +65,8 @@ for.end: define void @fn1(double* noalias %x, double* noalias %c, 
double %a) { entry: %conv = fptosi double %a to i32 - %cmp8 = icmp sgt i32 %conv, 0 + %conv2 = add i32 %conv, 4 + %cmp8 = icmp sgt i32 %conv2, 0 br i1 %cmp8, label %for.body.preheader, label %for.end for.body.preheader: @@ -82,7 +83,7 @@ for.body: store double %1, double* %arrayidx3, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %conv + %exitcond = icmp eq i32 %lftr.wideiv, %conv2 br i1 %exitcond, label %for.end.loopexit, label %for.body for.end.loopexit: diff --git a/test/Transforms/LowerTypeTests/blockaddress.ll b/test/Transforms/LowerTypeTests/blockaddress.ll new file mode 100644 index 000000000000..ecc4814cfd58 --- /dev/null +++ b/test/Transforms/LowerTypeTests/blockaddress.ll @@ -0,0 +1,27 @@ +; RUN: opt -S %s -lowertypetests | FileCheck %s + + +; CHECK: define internal i8* @f2.cfi() !type !0 { +; CHECK-NEXT: br label %b +; CHECK: b: +; CHECK-NEXT: ret i8* blockaddress(@f2.cfi, %b) +; CHECK-NEXT: } + +target triple = "x86_64-unknown-linux" + +define void @f1() { +entry: + %0 = call i1 @llvm.type.test(i8* bitcast (i8* ()* @f2 to i8*), metadata !"_ZTSFvP3bioE") + ret void +} + +declare i1 @llvm.type.test(i8*, metadata) + +define i8* @f2() !type !5 { + br label %b + +b: + ret i8* blockaddress(@f2, %b) +} + +!5 = !{i64 0, !"_ZTSFvP3bioE"} diff --git a/test/Transforms/LowerTypeTests/import-unsat.ll b/test/Transforms/LowerTypeTests/import-unsat.ll index 6cb9b26fb574..b9eb552dd662 100644 --- a/test/Transforms/LowerTypeTests/import-unsat.ll +++ b/test/Transforms/LowerTypeTests/import-unsat.ll @@ -7,6 +7,7 @@ ; SUMMARY-NEXT: - Linkage: 0 ; SUMMARY-NEXT: NotEligibleToImport: false ; SUMMARY-NEXT: Live: true +; SUMMARY-NEXT: Local: false ; SUMMARY-NEXT: TypeTests: [ 123 ] ; SUMMARY-NEXT: TypeIdMap: ; SUMMARY-NEXT: typeid1: diff --git a/test/Transforms/PGOProfile/Inputs/irreducible.proftext b/test/Transforms/PGOProfile/Inputs/irreducible.proftext new file mode 100644 index 
000000000000..9b0210d9a309 --- /dev/null +++ b/test/Transforms/PGOProfile/Inputs/irreducible.proftext @@ -0,0 +1,29 @@ +:ir +_Z11irreducibleii +# Func Hash: +64451410787 +# Num Counters: +6 +# Counter Values: +1000 +950 +100 +373 +1 +0 + +_Z11irreduciblePh +# Func Hash: +104649601521 +# Num Counters: +9 +# Counter Values: +100 +300 +99 +300 +201 +1 +1 +0 +0 diff --git a/test/Transforms/PGOProfile/irreducible.ll b/test/Transforms/PGOProfile/irreducible.ll new file mode 100644 index 000000000000..37f6e206ee92 --- /dev/null +++ b/test/Transforms/PGOProfile/irreducible.ll @@ -0,0 +1,184 @@ +; RUN: llvm-profdata merge %S/Inputs/irreducible.proftext -o %t.profdata +; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE +; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE + +; GEN: $__llvm_profile_raw_version = comdat any + +; Function Attrs: noinline norecurse nounwind readnone uwtable +define i32 @_Z11irreducibleii(i32 %iter_outer, i32 %iter_inner) local_unnamed_addr #0 { +entry: + %cmp24 = icmp sgt i32 %iter_outer, 0 + br i1 %cmp24, label %for.body, label %entry.for.cond.cleanup_crit_edge + +entry.for.cond.cleanup_crit_edge: ; preds = %entry + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %entry.for.cond.cleanup_crit_edge, %for.end + %sum.0.lcssa = phi i32 [ 0, %entry.for.cond.cleanup_crit_edge ], [ %sum.1, %for.end ] + ret i32 %sum.0.lcssa + +for.body: ; preds = %entry, %for.end + %k.026 = phi i32 [ %inc12, %for.end ], [ 0, %entry ] + %sum.025 = phi i32 [ %sum.1, %for.end ], [ 0, %entry ] + %rem23 = and i32 %k.026, 1 + %cmp1 = icmp eq i32 %rem23, 0 + br i1 %cmp1, label %entry8, label %for.cond2 + +for.cond2: ; preds = %for.body, %if.end9 + %sum.1 = phi i32 [ %add10, %if.end9 ], [ %sum.025, %for.body ] + %i.0 = phi i32 [ %inc, %if.end9 ], [ 0, %for.body ] + %cmp3 = icmp slt i32 %i.0, %iter_inner + br i1 %cmp3, label %for.body4, label %for.end +; USE: br 
i1 %cmp3, label %for.body4, label %for.end, !prof !{{[0-9]+}}, +; USE-SAME: !irr_loop ![[FOR_COND2_IRR_LOOP:[0-9]+]] + +for.body4: ; preds = %for.cond2 + %rem5 = srem i32 %k.026, 3 + %cmp6 = icmp eq i32 %rem5, 0 + br i1 %cmp6, label %entry8, label %if.end9 + +entry8: ; preds = %for.body4, %for.body + %sum.2 = phi i32 [ %sum.025, %for.body ], [ %sum.1, %for.body4 ] + %i.1 = phi i32 [ 0, %for.body ], [ %i.0, %for.body4 ] + %add = add nsw i32 %sum.2, 4 + br label %if.end9 +; USE: br label %if.end9, +; USE-SAME: !irr_loop ![[ENTRY8_IRR_LOOP:[0-9]+]] + +if.end9: ; preds = %entry8, %for.body4 + %sum.3 = phi i32 [ %add, %entry8 ], [ %sum.1, %for.body4 ] + %i.2 = phi i32 [ %i.1, %entry8 ], [ %i.0, %for.body4 ] + %add10 = add nsw i32 %sum.3, 1 + %inc = add nsw i32 %i.2, 1 + br label %for.cond2 +; USE: br label %for.cond2, +; USE-SAME: !irr_loop ![[IF_END9_IRR_LOOP:[0-9]+]] + +for.end: ; preds = %for.cond2 + %inc12 = add nuw nsw i32 %k.026, 1 + %exitcond = icmp eq i32 %inc12, %iter_outer + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + + + +@targets = local_unnamed_addr global [256 x i8*] zeroinitializer, align 16 +@tracing = local_unnamed_addr global i32 0, align 4 + +; Function Attrs: noinline norecurse nounwind uwtable +define i32 @_Z11irreduciblePh(i8* nocapture readonly %p) { +entry: + store <2 x i8*> <i8* blockaddress(@_Z11irreduciblePh, %sw.bb), i8* blockaddress(@_Z11irreduciblePh, %TARGET_1)>, <2 x i8*>* bitcast ([256 x i8*]* @targets to <2 x i8*>*), align 16 + store i8* blockaddress(@_Z11irreduciblePh, %TARGET_2), i8** getelementptr inbounds ([256 x i8*], [256 x i8*]* @targets, i64 0, i64 2), align 16 + %0 = load i32, i32* @tracing, align 4 + %tobool = icmp eq i32 %0, 0 + br label %for.cond1 + +for.cond1: ; preds = %sw.default, %entry + %p.addr.0 = phi i8* [ %p, %entry ], [ %p.addr.4, %sw.default ] + %sum.0 = phi i32 [ 0, %entry ], [ %add25, %sw.default ] + %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1 + %1 = load i8, i8* %p.addr.0, 
align 1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %p.addr.0, i64 2 + %2 = load i8, i8* %incdec.ptr, align 1 + %conv3 = zext i8 %2 to i32 + br label %dispatch_op + +dispatch_op: ; preds = %sw.bb6, %for.cond1 + %p.addr.1 = phi i8* [ %incdec.ptr2, %for.cond1 ], [ %p.addr.2, %sw.bb6 ] + %op.0 = phi i8 [ %1, %for.cond1 ], [ 1, %sw.bb6 ] + %oparg.0 = phi i32 [ %conv3, %for.cond1 ], [ %oparg.2, %sw.bb6 ] + %sum.1 = phi i32 [ %sum.0, %for.cond1 ], [ %add7, %sw.bb6 ] + switch i8 %op.0, label %sw.default [ + i8 0, label %sw.bb + i8 1, label %dispatch_op.sw.bb6_crit_edge + i8 2, label %sw.bb15 + ] + +dispatch_op.sw.bb6_crit_edge: ; preds = %dispatch_op + br label %sw.bb6 + +sw.bb: ; preds = %indirectgoto, %dispatch_op + %oparg.1 = phi i32 [ %oparg.0, %dispatch_op ], [ 0, %indirectgoto ] + %sum.2 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %indirectgoto ] + %add.neg = sub i32 -5, %oparg.1 + %sub = add i32 %add.neg, %sum.2 + br label %exit + +TARGET_1: ; preds = %indirectgoto + %incdec.ptr4 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2 + %3 = load i8, i8* %p.addr.5, align 1 + %conv5 = zext i8 %3 to i32 + br label %sw.bb6 + +sw.bb6: ; preds = %dispatch_op.sw.bb6_crit_edge, %TARGET_1 + %p.addr.2 = phi i8* [ %incdec.ptr4, %TARGET_1 ], [ %p.addr.1, %dispatch_op.sw.bb6_crit_edge ] + %oparg.2 = phi i32 [ %conv5, %TARGET_1 ], [ %oparg.0, %dispatch_op.sw.bb6_crit_edge ] + %sum.3 = phi i32 [ %sum.7, %TARGET_1 ], [ %sum.1, %dispatch_op.sw.bb6_crit_edge ] + %mul = mul nsw i32 %oparg.2, 7 + %add7 = add nsw i32 %sum.3, %mul + %rem46 = and i32 %add7, 1 + %cmp8 = icmp eq i32 %rem46, 0 + br i1 %cmp8, label %dispatch_op, label %if.then +; USE: br i1 %cmp8, label %dispatch_op, label %if.then, !prof !{{[0-9]+}}, +; USE-SAME: !irr_loop ![[SW_BB6_IRR_LOOP:[0-9]+]] + +if.then: ; preds = %sw.bb6 + %mul9 = mul nsw i32 %add7, 9 + br label %indirectgoto + +TARGET_2: ; preds = %indirectgoto + %incdec.ptr13 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 2 + %4 = load i8, i8* %p.addr.5, 
align 1 + %conv14 = zext i8 %4 to i32 + br label %sw.bb15 + +sw.bb15: ; preds = %TARGET_2, %dispatch_op + %p.addr.3 = phi i8* [ %p.addr.1, %dispatch_op ], [ %incdec.ptr13, %TARGET_2 ] + %oparg.3 = phi i32 [ %oparg.0, %dispatch_op ], [ %conv14, %TARGET_2 ] + %sum.4 = phi i32 [ %sum.1, %dispatch_op ], [ %sum.7, %TARGET_2 ] + %add16 = add nsw i32 %oparg.3, 3 + %add17 = add nsw i32 %add16, %sum.4 + br i1 %tobool, label %if.then18, label %exit +; USE: br i1 %tobool, label %if.then18, label %exit, !prof !{{[0-9]+}}, +; USE-SAME: !irr_loop ![[SW_BB15_IRR_LOOP:[0-9]+]] + +if.then18: ; preds = %sw.bb15 + %idx.ext = sext i32 %oparg.3 to i64 + %add.ptr = getelementptr inbounds i8, i8* %p.addr.3, i64 %idx.ext + %mul19 = mul nsw i32 %add17, 17 + br label %indirectgoto + +unknown_op: ; preds = %indirectgoto + %sub24 = add nsw i32 %sum.7, -4 + br label %sw.default + +sw.default: ; preds = %unknown_op, %dispatch_op + %p.addr.4 = phi i8* [ %p.addr.5, %unknown_op ], [ %p.addr.1, %dispatch_op ] + %sum.5 = phi i32 [ %sub24, %unknown_op ], [ %sum.1, %dispatch_op ] + %add25 = add nsw i32 %sum.5, 11 + br label %for.cond1 + +exit: ; preds = %sw.bb15, %sw.bb + %sum.6 = phi i32 [ %sub, %sw.bb ], [ %add17, %sw.bb15 ] + ret i32 %sum.6 + +indirectgoto: ; preds = %if.then18, %if.then + %add.ptr.pn = phi i8* [ %add.ptr, %if.then18 ], [ %p.addr.2, %if.then ] + %sum.7 = phi i32 [ %mul19, %if.then18 ], [ %mul9, %if.then ] + %p.addr.5 = getelementptr inbounds i8, i8* %add.ptr.pn, i64 1 + %5 = load i8, i8* %add.ptr.pn, align 1 + %idxprom21 = zext i8 %5 to i64 + %arrayidx22 = getelementptr inbounds [256 x i8*], [256 x i8*]* @targets, i64 0, i64 %idxprom21 + %6 = load i8*, i8** %arrayidx22, align 8 + indirectbr i8* %6, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2] +; USE: indirectbr i8* %6, [label %unknown_op, label %sw.bb, label %TARGET_1, label %TARGET_2], !prof !{{[0-9]+}}, +; USE-SAME: !irr_loop ![[INDIRECTGOTO_IRR_LOOP:[0-9]+]] +} + +; USE: ![[FOR_COND2_IRR_LOOP]] = 
!{!"loop_header_weight", i64 1050} +; USE: ![[ENTRY8_IRR_LOOP]] = !{!"loop_header_weight", i64 373} +; USE: ![[IF_END9_IRR_LOOP]] = !{!"loop_header_weight", i64 1000} +; USE: ![[SW_BB6_IRR_LOOP]] = !{!"loop_header_weight", i64 501} +; USE: ![[SW_BB15_IRR_LOOP]] = !{!"loop_header_weight", i64 100} +; USE: ![[INDIRECTGOTO_IRR_LOOP]] = !{!"loop_header_weight", i64 400} diff --git a/test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll b/test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll index c1c074e75a70..1751854d448d 100644 --- a/test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll +++ b/test/Transforms/PGOProfile/thinlto_samplepgo_icp2.ll @@ -22,7 +22,7 @@ ; RUN: llvm-nm %t3.2 | FileCheck %s --check-prefix=NM ; NM: _ZL3barv ; RUN: llvm-dis < %t3.2.2.internalize.bc | FileCheck %s --check-prefix=INTERNALIZE -; INTERNALIZE: define void @_ZL3barv +; INTERNALIZE: define dso_local void @_ZL3barv target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll index 105afa9def5c..ebc15865a67d 100644 --- a/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll +++ b/test/Transforms/RewriteStatepointsForGC/drop-invalid-metadata.ll @@ -75,6 +75,54 @@ define void @test_dereferenceable(i32 addrspace(1)* addrspace(1)* %p, i32 %x, i3 ret void } +; invariant.start allows us to sink the load past the baz statepoint call into taken block, which is +; incorrect. remove the invariant.start and RAUW undef. 
+define void @test_inv_start(i1 %cond, i32 addrspace(1)* addrspace(1)* %p, i32 %x, i32 addrspace(1)* %q) gc "statepoint-example" { +; CHECK-LABEL: test_inv_start +; CHECK-NOT: invariant.start +; CHECK: gc.statepoint + %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p + %invst = call {}* @llvm.invariant.start.p1i32(i64 1, i32 addrspace(1)* %v1) + %v2 = load i32, i32 addrspace(1)* %v1 + call void @baz(i32 %x) + br i1 %cond, label %taken, label %untaken + +taken: + store i32 %v2, i32 addrspace(1)* %q, align 16 + call void @llvm.invariant.end.p1i32({}* %invst, i64 4, i32 addrspace(1)* %v1) + ret void + +; CHECK-LABEL: untaken: +; CHECK: gc.statepoint +untaken: + %foo = call i32 @escaping.invariant.start({}* %invst) + call void @dummy(i32 %foo) + ret void +} + +; invariant.start is removed and the uses are undef'ed. +define void @test_inv_start2(i1 %cond, i32 addrspace(1)* addrspace(1)* %p, i32 %x, i32 addrspace(1)* %q) gc "statepoint-example" { +; CHECK-LABEL: test_inv_start2 +; CHECK-NOT: invariant.start +; CHECK: gc.statepoint + %v1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p + %invst = call {}* @llvm.invariant.start.p1i32(i64 1, i32 addrspace(1)* %v1) + %v2 = load i32, i32 addrspace(1)* %v1 + call void @baz(i32 %x) + br i1 %cond, label %taken, label %untaken + +taken: + store i32 %v2, i32 addrspace(1)* %q, align 16 + call void @llvm.invariant.end.p1i32({}* %invst, i64 4, i32 addrspace(1)* %v1) + ret void + +untaken: + ret void +} +declare {}* @llvm.invariant.start.p1i32(i64, i32 addrspace(1)* nocapture) nounwind readonly +declare void @llvm.invariant.end.p1i32({}*, i64, i32 addrspace(1)* nocapture) nounwind +declare i32 @escaping.invariant.start({}*) nounwind +declare void @dummy(i32) declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32f(i64, i32, void (i32)*, i32, i32, ...) 
; Function Attrs: nounwind readonly diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll index 03b1e837a0ca..8397d348483c 100644 --- a/test/Transforms/SLPVectorizer/X86/call.ll +++ b/test/Transforms/SLPVectorizer/X86/call.ll @@ -11,133 +11,158 @@ declare double @sqrt(double) declare i64 @round(i64) -; CHECK: sin_libm -; CHECK: call <2 x double> @llvm.sin.v2f64 -; CHECK: ret void -define void @sin_libm(double* %a, double* %b, double* %c) { -entry: - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %call = tail call double @sin(double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call double @sin(double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 +define void @sin_libm(double* %a, double* %b) { +; CHECK-LABEL: @sin_libm( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %sin1 = tail call double @sin(double %a0) nounwind readnone + %sin2 = tail call double @sin(double %a1) nounwind readnone + store double %sin1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 
+ store double %sin2, double* %idx2, align 8 ret void } -; CHECK: cos_libm -; CHECK: call <2 x double> @llvm.cos.v2f64 -; CHECK: ret void -define void @cos_libm(double* %a, double* %b, double* %c) { -entry: - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %call = tail call double @cos(double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call double @cos(double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 +define void @cos_libm(double* %a, double* %b) { +; CHECK-LABEL: @cos_libm( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %cos1 = tail call double @cos(double %a0) nounwind readnone + %cos2 = tail call double @cos(double %a1) nounwind readnone + store double %cos1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 + store double %cos2, double* %idx2, align 8 ret void } -; CHECK: pow_libm -; CHECK: call <2 x double> @llvm.pow.v2f64 -; CHECK: ret void -define void @pow_libm(double* %a, double* %b, double* %c) { -entry: - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - 
%mul = fmul double %i0, %i1 - %call = tail call double @pow(double %mul,double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call double @pow(double %mul5,double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 +define void @pow_libm(double* %a, double* %b) { +; CHECK-LABEL: @pow_libm( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone + %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone + store double %pow1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 + store double %pow2, double* %idx2, align 8 ret void } - -; CHECK: exp2_libm -; CHECK: call <2 x double> @llvm.exp2.v2f64 -; CHECK: ret void -define void @exp2_libm(double* %a, double* %b, double* %c) { -entry: - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %call = tail call double @exp2(double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - 
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call double @exp2(double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = getelementptr inbounds double, double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 +define void @exp_libm(double* %a, double* %b) { +; CHECK-LABEL: @exp_libm( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %exp1 = tail call double @exp2(double %a0) nounwind readnone + %exp2 = tail call double @exp2(double %a1) nounwind readnone + store double %exp1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 + store double %exp2, double* %idx2, align 8 ret void } - -; CHECK: sqrt_libm -; CHECK: call nnan <2 x double> @llvm.sqrt.v2f64 -; CHECK: ret void -define void @sqrt_libm(double* %a, double* %b, double* %c) { -entry: - %i0 = load double, double* %a, align 8 - %i1 = load double, double* %b, align 8 - %mul = fmul double %i0, %i1 - %call = tail call nnan double @sqrt(double %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds double, double* %a, i64 1 - %i3 = load double, double* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds double, double* %b, i64 1 - %i4 = load double, double* %arrayidx4, align 8 - %mul5 = fmul double %i3, %i4 - %call5 = tail call nnan double @sqrt(double %mul5) nounwind readnone - store double %call, double* %c, align 8 - %arrayidx5 = 
getelementptr inbounds double, double* %c, i64 1 - store double %call5, double* %arrayidx5, align 8 +; No fast-math-flags are required to convert sqrt library calls to an intrinsic. +; We just need to know that errno is not set (readnone). + +define void @sqrt_libm_no_errno(double* %a, double* %b) { +; CHECK-LABEL: @sqrt_libm_no_errno( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* %a to <2 x double>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* %b to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone + %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone + store double %sqrt1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 + store double %sqrt2, double* %idx2, align 8 ret void } +; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize. 
+; The nnan on the call does not matter because there's no guarantee in the C standard that a negative +; input would result in a nan output ("On a domain error, the function returns an +; implementation-defined value.") + +define void @sqrt_libm_errno(double* %a, double* %b) { +; CHECK-LABEL: @sqrt_libm_errno( +; CHECK-NEXT: [[A0:%.*]] = load double, double* %a, align 8 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds double, double* %a, i64 1 +; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDX1]], align 8 +; CHECK-NEXT: [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2 +; CHECK-NEXT: [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2 +; CHECK-NEXT: store double [[SQRT1]], double* %b, align 8 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds double, double* %b, i64 1 +; CHECK-NEXT: store double [[SQRT2]], double* [[IDX2]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load double, double* %a, align 8 + %idx1 = getelementptr inbounds double, double* %a, i64 1 + %a1 = load double, double* %idx1, align 8 + %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind + %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind + store double %sqrt1, double* %b, align 8 + %idx2 = getelementptr inbounds double, double* %b, i64 1 + store double %sqrt2, double* %idx2, align 8 + ret void +} ; Negative test case -; CHECK: round_custom -; CHECK-NOT: load <4 x i64> -; CHECK: ret void -define void @round_custom(i64* %a, i64* %b, i64* %c) { -entry: - %i0 = load i64, i64* %a, align 8 - %i1 = load i64, i64* %b, align 8 - %mul = mul i64 %i0, %i1 - %call = tail call i64 @round(i64 %mul) nounwind readnone - %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1 - %i3 = load i64, i64* %arrayidx3, align 8 - %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1 - %i4 = load i64, i64* %arrayidx4, align 8 - %mul5 = mul i64 %i3, %i4 - %call5 = tail call i64 @round(i64 %mul5) nounwind readnone - store i64 %call, i64* %c, align 8 - %arrayidx5 = getelementptr 
inbounds i64, i64* %c, i64 1 - store i64 %call5, i64* %arrayidx5, align 8 +define void @round_custom(i64* %a, i64* %b) { +; CHECK-LABEL: @round_custom( +; CHECK-NEXT: [[A0:%.*]] = load i64, i64* %a, align 8 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr inbounds i64, i64* %a, i64 1 +; CHECK-NEXT: [[A1:%.*]] = load i64, i64* [[IDX1]], align 8 +; CHECK-NEXT: [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3 +; CHECK-NEXT: [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3 +; CHECK-NEXT: store i64 [[ROUND1]], i64* %b, align 8 +; CHECK-NEXT: [[IDX2:%.*]] = getelementptr inbounds i64, i64* %b, i64 1 +; CHECK-NEXT: store i64 [[ROUND2]], i64* [[IDX2]], align 8 +; CHECK-NEXT: ret void +; + %a0 = load i64, i64* %a, align 8 + %idx1 = getelementptr inbounds i64, i64* %a, i64 1 + %a1 = load i64, i64* %idx1, align 8 + %round1 = tail call i64 @round(i64 %a0) nounwind readnone + %round2 = tail call i64 @round(i64 %a1) nounwind readnone + store i64 %round1, i64* %b, align 8 + %idx2 = getelementptr inbounds i64, i64* %b, i64 1 + store i64 %round2, i64* %idx2, align 8 ret void } diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll index 5d7118753e92..2f9f84948eaf 100644 --- a/test/Transforms/SLPVectorizer/X86/cast.ll +++ b/test/Transforms/SLPVectorizer/X86/cast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -14,10 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test_sext_4i8_to_4i32(i32* noalias nocapture %A, i8* noalias nocapture %B) { ; CHECK-LABEL: @test_sext_4i8_to_4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* %B to <4 x i8>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <4 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* %A to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret i32 undef ; @@ -46,10 +46,10 @@ entry: define i32 @test_zext_4i16_to_4i32(i32* noalias nocapture %A, i16* noalias nocapture %B) { ; CHECK-LABEL: @test_zext_4i16_to_4i32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <4 x i16>* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <4 x i16>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* %A to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4 ; CHECK-NEXT: ret i32 undef ; @@ -76,30 +76,21 @@ entry: } define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) { -; SSE-LABEL: @test_sext_4i16_to_4i64( -; SSE-NEXT: entry: -; SSE-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <2 x i16>* -; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1 -; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64> -; SSE-NEXT: [[TMP3:%.*]] = bitcast i64* %A to <2 x i64>* -; 
SSE-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4 -; SSE-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* %B, i64 2 -; SSE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* %A, i64 2 -; SSE-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>* -; SSE-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1 -; SSE-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64> -; SSE-NEXT: [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>* -; SSE-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 -; SSE-NEXT: ret i64 undef -; -; AVX-LABEL: @test_sext_4i16_to_4i64( -; AVX-NEXT: entry: -; AVX-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <4 x i16>* -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1 -; AVX-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64> -; AVX-NEXT: [[TMP3:%.*]] = bitcast i64* %A to <4 x i64>* -; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 4 -; AVX-NEXT: ret i64 undef +; CHECK-LABEL: @test_sext_4i16_to_4i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[B:%.*]] to <2 x i16>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[B]], i64 2 +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 2 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>* +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4 +; CHECK-NEXT: ret i64 undef ; entry: %0 = load i16, i16* %B, align 1 diff 
--git a/test/Transforms/SLPVectorizer/X86/load-merge.ll b/test/Transforms/SLPVectorizer/X86/load-merge.ll new file mode 100644 index 000000000000..df990be073b1 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=haswell | FileCheck %s + +;unsigned load_le32(unsigned char *data) { +; unsigned le32 = (data[0]<<0) | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); +; return le32; +;} + +define i32 @_Z9load_le32Ph(i8* nocapture readonly %data) { +; CHECK-LABEL: @_Z9load_le32Ph( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[DATA:%.*]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 8 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL3]], [[CONV]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX4]], align 1 +; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[TMP2]] to i32 +; CHECK-NEXT: [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 16 +; CHECK-NEXT: [[OR7:%.*]] = or i32 [[OR]], [[SHL6]] +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[DATA]], i64 3 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX8]], align 1 +; CHECK-NEXT: [[CONV9:%.*]] = zext i8 [[TMP3]] to i32 +; CHECK-NEXT: [[SHL10:%.*]] = shl nuw i32 [[CONV9]], 24 +; CHECK-NEXT: [[OR11:%.*]] = or i32 [[OR7]], [[SHL10]] +; CHECK-NEXT: ret i32 [[OR11]] +; +entry: + %0 = load i8, i8* %data, align 1 + %conv = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8, i8* %data, i64 1 + %1 = load i8, i8* %arrayidx1, align 1 + 
%conv2 = zext i8 %1 to i32 + %shl3 = shl nuw nsw i32 %conv2, 8 + %or = or i32 %shl3, %conv + %arrayidx4 = getelementptr inbounds i8, i8* %data, i64 2 + %2 = load i8, i8* %arrayidx4, align 1 + %conv5 = zext i8 %2 to i32 + %shl6 = shl nuw nsw i32 %conv5, 16 + %or7 = or i32 %or, %shl6 + %arrayidx8 = getelementptr inbounds i8, i8* %data, i64 3 + %3 = load i8, i8* %arrayidx8, align 1 + %conv9 = zext i8 %3 to i32 + %shl10 = shl nuw i32 %conv9, 24 + %or11 = or i32 %or7, %shl10 + ret i32 %or11 +} diff --git a/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll new file mode 100644 index 000000000000..79fb782db8f5 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s + +;void Distance(float *p1, int p2, unsigned long p3[], float p4[]) { +; long a = p3[0] = 5; +; p1 += p2; +; p4[3] += p1[a]; +; p3[0] >>= 5; +; p3[1] >>= 5; +; p3[2] >>= 5; +; p3[3] >>= 5; +; p1 += p2; +; p4[0] += p1[p3[0] & a]; +;} + +define void @_Z8DistanceIlLi5EEvPfiPmS0_(float* %p1, i32 %p2, i64* %p3, float* %p4) { +; CHECK-LABEL: @_Z8DistanceIlLi5EEvPfiPmS0_( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 5, i64* [[P3:%.*]], align 8 +; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[P2:%.*]] to i64 +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[P1:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 5 +; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[P4:%.*]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]] +; CHECK-NEXT: store float [[ADD]], 
float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[P3]], align 8 +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[TMP2]], 5 +; CHECK-NEXT: store i64 [[SHR]], i64* [[P3]], align 8 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 1 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[ARRAYIDX4]] to <2 x i64>* +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 5, i64 5> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[ARRAYIDX4]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP6]], align 8 +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i64, i64* [[P3]], i64 3 +; CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARRAYIDX8]], align 8 +; CHECK-NEXT: [[SHR9:%.*]] = lshr i64 [[TMP7]], 5 +; CHECK-NEXT: store i64 [[SHR9]], i64* [[ARRAYIDX8]], align 8 +; CHECK-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[AND:%.*]] = and i64 [[SHR]], 5 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[ADD_PTR11]], i64 [[AND]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[P4]], align 4 +; CHECK-NEXT: [[ADD15:%.*]] = fadd float [[TMP8]], [[TMP9]] +; CHECK-NEXT: store float [[ADD15]], float* [[P4]], align 4 +; CHECK-NEXT: ret void +; +entry: + store i64 5, i64* %p3, align 8 + %idx.ext = sext i32 %p2 to i64 + %add.ptr = getelementptr inbounds float, float* %p1, i64 %idx.ext + %arrayidx1 = getelementptr inbounds float, float* %add.ptr, i64 5 + %0 = load float, float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, float* %p4, i64 3 + %1 = load float, float* %arrayidx2, align 4 + %add = fadd float %0, %1 + store float %add, float* %arrayidx2, align 4 + %2 = load i64, i64* %p3, align 8 + %shr = 
lshr i64 %2, 5 + store i64 %shr, i64* %p3, align 8 + %arrayidx4 = getelementptr inbounds i64, i64* %p3, i64 1 + %3 = load i64, i64* %arrayidx4, align 8 + %shr5 = lshr i64 %3, 5 + store i64 %shr5, i64* %arrayidx4, align 8 + %arrayidx6 = getelementptr inbounds i64, i64* %p3, i64 2 + %4 = load i64, i64* %arrayidx6, align 8 + %shr7 = lshr i64 %4, 5 + store i64 %shr7, i64* %arrayidx6, align 8 + %arrayidx8 = getelementptr inbounds i64, i64* %p3, i64 3 + %5 = load i64, i64* %arrayidx8, align 8 + %shr9 = lshr i64 %5, 5 + store i64 %shr9, i64* %arrayidx8, align 8 + %add.ptr11 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext + %and = and i64 %shr, 5 + %arrayidx13 = getelementptr inbounds float, float* %add.ptr11, i64 %and + %6 = load float, float* %arrayidx13, align 4 + %7 = load float, float* %p4, align 4 + %add15 = fadd float %6, %7 + store float %add15, float* %p4, align 4 + ret void +} diff --git a/test/Transforms/SampleProfile/indirect-call.ll b/test/Transforms/SampleProfile/indirect-call.ll index 61a1bc519966..0c00639e6c00 100644 --- a/test/Transforms/SampleProfile/indirect-call.ll +++ b/test/Transforms/SampleProfile/indirect-call.ll @@ -182,7 +182,7 @@ define void @test_direct() !dbg !22 { ; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} ; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000} ; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000} -; CHECK: ![[VP]] = !{!"VP", i32 0, i64 1000, i64 -6391416044382067764, i64 1000} +; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000} !6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) !7 = !DILocation(line: 7, scope: !6) !8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0) diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll index a2b940380016..a2ca63d0a2df 
100644 --- a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll +++ b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -simplifycfg -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 | FileCheck %s +; RUN: opt -S < %s -simplifycfg -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=1 | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7--linux-gnueabihf" diff --git a/test/Transforms/WholeProgramDevirt/import-indir.ll b/test/Transforms/WholeProgramDevirt/import-indir.ll index 052a34948343..927ee16b370c 100644 --- a/test/Transforms/WholeProgramDevirt/import-indir.ll +++ b/test/Transforms/WholeProgramDevirt/import-indir.ll @@ -7,6 +7,7 @@ ; SUMMARY-NEXT: - Linkage: 0 ; SUMMARY-NEXT: NotEligibleToImport: false ; SUMMARY-NEXT: Live: true +; SUMMARY-NEXT: Local: false ; SUMMARY-NEXT: TypeTestAssumeVCalls: ; SUMMARY-NEXT: - GUID: 123 ; SUMMARY-NEXT: Offset: 0 |