summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexandros Lamprineas <alexandros.lamprineas@arm.com> 2021-12-14 15:29:44 +0000
committer: Alexandros Lamprineas <alexandros.lamprineas@arm.com> 2021-12-14 16:03:02 +0000
commit: 61bb8b5d404023d5cd329a6d56c2467a81ab138a (patch)
tree: 781a527734bc4d8c28411987763cd0f9260a639e
parent: ebb6bb725eadd57a0f7995fff17774020f6f0389 (diff)
download: llvm-61bb8b5d404023d5cd329a6d56c2467a81ab138a.tar.gz
[AArch64] Convert sra(X, elt_size(X)-1) to cmlt(X, 0)
CMLT has twice the execution throughput of SSHR on Arm out-of-order cores.

Differential Revision: https://reviews.llvm.org/D115457
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 18
-rw-r--r-- llvm/test/Analysis/CostModel/AArch64/vector-select.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll | 12
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-vshr.ll | 9
-rw-r--r-- llvm/test/CodeGen/AArch64/cmp-select-sign.ll | 14
-rw-r--r-- llvm/test/CodeGen/AArch64/dag-numsignbits.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/div_minsize.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll | 8
-rw-r--r-- llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll | 4
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll | 10
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll | 14
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/vec_uaddo.ll | 14
-rw-r--r-- llvm/test/CodeGen/AArch64/vec_umulo.ll | 14
-rw-r--r-- llvm/test/CodeGen/AArch64/vselect-constants.ll | 30
18 files changed, 93 insertions, 68 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bcaf6cd3195e..3f174bb6b17e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4174,6 +4174,21 @@ defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
+ (CMLTv8i8rz V64:$Rn)>;
+def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
+ (CMLTv4i16rz V64:$Rn)>;
+def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
+ (CMLTv2i32rz V64:$Rn)>;
+def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
+ (CMLTv16i8rz V128:$Rn)>;
+def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
+ (CMLTv8i16rz V128:$Rn)>;
+def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
+ (CMLTv4i32rz V128:$Rn)>;
+def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
+ (CMLTv2i64rz V128:$Rn)>;
+
defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
@@ -4825,6 +4840,9 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
+ (CMLTv1i64rz V64:$Rn)>;
+
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
(FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index 6e77612815f4..2149fe228296 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -143,7 +143,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
; CODE: bb.0
; CODE-NEXT: ushll v{{.+}}.2d, v{{.+}}.2s, #0
; CODE-NEXT: shl v{{.+}}.2d, v{{.+}}.2d, #63
-; CODE-NEXT: sshr v{{.+}}.2d, v{{.+}}.2d, #63
+; CODE-NEXT: cmlt v{{.+}}.2d, v{{.+}}.2d, #0
; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
; CODE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 947d9c8571b9..38d574213b9d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -348,8 +348,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
; CHECK-NEXT: mov.b v1[15], w8
; CHECK-NEXT: shl.16b v0, v0, #7
; CHECK-NEXT: shl.16b v1, v1, #7
-; CHECK-NEXT: sshr.16b v0, v0, #7
-; CHECK-NEXT: sshr.16b v1, v1, #7
+; CHECK-NEXT: cmlt.16b v0, v0, #0
+; CHECK-NEXT: cmlt.16b v1, v1, #0
; CHECK-NEXT: ret
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
@@ -615,10 +615,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
; CHECK-NEXT: shl.16b v2, v2, #7
; CHECK-NEXT: shl.16b v4, v1, #7
; CHECK-NEXT: shl.16b v5, v0, #7
-; CHECK-NEXT: sshr.16b v0, v3, #7
-; CHECK-NEXT: sshr.16b v1, v2, #7
-; CHECK-NEXT: sshr.16b v2, v4, #7
-; CHECK-NEXT: sshr.16b v3, v5, #7
+; CHECK-NEXT: cmlt.16b v0, v3, #0
+; CHECK-NEXT: cmlt.16b v1, v2, #0
+; CHECK-NEXT: cmlt.16b v2, v4, #0
+; CHECK-NEXT: cmlt.16b v3, v5, #0
; CHECK-NEXT: ret
%res = sext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshr.ll b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
index 6d599ccd6fc5..bd5aa2505be2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
@@ -48,7 +48,14 @@ entry:
define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind {
; CHECK-LABEL: sshr_v1i64:
-; CHECK: sshr d0, d0, #63
+; CHECK: sshr d0, d0, #42
+ %tmp3 = ashr <1 x i64> %A, < i64 42 >
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @cmlt_v1i64(<1 x i64> %A) nounwind {
+; CHECK-LABEL: cmlt_v1i64:
+; CHECK: cmlt d0, d0, #0
%tmp3 = ashr <1 x i64> %A, < i64 63 >
ret <1 x i64> %tmp3
}
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index dce218349c57..44d0eed3d723 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -115,7 +115,7 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) {
; CHECK-LABEL: sign_7xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -127,7 +127,7 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) {
; CHECK-LABEL: sign_8xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.8b, #1
-; CHECK-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -139,7 +139,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) {
; CHECK-LABEL: sign_16xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #1
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -150,7 +150,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) {
define <3 x i32> @sign_3xi32(<3 x i32> %a) {
; CHECK-LABEL: sign_3xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: orr v0.4s, #1
; CHECK-NEXT: ret
%c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
@@ -161,7 +161,7 @@ define <3 x i32> @sign_3xi32(<3 x i32> %a) {
define <4 x i32> @sign_4xi32(<4 x i32> %a) {
; CHECK-LABEL: sign_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: orr v0.4s, #1
; CHECK-NEXT: ret
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -177,7 +177,7 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v2.4s, #1
; CHECK-NEXT: xtn v0.4h, v0.4s
@@ -214,7 +214,7 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
; CHECK-LABEL: not_sign_4xi32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI17_0
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
index 8da816e141a1..e4f13f5c98a1 100644
--- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -19,7 +19,7 @@ define void @signbits_vXi1(<4 x i16> %a1) {
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: umov w3, v0.h[3]
; CHECK-NEXT: b foo
diff --git a/llvm/test/CodeGen/AArch64/div_minsize.ll b/llvm/test/CodeGen/AArch64/div_minsize.ll
index f62ef4ee4a2d..8de967e52005 100644
--- a/llvm/test/CodeGen/AArch64/div_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/div_minsize.ll
@@ -35,7 +35,7 @@ entry:
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
entry:
; CHECK: sdiv_vec8x16_minsize
-; CHECK: sshr v1.8h, v0.8h, #15
+; CHECK: cmlt v1.8h, v0.8h, #0
; CHECK: usra v0.8h, v1.8h, #11
; CHECK: sshr v0.8h, v0.8h, #5
; CHECK: ret
diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
index 6fa2d8320562..e473bbe72cef 100644
--- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -167,7 +167,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: movi v1.16b, #128
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer
@@ -180,7 +180,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: movi v1.8h, #128
; CHECK-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
@@ -193,7 +193,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi v1.4s, #64
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
@@ -207,7 +207,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; CHECK-NEXT: mov w8, #65536
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
index 85f0ab366899..c37e5450160f 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -184,7 +184,7 @@ define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_srem_pow2:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
; CHECK-NEXT: mov v3.16b, v0.16b
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: usra v3.4s, v2.4s, #28
@@ -203,7 +203,7 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_srem_int_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v2.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
; CHECK-NEXT: mov v3.16b, v0.16b
; CHECK-NEXT: movi v1.4s, #128, lsl #24
; CHECK-NEXT: usra v3.4s, v2.4s, #1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
index 001ff9ba2272..9d8964980854 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
@@ -25,7 +25,7 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
; CHECK-LABEL: select_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
@@ -38,7 +38,7 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: shl v2.8h, v2.8h, #15
-; CHECK-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
@@ -122,7 +122,7 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
; CHECK-LABEL: select_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.2s, v2.2s, #31
-; CHECK-NEXT: sshr v2.2s, v2.2s, #31
+; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
@@ -135,7 +135,7 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
@@ -233,7 +233,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-NEXT: shl v2.2d, v2.2d, #63
-; CHECK-NEXT: sshr v2.2d, v2.2d, #63
+; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
index 86cef17f1dd9..1fea072f0e37 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
@@ -25,7 +25,7 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.8b, v2.8b, #7
-; CHECK-NEXT: sshr v2.8b, v2.8b, #7
+; CHECK-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
@@ -37,7 +37,7 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
; CHECK-LABEL: select_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.16b, v2.16b, #7
-; CHECK-NEXT: sshr v2.16b, v2.16b, #7
+; CHECK-NEXT: cmlt v2.16b, v2.16b, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
@@ -1137,7 +1137,7 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #
; CHECK-LABEL: select_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
@@ -1150,7 +1150,7 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: shl v2.8h, v2.8h, #15
-; CHECK-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
@@ -1767,7 +1767,7 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #
; CHECK-LABEL: select_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v2.2s, v2.2s, #31
-; CHECK-NEXT: sshr v2.2s, v2.2s, #31
+; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
%sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
@@ -1780,7 +1780,7 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
@@ -2110,7 +2110,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-NEXT: shl v2.2d, v2.2d, #63
-; CHECK-NEXT: sshr v2.2d, v2.2d, #63
+; CHECK-NEXT: cmlt v2.2d, v2.2d, #0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index 6fbadb8d7b12..af0c7e512ca1 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -632,7 +632,7 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 {
; CHECK-NEXT: mov v0.h[0], w8
; CHECK-NEXT: mov v0.h[1], w9
; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index aacc4da6bd96..5086282f8f24 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -42,7 +42,7 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
; CHECK-NEXT: mov v0.h[0], w8
; CHECK-NEXT: mov v0.h[1], w9
; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
index 9b0918995296..bff87a586656 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -581,7 +581,7 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 {
; CHECK-NEXT: mov v0.h[0], w8
; CHECK-NEXT: mov v0.h[1], w9
; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
index 218323194d0e..19bc39025dce 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -42,7 +42,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 {
; CHECK-NEXT: mov v0.h[0], w8
; CHECK-NEXT: mov v0.h[1], w9
; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: st1h { z1.h }, p0, [x1]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
index dce9feb1b29d..f75d247e88c3 100644
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -152,12 +152,12 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; CHECK-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: shl v5.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v2.4s, #0
; CHECK-NEXT: shl v3.4s, v3.4s, #31
; CHECK-NEXT: shl v6.4s, v1.4s, #31
-; CHECK-NEXT: sshr v1.4s, v5.4s, #31
-; CHECK-NEXT: sshr v2.4s, v3.4s, #31
-; CHECK-NEXT: sshr v3.4s, v6.4s, #31
+; CHECK-NEXT: cmlt v1.4s, v5.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v6.4s, #0
; CHECK-NEXT: ret
%t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
%val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
@@ -180,8 +180,8 @@ define <8 x i32> @uaddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: shl v1.4s, v1.4s, #31
; CHECK-NEXT: shl v3.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v1.4s, #31
-; CHECK-NEXT: sshr v1.4s, v3.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v1.4s, #0
+; CHECK-NEXT: cmlt v1.4s, v3.4s, #0
; CHECK-NEXT: ret
%t = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1)
%val = extractvalue {<8 x i16>, <8 x i1>} %t, 0
@@ -296,7 +296,7 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; CHECK-NEXT: stp x8, x9, [x10, #16]
; CHECK-NEXT: shl v0.2s, v0.2s, #31
; CHECK-NEXT: stp x11, x12, [x10]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: ret
%t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
index 7c7b023ed9e3..8dca9d0818e0 100644
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -181,10 +181,10 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou
; CHECK-NEXT: shl v2.4s, v2.4s, #31
; CHECK-NEXT: shl v6.4s, v5.4s, #31
; CHECK-NEXT: shl v3.4s, v3.4s, #31
-; CHECK-NEXT: sshr v4.4s, v4.4s, #31
-; CHECK-NEXT: sshr v5.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v6.4s, #31
-; CHECK-NEXT: sshr v3.4s, v3.4s, #31
+; CHECK-NEXT: cmlt v4.4s, v4.4s, #0
+; CHECK-NEXT: cmlt v5.4s, v2.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v6.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
@@ -212,8 +212,8 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: shl v3.4s, v3.4s, #31
; CHECK-NEXT: shl v4.4s, v2.4s, #31
-; CHECK-NEXT: sshr v2.4s, v3.4s, #31
-; CHECK-NEXT: sshr v3.4s, v4.4s, #31
+; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v4.4s, #0
; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: mov v1.16b, v3.16b
@@ -370,7 +370,7 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; CHECK-NEXT: mul x9, x2, x6
; CHECK-NEXT: shl v0.2s, v0.2s, #31
; CHECK-NEXT: stp x9, x8, [x10, #16]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-NEXT: ret
%t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll
index a11662fea7a5..763edf825e1f 100644
--- a/llvm/test/CodeGen/AArch64/vselect-constants.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll
@@ -16,7 +16,7 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
@@ -47,7 +47,7 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
@@ -78,7 +78,7 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
@@ -105,7 +105,7 @@ define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %add
@@ -149,7 +149,7 @@ define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -196,7 +196,7 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_mask_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <16 x i8> %a, zeroinitializer
@@ -209,7 +209,7 @@ define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_mask_swap_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -220,7 +220,7 @@ define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_mask_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <8 x i16> %a, zeroinitializer
@@ -231,7 +231,7 @@ define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: signbit_mask_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <4 x i32> %a, zeroinitializer
@@ -242,7 +242,7 @@ define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: signbit_mask_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <2 x i64> %a, zeroinitializer
@@ -253,7 +253,7 @@ define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: signbit_setmask_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <16 x i8> %a, zeroinitializer
@@ -264,7 +264,7 @@ define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_setmask_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <8 x i16> %a, zeroinitializer
@@ -277,7 +277,7 @@ define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: signbit_setmask_swap_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -288,7 +288,7 @@ define <8 x i16> @signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: signbit_setmask_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <4 x i32> %a, zeroinitializer
@@ -299,7 +299,7 @@ define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: signbit_setmask_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%cond = icmp slt <2 x i64> %a, zeroinitializer