diff options
author | Alexandros Lamprineas <alexandros.lamprineas@arm.com> | 2021-12-14 15:29:44 +0000 |
---|---|---|
committer | Alexandros Lamprineas <alexandros.lamprineas@arm.com> | 2021-12-14 16:03:02 +0000 |
commit | 61bb8b5d404023d5cd329a6d56c2467a81ab138a (patch) | |
tree | 781a527734bc4d8c28411987763cd0f9260a639e | |
parent | ebb6bb725eadd57a0f7995fff17774020f6f0389 (diff) | |
download | llvm-61bb8b5d404023d5cd329a6d56c2467a81ab138a.tar.gz |
[AArch64] Convert sra(X, elt_size(X)-1) to cmlt(X, 0)
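CMLT has twice the execution throughput of SSHR on Arm out-of-order cores. The two forms are equivalent: an arithmetic shift right by elt_size(X)-1 copies each lane's sign bit into every bit of that lane, which is the same all-ones/all-zeros mask that CMLT against #0 produces.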
Differential Revision: https://reviews.llvm.org/D115457
18 files changed, 93 insertions, 68 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index bcaf6cd3195e..3f174bb6b17e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4174,6 +4174,21 @@ defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; +def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), + (CMLTv8i8rz V64:$Rn)>; +def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), + (CMLTv4i16rz V64:$Rn)>; +def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), + (CMLTv2i32rz V64:$Rn)>; +def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), + (CMLTv16i8rz V128:$Rn)>; +def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), + (CMLTv8i16rz V128:$Rn)>; +def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), + (CMLTv4i32rz V128:$Rn)>; +def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), + (CMLTv2i64rz V128:$Rn)>; + defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; @@ -4825,6 +4840,9 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; +def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), + (CMLTv1i64rz V64:$Rn)>; + def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), (FCVTASv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll index 6e77612815f4..2149fe228296 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -143,7 
+143,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) { ; CODE: bb.0 ; CODE-NEXT: ushll v{{.+}}.2d, v{{.+}}.2s, #0 ; CODE-NEXT: shl v{{.+}}.2d, v{{.+}}.2d, #63 -; CODE-NEXT: sshr v{{.+}}.2d, v{{.+}}.2d, #63 +; CODE-NEXT: cmlt v{{.+}}.2d, v{{.+}}.2d, #0 ; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b ; CODE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll index 947d9c8571b9..38d574213b9d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -348,8 +348,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) { ; CHECK-NEXT: mov.b v1[15], w8 ; CHECK-NEXT: shl.16b v0, v0, #7 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: sshr.16b v0, v0, #7 -; CHECK-NEXT: sshr.16b v1, v1, #7 +; CHECK-NEXT: cmlt.16b v0, v0, #0 +; CHECK-NEXT: cmlt.16b v1, v1, #0 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -615,10 +615,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-NEXT: shl.16b v2, v2, #7 ; CHECK-NEXT: shl.16b v4, v1, #7 ; CHECK-NEXT: shl.16b v5, v0, #7 -; CHECK-NEXT: sshr.16b v0, v3, #7 -; CHECK-NEXT: sshr.16b v1, v2, #7 -; CHECK-NEXT: sshr.16b v2, v4, #7 -; CHECK-NEXT: sshr.16b v3, v5, #7 +; CHECK-NEXT: cmlt.16b v0, v3, #0 +; CHECK-NEXT: cmlt.16b v1, v2, #0 +; CHECK-NEXT: cmlt.16b v2, v4, #0 +; CHECK-NEXT: cmlt.16b v3, v5, #0 ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res diff --git a/llvm/test/CodeGen/AArch64/arm64-vshr.ll b/llvm/test/CodeGen/AArch64/arm64-vshr.ll index 6d599ccd6fc5..bd5aa2505be2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshr.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshr.ll @@ -48,7 +48,14 @@ entry: define <1 x i64> @sshr_v1i64(<1 x i64> %A) nounwind { ; CHECK-LABEL: sshr_v1i64: -; CHECK: sshr d0, d0, #63 +; CHECK: sshr d0, d0, #42 + %tmp3 = ashr <1 x i64> %A, < i64 42 > + ret <1 x i64> %tmp3 +} + 
+define <1 x i64> @cmlt_v1i64(<1 x i64> %A) nounwind { +; CHECK-LABEL: cmlt_v1i64: +; CHECK: cmlt d0, d0, #0 %tmp3 = ashr <1 x i64> %A, < i64 63 > ret <1 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll index dce218349c57..44d0eed3d723 100644 --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -115,7 +115,7 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) { ; CHECK-LABEL: sign_7xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -127,7 +127,7 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) { ; CHECK-LABEL: sign_8xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -139,7 +139,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) { ; CHECK-LABEL: sign_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.16b, #1 -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -150,7 +150,7 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) { define <3 x i32> @sign_3xi32(<3 x i32> %a) { ; CHECK-LABEL: sign_3xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: orr v0.4s, #1 ; CHECK-NEXT: ret %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1> @@ -161,7 +161,7 @@ define <3 x i32> @sign_3xi32(<3 x i32> %a) { define <4 x i32> @sign_4xi32(<4 x i32> %a) { ; CHECK-LABEL: sign_4xi32: ; 
CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: orr v0.4s, #1 ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -177,7 +177,7 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-NEXT: sshr v2.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v2.4s, #1 ; CHECK-NEXT: xtn v0.4h, v0.4s @@ -214,7 +214,7 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) { ; CHECK-LABEL: not_sign_4xi32_2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll index 8da816e141a1..e4f13f5c98a1 100644 --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -19,7 +19,7 @@ define void @signbits_vXi1(<4 x i16> %a1) { ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: umov w3, v0.h[3] ; CHECK-NEXT: b foo diff --git a/llvm/test/CodeGen/AArch64/div_minsize.ll b/llvm/test/CodeGen/AArch64/div_minsize.ll index f62ef4ee4a2d..8de967e52005 100644 --- a/llvm/test/CodeGen/AArch64/div_minsize.ll +++ b/llvm/test/CodeGen/AArch64/div_minsize.ll @@ -35,7 +35,7 @@ entry: define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize { entry: ; CHECK: sdiv_vec8x16_minsize -; CHECK: sshr v1.8h, v0.8h, #15 +; CHECK: cmlt v1.8h, v0.8h, #0 ; CHECK: usra v0.8h, v1.8h, #11 ; CHECK: sshr v0.8h, v0.8h, #5 ; CHECK: ret diff --git 
a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll index 6fa2d8320562..e473bbe72cef 100644 --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -167,7 +167,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK: // %bb.0: ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: movi v1.16b, #128 -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer @@ -180,7 +180,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: movi v1.8h, #128 ; CHECK-NEXT: shl v0.8h, v0.8h, #15 -; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer @@ -193,7 +193,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: movi v1.4s, #64 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer @@ -207,7 +207,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { ; CHECK-NEXT: mov w8, #65536 ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 -; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer diff 
--git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll index 85f0ab366899..c37e5450160f 100644 --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -184,7 +184,7 @@ define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind { define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v2.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 ; CHECK-NEXT: mov v3.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: usra v3.4s, v2.4s, #28 @@ -203,7 +203,7 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v2.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 ; CHECK-NEXT: mov v3.16b, v0.16b ; CHECK-NEXT: movi v1.4s, #128, lsl #24 ; CHECK-NEXT: usra v3.4s, v2.4s, #1 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll index 001ff9ba2272..9d8964980854 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll @@ -25,7 +25,7 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask ; CHECK-LABEL: select_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.4h, v2.4h, #15 -; CHECK-NEXT: sshr v2.4h, v2.4h, #15 +; CHECK-NEXT: cmlt v2.4h, v2.4h, #0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2 @@ -38,7 +38,7 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-NEXT: shl v2.8h, v2.8h, #15 -; CHECK-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret 
%sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2 @@ -122,7 +122,7 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m ; CHECK-LABEL: select_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.2s, v2.2s, #31 -; CHECK-NEXT: sshr v2.2s, v2.2s, #31 +; CHECK-NEXT: cmlt v2.2s, v2.2s, #0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2 @@ -135,7 +135,7 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2 @@ -233,7 +233,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-NEXT: shl v2.2d, v2.2d, #63 -; CHECK-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-NEXT: cmlt v2.2d, v2.2d, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll index 86cef17f1dd9..1fea072f0e37 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll @@ -25,7 +25,7 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 { ; CHECK-LABEL: select_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.8b, v2.8b, #7 -; CHECK-NEXT: sshr v2.8b, v2.8b, #7 +; CHECK-NEXT: cmlt v2.8b, v2.8b, #0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2 @@ -37,7 +37,7 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) ; 
CHECK-LABEL: select_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.16b, v2.16b, #7 -; CHECK-NEXT: sshr v2.16b, v2.16b, #7 +; CHECK-NEXT: cmlt v2.16b, v2.16b, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2 @@ -1137,7 +1137,7 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) # ; CHECK-LABEL: select_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.4h, v2.4h, #15 -; CHECK-NEXT: sshr v2.4h, v2.4h, #15 +; CHECK-NEXT: cmlt v2.4h, v2.4h, #0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2 @@ -1150,7 +1150,7 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) # ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-NEXT: shl v2.8h, v2.8h, #15 -; CHECK-NEXT: sshr v2.8h, v2.8h, #15 +; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2 @@ -1767,7 +1767,7 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) # ; CHECK-LABEL: select_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: shl v2.2s, v2.2s, #31 -; CHECK-NEXT: sshr v2.2s, v2.2s, #31 +; CHECK-NEXT: cmlt v2.2s, v2.2s, #0 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b ; CHECK-NEXT: ret %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2 @@ -1780,7 +1780,7 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) # ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2 @@ -2110,7 +2110,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) # ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-NEXT: shl 
v2.2d, v2.2d, #63 -; CHECK-NEXT: sshr v2.2d, v2.2d, #63 +; CHECK-NEXT: cmlt v2.2d, v2.2d, #0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll index 6fbadb8d7b12..af0c7e512ca1 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -632,7 +632,7 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 { ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll index aacc4da6bd96..5086282f8f24 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -42,7 +42,7 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 { ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll index 9b0918995296..bff87a586656 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -581,7 +581,7 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) #0 { ; 
CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll index 218323194d0e..19bc39025dce 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -42,7 +42,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) #0 { ; CHECK-NEXT: mov v0.h[0], w8 ; CHECK-NEXT: mov v0.h[1], w9 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: st1h { z1.h }, p0, [x1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll index dce9feb1b29d..f75d247e88c3 100644 --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -152,12 +152,12 @@ define <16 x i32> @uaddo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou ; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: shl v5.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v2.4s, #0 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 ; CHECK-NEXT: shl v6.4s, v1.4s, #31 -; CHECK-NEXT: sshr v1.4s, v5.4s, #31 -; CHECK-NEXT: sshr v2.4s, v3.4s, #31 -; CHECK-NEXT: sshr v3.4s, v6.4s, #31 +; CHECK-NEXT: cmlt v1.4s, v5.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v6.4s, #0 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -180,8 +180,8 @@ define <8 x i32> @uaddo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* 
%p2) noun ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: shl v3.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v1.4s, #31 -; CHECK-NEXT: sshr v1.4s, v3.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v3.4s, #0 ; CHECK-NEXT: ret %t = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1) %val = extractvalue {<8 x i16>, <8 x i1>} %t, 0 @@ -296,7 +296,7 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: stp x11, x12, [x10] -; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll index 7c7b023ed9e3..8dca9d0818e0 100644 --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -181,10 +181,10 @@ define <16 x i32> @umulo_v16i8(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %p2) nou ; CHECK-NEXT: shl v2.4s, v2.4s, #31 ; CHECK-NEXT: shl v6.4s, v5.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: sshr v4.4s, v4.4s, #31 -; CHECK-NEXT: sshr v5.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v6.4s, #31 -; CHECK-NEXT: sshr v3.4s, v3.4s, #31 +; CHECK-NEXT: cmlt v4.4s, v4.4s, #0 +; CHECK-NEXT: cmlt v5.4s, v2.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v6.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 ; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v4.16b ; CHECK-NEXT: mov v1.16b, v5.16b @@ -212,8 +212,8 @@ define <8 x i32> @umulo_v8i16(<8 x i16> %a0, <8 x i16> %a1, <8 x i16>* %p2) noun ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 ; CHECK-NEXT: shl v4.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v3.4s, #31 -; CHECK-NEXT: sshr v3.4s, v4.4s, 
#31 +; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v4.4s, #0 ; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: mov v1.16b, v3.16b @@ -370,7 +370,7 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) ; CHECK-NEXT: mul x9, x2, x6 ; CHECK-NEXT: shl v0.2s, v0.2s, #31 ; CHECK-NEXT: stp x9, x8, [x10, #16] -; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll index a11662fea7a5..763edf825e1f 100644 --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -16,7 +16,7 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1> @@ -47,7 +47,7 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1> @@ -78,7 +78,7 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1> @@ -105,7 +105,7 @@ define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) { ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: ret %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0> ret <4 x i32> %add @@ -149,7 +149,7 @@ define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) { ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0> @@ -196,7 +196,7 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: signbit_mask_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <16 x i8> %a, zeroinitializer @@ -209,7 +209,7 @@ define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: signbit_mask_swap_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> @@ -220,7 +220,7 @@ define <16 x i8> @signbit_mask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) { 
define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: signbit_mask_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <8 x i16> %a, zeroinitializer @@ -231,7 +231,7 @@ define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: signbit_mask_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <4 x i32> %a, zeroinitializer @@ -242,7 +242,7 @@ define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: signbit_mask_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <2 x i64> %a, zeroinitializer @@ -253,7 +253,7 @@ define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: signbit_setmask_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <16 x i8> %a, zeroinitializer @@ -264,7 +264,7 @@ define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: signbit_setmask_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <8 x i16> %a, zeroinitializer @@ -277,7 +277,7 @@ define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> 
@signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: signbit_setmask_swap_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: cmlt v0.8h, v0.8h, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> @@ -288,7 +288,7 @@ define <8 x i16> @signbit_setmask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: signbit_setmask_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <4 x i32> %a, zeroinitializer @@ -299,7 +299,7 @@ define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: signbit_setmask_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp slt <2 x i64> %a, zeroinitializer |