diff options
author | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-10-26 10:37:52 +0100 |
---|---|---|
committer | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-10-27 14:15:41 +0100 |
commit | f01fafdcd469eab1e76f4bbb549d0b8729b5b195 (patch) | |
tree | 6946943b2e2fa280b10409acddbf672ffc11807a | |
parent | eae047afe0d35efc85e23b1688fc859c53843893 (diff) | |
download | llvm-f01fafdcd469eab1e76f4bbb549d0b8729b5b195.tar.gz |
[SVE][CodeGen] Fix incorrect legalisation of zero-extended masked loads
PromoteIntRes_MLOAD always sets the extension type to `EXTLOAD`, which
results in a sign-extended load. If the type returned by getExtensionType()
for the load being promoted is something other than `NON_EXTLOAD`, we
should instead pass this to getMaskedLoad() as the extension type.
Reviewed By: CarolineConcatto
Differential Revision: https://reviews.llvm.org/D112320
4 files changed, 46 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3e43c554d77c..29812ef8f1a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -784,11 +784,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), N->getMask(), ExtPassThru, N->getMemoryVT(), N->getMemOperand(), - N->getAddressingMode(), ISD::EXTLOAD); + N->getAddressingMode(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll index 085c15396778..70b4f5b1b053 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -117,6 +117,15 @@ define <vscale x 4 x i32> @masked_load_passthru(<vscale x 4 x i32> *%a, <vscale ret <vscale x 4 x i32> %load } +; Masked load requires promotion +define <vscale x 2 x i16> @masked_load_nxv2i16(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) { +; CHECK-LABEL: masked_load_nxv2i16 +; CHECK: ld1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef) + ret <vscale x 2 x i16> %wide.load +} + ; ; Masked Stores ; @@ -315,6 +324,7 @@ define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale declare 
<vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>) declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) +declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>) declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll index 9a20035d3c79..4d3c4fa3616e 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll @@ -89,6 +89,23 @@ define <vscale x 16 x i32> @masked_sload_nxv16i8(<vscale x 16 x i8>* %a, <vscale ret <vscale x 16 x i32> %ext } +; Masked load requires promotion +define <vscale x 4 x double> @masked_sload_4i8_4f32(<vscale x 4 x i8>* noalias %in, <vscale x 4 x i1> %mask) { +; CHECK-LABEL: masked_sload_4i8_4f32: +; CHECK: punpkhi p2.h, p0.b +; CHECK-NEXT: punpklo p0.h, p0.b +; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: scvtf z0.d, p1/m, z0.d +; CHECK-NEXT: scvtf z1.d, p1/m, z1.d +; CHECK-NEXT: ret + %wide.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %in, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef) + %sext = sext <vscale x 4 x i8> %wide.load to <vscale x 4 x i64> + %res = sitofp <vscale x 4 x i64> %sext to <vscale x 4 x double> + ret <vscale x 4 x double> %res +} + declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>) declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) 
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll index 79eff4d7c572..69b3b46d9a7e 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll @@ -95,6 +95,19 @@ define <vscale x 8 x i64> @masked_zload_nxv8i16(<vscale x 8 x i16>* %a, <vscale ret <vscale x 8 x i64> %ext } +; Masked load requires promotion +define <vscale x 2 x double> @masked_zload_2i16_2f64(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) { +; CHECK-LABEL: masked_zload_2i16_2f64: +; CHECK: ld1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef) + %zext = zext <vscale x 2 x i16> %wide.load to <vscale x 2 x i32> + %res = uitofp <vscale x 2 x i32> %zext to <vscale x 2 x double> + ret <vscale x 2 x double> %res +} + declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>) declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>) |