diff options
author | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-10-26 10:37:52 +0100 |
---|---|---|
committer | Kerry McLaughlin <kerry.mclaughlin@arm.com> | 2021-10-27 14:15:41 +0100 |
commit | f01fafdcd469eab1e76f4bbb549d0b8729b5b195 (patch) | |
tree | 6946943b2e2fa280b10409acddbf672ffc11807a | |
parent | eae047afe0d35efc85e23b1688fc859c53843893 (diff) | |
download | llvm-f01fafdcd469eab1e76f4bbb549d0b8729b5b195.tar.gz |
[SVE][CodeGen] Fix incorrect legalisation of zero-extended masked loads
PromoteIntRes_MLOAD always sets the extension type to `EXTLOAD`, which
results in a sign-extended load. If the type returned by getExtensionType()
for the load being promoted is something other than `NON_EXTLOAD`, we
should instead pass this to getMaskedLoad() as the extension type.
Reviewed By: CarolineConcatto
Differential Revision: https://reviews.llvm.org/D112320
4 files changed, 46 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3e43c554d77c..29812ef8f1a0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -784,11 +784,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), N->getMask(), ExtPassThru, N->getMemoryVT(), N->getMemOperand(), - N->getAddressingMode(), ISD::EXTLOAD); + N->getAddressingMode(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll index 085c15396778..70b4f5b1b053 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -117,6 +117,15 @@ define <vscale x 4 x i32> @masked_load_passthru(<vscale x 4 x i32> *%a, <vscale ret <vscale x 4 x i32> %load } +; Masked load requires promotion +define <vscale x 2 x i16> @masked_load_nxv2i16(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) { +; CHECK-LABEL: masked_load_nxv2i16 +; CHECK: ld1sh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef) + ret <vscale x 2 x i16> %wide.load +} + ; ; Masked Stores ; @@ -315,6 +324,7 @@ define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale declare 
<vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>) declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) +declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>) declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll index 9a20035d3c79..4d3c4fa3616e 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll @@ -89,6 +89,23 @@ define <vscale x 16 x i32> @masked_sload_nxv16i8(<vscale x 16 x i8>* %a, <vscale ret <vscale x 16 x i32> %ext } +; Masked load requires promotion +define <vscale x 4 x double> @masked_sload_4i8_4f32(<vscale x 4 x i8>* noalias %in, <vscale x 4 x i1> %mask) { +; CHECK-LABEL: masked_sload_4i8_4f32: +; CHECK: punpkhi p2.h, p0.b +; CHECK-NEXT: punpklo p0.h, p0.b +; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: scvtf z0.d, p1/m, z0.d +; CHECK-NEXT: scvtf z1.d, p1/m, z1.d +; CHECK-NEXT: ret + %wide.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %in, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef) + %sext = sext <vscale x 4 x i8> %wide.load to <vscale x 4 x i64> + %res = sitofp <vscale x 4 x i64> %sext to <vscale x 4 x double> + ret <vscale x 4 x double> %res +} + declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>) declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) 
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>) diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll index 79eff4d7c572..69b3b46d9a7e 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll @@ -95,6 +95,19 @@ define <vscale x 8 x i64> @masked_zload_nxv8i16(<vscale x 8 x i16>* %a, <vscale ret <vscale x 8 x i64> %ext } +; Masked load requires promotion +define <vscale x 2 x double> @masked_zload_2i16_2f64(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) { +; CHECK-LABEL: masked_zload_2i16_2f64: +; CHECK: ld1h { z0.d }, p0/z, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef) + %zext = zext <vscale x 2 x i16> %wide.load to <vscale x 2 x i32> + %res = uitofp <vscale x 2 x i32> %zext to <vscale x 2 x double> + ret <vscale x 2 x double> %res +} + declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>) declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>) declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>) |