author     Kerry McLaughlin <kerry.mclaughlin@arm.com>    2021-10-26 10:37:52 +0100
committer  Kerry McLaughlin <kerry.mclaughlin@arm.com>    2021-10-27 14:15:41 +0100
commit     f01fafdcd469eab1e76f4bbb549d0b8729b5b195 (patch)
tree       6946943b2e2fa280b10409acddbf672ffc11807a
parent     eae047afe0d35efc85e23b1688fc859c53843893 (diff)
[SVE][CodeGen] Fix incorrect legalisation of zero-extended masked loads
PromoteIntRes_MLOAD always sets the extension type to `EXTLOAD`, which results in a sign-extended load. If the type returned by getExtensionType() for the load being promoted is something other than `NON_EXTLOAD`, we should instead pass this to getMaskedLoad() as the extension type.

Reviewed By: CarolineConcatto

Differential Revision: https://reviews.llvm.org/D112320
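For illustration, a minimal IR sketch of the affected pattern (hypothetical, modelled on the new sve-masked-ldst-zext.ll test below, not an additional test in this commit): both the <vscale x 2 x i16> masked load and the <vscale x 2 x i32> zext need integer promotion, and once the zext has been folded into the masked load, PromoteIntRes_MLOAD must preserve the ZEXTLOAD extension type so the lowered load zero-extends (ld1h) rather than sign-extends.

; Hypothetical function name; the shape mirrors masked_zload_2i16_2f64 below.
define <vscale x 2 x double> @zext_masked_load_sketch(<vscale x 2 x i16>* %p, <vscale x 2 x i1> %mask) {
  %load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %p, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  ; The zero extension is combined into the masked load; its ZEXTLOAD type
  ; must survive promotion of the illegal nxv2i32 result for %fp to be correct.
  %ext = zext <vscale x 2 x i16> %load to <vscale x 2 x i32>
  %fp = uitofp <vscale x 2 x i32> %ext to <vscale x 2 x double>
  ret <vscale x 2 x double> %fp
}

declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)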
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  |  7
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll     | 10
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll       | 17
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll       | 13
4 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 3e43c554d77c..29812ef8f1a0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -784,11 +784,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
 
+  ISD::LoadExtType ExtType = N->getExtensionType();
+  if (ExtType == ISD::NON_EXTLOAD)
+    ExtType = ISD::EXTLOAD;
+
   SDLoc dl(N);
   SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
                                   N->getOffset(), N->getMask(), ExtPassThru,
                                   N->getMemoryVT(), N->getMemOperand(),
-                                  N->getAddressingMode(), ISD::EXTLOAD);
+                                  N->getAddressingMode(), ExtType,
+                                  N->isExpandingLoad());
   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
index 085c15396778..70b4f5b1b053 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll
@@ -117,6 +117,15 @@ define <vscale x 4 x i32> @masked_load_passthru(<vscale x 4 x i32> *%a, <vscale
   ret <vscale x 4 x i32> %load
 }
 
+; Masked load requires promotion
+define <vscale x 2 x i16> @masked_load_nxv2i16(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_load_nxv2i16
+; CHECK: ld1sh { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
+  %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  ret <vscale x 2 x i16> %wide.load
+}
+
 ;
 ; Masked Stores
 ;
@@ -315,6 +324,7 @@ define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale
 declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
index 9a20035d3c79..4d3c4fa3616e 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -89,6 +89,23 @@ define <vscale x 16 x i32> @masked_sload_nxv16i8(<vscale x 16 x i8>* %a, <vscale
   ret <vscale x 16 x i32> %ext
 }
 
+; Masked load requires promotion
+define <vscale x 4 x double> @masked_sload_4i8_4f32(<vscale x 4 x i8>* noalias %in, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_sload_4i8_4f32:
+; CHECK: punpkhi p2.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x0, #1, mul vl]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: scvtf z0.d, p1/m, z0.d
+; CHECK-NEXT: scvtf z1.d, p1/m, z1.d
+; CHECK-NEXT: ret
+  %wide.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %in, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
+  %sext = sext <vscale x 4 x i8> %wide.load to <vscale x 4 x i64>
+  %res = sitofp <vscale x 4 x i64> %sext to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
 declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
index 79eff4d7c572..69b3b46d9a7e 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -95,6 +95,19 @@ define <vscale x 8 x i64> @masked_zload_nxv8i16(<vscale x 8 x i16>* %a, <vscale
   ret <vscale x 8 x i64> %ext
 }
 
+; Masked load requires promotion
+define <vscale x 2 x double> @masked_zload_2i16_2f64(<vscale x 2 x i16>* noalias %in, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_zload_2i16_2f64:
+; CHECK: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: ret
+  %wide.load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %in, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+  %zext = zext <vscale x 2 x i16> %wide.load to <vscale x 2 x i32>
+  %res = uitofp <vscale x 2 x i32> %zext to <vscale x 2 x double>
+  ret <vscale x 2 x double> %res
+}
+
 declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)