summary refs log tree commit diff
diff options
context:
space:
mode:
authorPhilip Reames <preames@rivosinc.com>2023-05-17 11:06:24 -0700
committerPhilip Reames <listmail@philipreames.com>2023-05-17 11:13:57 -0700
commit0457f506fddf47cfe842b398c7f522057cef8163 (patch)
tree76a81a28f1721626ba5e0f3bfdab0f21dec3a8f5
parent6c59f399a6ca66b7ed3298ab5bcc594aa2930043 (diff)
downloadllvm-0457f506fddf47cfe842b398c7f522057cef8163.tar.gz
[RISCV] Implement storeOfVectorConstantIsCheap hook to prevent store merging at VL=2
In general, VL=2 vectors are very questionable profitability-wise. For constants specifically, our inability to materialize many vector constants cheaply biases us strongly towards unprofitability at VL=2. This hook is very close to the x86 implementation. The difference is that X86 whitelists stores of zeros, and we're better off letting that stay scalar at VL=2. Differential Revision: https://reviews.llvm.org/D150798
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h7
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/combine-store.ll53
2 files changed, 26 insertions, 34 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3936c51884cb..6bf3a811b266 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -531,6 +531,13 @@ public:
return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
}
+ bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
+ unsigned AddrSpace) const override {
+ // If we can replace 4 or more scalar stores, there will be a reduction
+ // in instructions even after we add a vector constant load.
+ return NumElem >= 4;
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
index 9640d7591a9b..c7187148f571 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
@@ -31,9 +31,8 @@ define void @combine_zero_stores_4xi8(ptr %p) {
define void @combine_zero_stores_8xi8(ptr %p) {
; RV32-LABEL: combine_zero_stores_8xi8:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: combine_zero_stores_8xi8:
@@ -72,9 +71,8 @@ define void @combine_zero_stores_2xi16(ptr %p) {
define void @combine_zero_stores_4xi16(ptr %p) {
; RV32-LABEL: combine_zero_stores_4xi16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: combine_zero_stores_4xi16:
@@ -104,9 +102,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
;
; RV64-LABEL: combine_zero_stores_8xi16:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
store i16 zeroinitializer, ptr %p, align 16
%gep1 = getelementptr i16, ptr %p, i64 1
@@ -129,9 +126,8 @@ define void @combine_zero_stores_8xi16(ptr %p) {
define void @combine_zero_stores_2xi32(ptr %p) {
; RV32-LABEL: combine_zero_stores_2xi32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: combine_zero_stores_2xi32:
@@ -154,9 +150,8 @@ define void @combine_zero_stores_4xi32(ptr %p) {
;
; RV64-LABEL: combine_zero_stores_4xi32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
store i32 zeroinitializer, ptr %p, align 16
%gep1 = getelementptr i32, ptr %p, i64 1
@@ -201,18 +196,11 @@ define void @combine_zero_stores_8xi32(ptr %p) {
}
define void @combine_zero_stores_2xi32_unaligned(ptr %p) {
-; RV32-LABEL: combine_zero_stores_2xi32_unaligned:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: combine_zero_stores_2xi32_unaligned:
-; RV64: # %bb.0:
-; RV64-NEXT: sw zero, 0(a0)
-; RV64-NEXT: sw zero, 4(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: combine_zero_stores_2xi32_unaligned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sw zero, 0(a0)
+; CHECK-NEXT: sw zero, 4(a0)
+; CHECK-NEXT: ret
store i32 zeroinitializer, ptr %p
%gep = getelementptr i8, ptr %p, i64 4
store i32 zeroinitializer, ptr %gep
@@ -230,9 +218,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
;
; RV64-LABEL: combine_zero_stores_2xi64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sd zero, 8(a0)
; RV64-NEXT: ret
store i64 zeroinitializer, ptr %p
%gep = getelementptr i8, ptr %p, i64 8
@@ -243,10 +230,8 @@ define void @combine_zero_stores_2xi64(ptr %p) {
define void @combine_fp_zero_stores_crash(ptr %ptr) {
; CHECK-LABEL: combine_fp_zero_stores_crash:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: sw zero, 4(a0)
+; CHECK-NEXT: sw zero, 8(a0)
; CHECK-NEXT: ret
%addr1 = getelementptr float, ptr %ptr, i64 1
%addr2 = getelementptr float, ptr %ptr, i64 2