summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Stubbs <ams@codesourcery.com> 2023-03-06 12:42:44 +0000
committer: Andrew Stubbs <ams@codesourcery.com> 2023-03-17 13:03:01 +0000
commit: 47cfa2d657edb9eddc8836059f02d81cbebad2e5 (patch)
tree: a27093980ed49904cc33e92df306c0d0d2f0ed64
parent: 61453f32d28c60bb0f996a6d60b2fdc52e67c093 (diff)
download: gcc-47cfa2d657edb9eddc8836059f02d81cbebad2e5.tar.gz
amdgcn: gather/scatter with DImode offsets
The GPU architecture requires SImode offsets on gather/scatter instructions, but those instructions can also take a vector of absolute addresses, so this allows gather/scatter in more situations.

gcc/ChangeLog:

* config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
(scatter_store<mode><vndi>): New.
(mask_gather_load<mode><vndi>): New.
(mask_scatter_store<mode><vndi>): New.
-rw-r--r--gcc/ChangeLog.omp7
-rw-r--r--gcc/config/gcn/gcn-valu.md123
2 files changed, 130 insertions, 0 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 6d88f486fa7..3a3e54bdeb2 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,5 +1,12 @@
2023-03-17 Andrew Stubbs <ams@codesourcery.com>
+ * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
+ (scatter_store<mode><vndi>): New.
+ (mask_gather_load<mode><vndi>): New.
+ (mask_scatter_store<mode><vndi>): New.
+
+2023-03-17 Andrew Stubbs <ams@codesourcery.com>
+
* config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New.
* config/gcn/gcn-valu.md (V_1REG_ALT): New.
(V_2REG_ALT): New.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 1bfd358cf10..70e3fa63c53 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -961,6 +961,34 @@
;;
;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
+;; Gather load with a <VnDI> vector of offsets.
+;;   operand 0: destination vector
+;;   operand 1: scalar DImode base address
+;;   operand 2: vector of DImode offsets
+;;   operand 3: immediate; not referenced by this expander
+;;   operand 4: scale multiplied into the offsets
+;; The expander builds a vector of absolute addresses
+;; (base + offset * scale) and hands it to gather<mode>_insn_1offset
+;; with constant zero for the remaining operands.
+;; NOTE(review): a scale that is not a CONST_INT is treated like a scale
+;; of 1; confirm that callers only ever pass constants.
+(define_expand "gather_load<mode><vndi>"
+  [(match_operand:V_ALL 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:<VnDI> 2 "register_operand")
+   (match_operand 3 "immediate_operand")
+   (match_operand:SI 4 "gcn_alu_operand")]
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    /* With a scale of 1 the incoming offsets are used directly.  */
+    rtx offsets = operands[2];
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+        /* Allocate the scaling temporaries only when they are needed, so
+           that the unscaled path creates no unused pseudos.  */
+        rtx multiplier = gen_reg_rtx (<VnDI>mode);
+        offsets = gen_reg_rtx (<VnDI>mode);
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+
+    /* Broadcast the scalar base and add the per-lane offsets.  */
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+                                              const0_rtx, const0_rtx));
+    DONE;
+  })
+
(define_expand "gather_load<mode><vnsi>"
[(match_operand:V_ALL 0 "register_operand")
(match_operand:DI 1 "register_operand")
@@ -1091,6 +1119,34 @@
(set_attr "length" "12")
(set_attr "xnack" "off,on")])
+;; Scatter store with a <VnDI> vector of offsets.
+;;   operand 0: scalar DImode base address
+;;   operand 1: vector of DImode offsets
+;;   operand 2: immediate; not referenced by this expander
+;;   operand 3: scale multiplied into the offsets
+;;   operand 4: vector of values to store
+;; The expander builds a vector of absolute addresses
+;; (base + offset * scale) and hands it to scatter<mode>_insn_1offset
+;; with constant zero for the remaining operands.
+;; NOTE(review): a scale that is not a CONST_INT is treated like a scale
+;; of 1; confirm that callers only ever pass constants.
+(define_expand "scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:SI 3 "gcn_alu_operand")
+   (match_operand:V_ALL 4 "register_operand")]
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    /* With a scale of 1 the incoming offsets are used directly.  */
+    rtx offsets = operands[1];
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+        /* Allocate the scaling temporaries only when they are needed, so
+           that the unscaled path creates no unused pseudos.  */
+        rtx multiplier = gen_reg_rtx (<VnDI>mode);
+        offsets = gen_reg_rtx (<VnDI>mode);
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+
+    /* Broadcast the scalar base and add the per-lane offsets.  */
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+                                               const0_rtx, const0_rtx));
+    DONE;
+  })
+
(define_expand "scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")
@@ -3528,6 +3584,41 @@
DONE;
})
+;; Masked gather load with a <VnDI> vector of offsets.
+;;   operand 0: destination vector
+;;   operand 1: scalar DImode base address
+;;   operand 2: vector of DImode offsets
+;;   operand 3: immediate; not referenced by this expander
+;;   operand 4: scale multiplied into the offsets
+;;   operand 5: DImode mask, forced into a register and passed as the
+;;              final (exec) operand of the gather instruction
+;; The expander builds a vector of absolute addresses
+;; (base + offset * scale), zeroes the destination, and then emits
+;; gather<mode>_insn_1offset_exec with the destination also supplied as
+;; the second-to-last operand.
+;; NOTE(review): a scale that is not a CONST_INT is treated like a scale
+;; of 1; confirm that callers only ever pass constants.
+(define_expand "mask_gather_load<mode><vndi>"
+  [(match_operand:V_ALL 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:<VnDI> 2 "register_operand")
+   (match_operand 3 "immediate_operand")
+   (match_operand:SI 4 "gcn_alu_operand")
+   (match_operand:DI 5 "")]
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx exec = force_reg (DImode, operands[5]);
+    /* With a scale of 1 the incoming offsets are used directly.  */
+    rtx offsets = operands[2];
+
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+        /* Allocate the scaling temporaries only when they are needed, so
+           that the unscaled path creates no unused pseudos.  */
+        rtx multiplier = gen_reg_rtx (<VnDI>mode);
+        offsets = gen_reg_rtx (<VnDI>mode);
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+
+    /* Broadcast the scalar base and add the per-lane offsets.  */
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+                                                   const0_rtx, const0_rtx,
+                                                   const0_rtx, operands[0],
+                                                   exec));
+    DONE;
+  })
+
(define_expand "mask_gather_load<mode><vnsi>"
[(match_operand:V_ALL 0 "register_operand")
(match_operand:DI 1 "register_operand")
@@ -3559,6 +3650,38 @@
DONE;
})
+;; Masked scatter store with a <VnDI> vector of offsets.
+;;   operand 0: scalar DImode base address
+;;   operand 1: vector of DImode offsets
+;;   operand 2: immediate; not referenced by this expander
+;;   operand 3: scale multiplied into the offsets
+;;   operand 4: vector of values to store
+;;   operand 5: DImode mask, forced into a register and passed as the
+;;              final (exec) operand of the scatter instruction
+;; The expander builds a vector of absolute addresses
+;; (base + offset * scale) and hands it to
+;; scatter<mode>_insn_1offset_exec.
+;; NOTE(review): operand 3 is declared DImode here, whereas the scale
+;; operand of gather_load<mode><vndi>, scatter_store<mode><vndi> and
+;; mask_gather_load<mode><vndi> is SImode; confirm which is intended.
+;; NOTE(review): a scale that is not a CONST_INT is treated like a scale
+;; of 1; confirm that callers only ever pass constants.
+(define_expand "mask_scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:<VnDI> 1 "register_operand")
+   (match_operand 2 "immediate_operand")
+   (match_operand:DI 3 "gcn_alu_operand")
+   (match_operand:V_ALL 4 "register_operand")
+   (match_operand:DI 5 "")]
+  ""
+  {
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx exec = force_reg (DImode, operands[5]);
+    /* With a scale of 1 the incoming offsets are used directly.  */
+    rtx offsets = operands[1];
+
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+        /* Allocate the scaling temporaries only when they are needed, so
+           that the unscaled path creates no unused pseudos.  */
+        rtx multiplier = gen_reg_rtx (<VnDI>mode);
+        offsets = gen_reg_rtx (<VnDI>mode);
+        emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+        emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+
+    /* Broadcast the scalar base and add the per-lane offsets.  */
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+                                                    operands[4], const0_rtx,
+                                                    const0_rtx,
+                                                    exec));
+    DONE;
+  })
+
(define_expand "mask_scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")