Diffstat (limited to 'gcc/config/spu')
-rw-r--r--  gcc/config/spu/spu-builtins.def |  1
-rw-r--r--  gcc/config/spu/spu.c            | 39
-rw-r--r--  gcc/config/spu/spu.md           | 55
3 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
index 5fdd0cb0b1a..c8b7851ab67 100644
--- a/gcc/config/spu/spu-builtins.def
+++ b/gcc/config/spu/spu-builtins.def
@@ -246,6 +246,7 @@ DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN,
DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
/* definitions to support overloaded generic builtin functions: */
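The _A2(SPU_BTI_V16QI, SPU_BTI_PTR) signature gives the new builtin one pointer argument and a V16QI result, and the spu.c change below marks its decl TREE_READONLY so repeated calls on the same pointer can be CSEd. A minimal sketch of the C-level shape, purely for illustration: the exposed function name and the vector typedef are assumptions, not taken from this patch.

typedef unsigned char v16qi __attribute__ ((vector_size (16)));

/* Hypothetical declaration matching _A2(SPU_BTI_V16QI, SPU_BTI_PTR):
   one pointer in, a 16-byte shuffle-control mask out.  */
extern v16qi spu_lvsr (const void *);

static v16qi
load_mask_example (const float *p)
{
  return spu_lvsr (p);  /* mask for realigning a load from *p */
}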
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 7317da70afb..83bfdc8f1f8 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -130,6 +130,7 @@ static void spu_init_libfuncs (void);
static bool spu_return_in_memory (tree type, tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
+static tree spu_builtin_mask_for_load (void);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
@@ -248,6 +249,9 @@ const struct attribute_spec spu_attribute_table[];
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make sense
@@ -4288,6 +4292,8 @@ spu_init_builtins (void)
d->fndecl =
add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
NULL, NULL_TREE);
+ if (d->fcode == SPU_MASK_FOR_LOAD)
+ TREE_READONLY (d->fndecl) = 1;
}
}
@@ -4843,6 +4849,31 @@ spu_expand_builtin_1 (struct spu_builtin_description *d,
i++;
}
+ if (d->fcode == SPU_MASK_FOR_LOAD)
+ {
+ enum machine_mode mode = insn_data[icode].operand[1].mode;
+ tree arg;
+ rtx addr, op, pat;
+
+ /* get addr */
+ arg = TREE_VALUE (arglist);
+ gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+
+ /* negate addr */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, op,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ op = gen_rtx_MEM (mode, op);
+
+ pat = GEN_FCN (icode) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+ }
+
/* Ignore align_hint, but still expand its args in case they have
side effects. */
if (icode == CODE_FOR_spu_align_hint)
@@ -4962,3 +4993,11 @@ spu_expand_builtin (tree exp,
abort ();
}
+/* Implement targetm.vectorize.builtin_mask_for_load. */
+static tree
+spu_builtin_mask_for_load (void)
+{
+ struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
+ gcc_assert (d);
+ return d->fndecl;
+}
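The expander negates the address before handing it to the spu_lvsr pattern, and the pattern then subtracts the low four bits of that negated address from the constant bytes 0x10..0x1F. A small host-side model of exactly that arithmetic, for illustration only (the function name and the sample address are made up):

#include <stdint.h>
#include <stdio.h>

/* Byte-level model of spu_expand_builtin_1 plus the spu_lvsr pattern:
   negate the address, then form (0x10 + i) - (negated_address & 0xF)
   in every byte lane.  This runs on the host; it is not SPU code.  */
static void
model_mask_for_load (uintptr_t addr, uint8_t mask[16])
{
  uintptr_t neg = 0 - addr;            /* "negate addr" step in spu.c */
  uint8_t low = (uint8_t) (neg & 0xF); /* addr_bits, splatted by shufb */

  for (int i = 0; i < 16; i++)
    mask[i] = (uint8_t) ((0x10 + i) - low);  /* offset - (addr & 0xF) */
}

int
main (void)
{
  uint8_t mask[16];

  /* For an address with misalignment 5, neg & 0xF is 11, so the mask
     bytes come out as 5, 6, ..., 20.  */
  model_mask_for_load (0x1005, mask);
  for (int i = 0; i < 16; i++)
    printf ("%02x ", mask[i]);
  printf ("\n");
  return 0;
}

For a nonzero misalignment a, (0 - addr) & 0xF is 16 - a, so each mask byte works out to a + i, which is the index of the wanted byte within the 32-byte concatenation of two adjacent aligned quadwords.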
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 9cb0dc7f400..60f00fb6cda 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -142,7 +142,10 @@
(UNSPEC_MTSPR 45)
(UNSPEC_RDCH 46)
(UNSPEC_RCHCNT 47)
- (UNSPEC_WRCH 48)])
+ (UNSPEC_WRCH 48)
+ (UNSPEC_SPU_REALIGN_LOAD 49)
+ (UNSPEC_SPU_MASK_FOR_LOAD 50)
+])
(include "predicates.md")
(include "constraints.md")
@@ -3374,3 +3377,53 @@ selb\t%0,%4,%0,%3"
emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
DONE;
}")
+
+(define_expand "vec_realign_load_<mode>"
+ [(set (match_operand:ALL 0 "register_operand" "=r")
+ (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
+ (match_operand:ALL 2 "register_operand" "r")
+ (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
+ ""
+ "
+{
+ emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+}")
+
+(define_expand "spu_lvsr"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
+ ""
+ "
+{
+ rtx addr;
+ rtx offset = gen_reg_rtx (V8HImode);
+ rtx addr_bits = gen_reg_rtx (SImode);
+ rtx addr_bits_vec = gen_reg_rtx (V8HImode);
+ rtx splatqi = gen_reg_rtx (TImode);
+ rtx result = gen_reg_rtx (V8HImode);
+ unsigned char arr[16] = {
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
+ unsigned char arr2[16] = {
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
+
+ emit_move_insn (offset, array_to_constant (V8HImode, arr));
+ emit_move_insn (splatqi, array_to_constant (TImode, arr2));
+
+ gcc_assert (GET_CODE (operands[1]) == MEM);
+ addr = force_reg (Pmode, XEXP (operands[1], 0));
+ emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
+ emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
+
+ /* offset - (addr & 0xF)
+ It is safe to use a single sfh, because each byte of offset is > 15 and
+ each byte of addr is <= 15. */
+ emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
+
+ result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
+ emit_move_insn (operands[0], result);
+
+ DONE;
+}")