diff options
Diffstat (limited to 'gcc/config/spu')
-rw-r--r-- | gcc/config/spu/spu-builtins.def | 1 | ||||
-rw-r--r-- | gcc/config/spu/spu.c | 39 | ||||
-rw-r--r-- | gcc/config/spu/spu.md | 55 |
3 files changed, 94 insertions, 1 deletions
diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
index 5fdd0cb0b1a..c8b7851ab67 100644
--- a/gcc/config/spu/spu-builtins.def
+++ b/gcc/config/spu/spu-builtins.def
@@ -246,6 +246,7 @@ DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN,
 DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
 DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
 
 /* definitions to support overloaded generic builtin functions: */
 
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 7317da70afb..83bfdc8f1f8 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -130,6 +130,7 @@ static void spu_init_libfuncs (void);
 static bool spu_return_in_memory (tree type, tree fntype);
 static void fix_range (const char *);
 static void spu_encode_section_info (tree, rtx, int);
+static tree spu_builtin_mask_for_load (void);
 
 extern const char *reg_names[];
 rtx spu_compare_op0, spu_compare_op1;
@@ -248,6 +249,9 @@ const struct attribute_spec spu_attribute_table[];
 #undef TARGET_ENCODE_SECTION_INFO
 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
 
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Sometimes certain combinations of command options do not make sense
@@ -4288,6 +4292,8 @@ spu_init_builtins (void)
       d->fndecl =
         add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
                               NULL, NULL_TREE);
+      if (d->fcode == SPU_MASK_FOR_LOAD)
+        TREE_READONLY (d->fndecl) = 1;
     }
 }
 
@@ -4843,6 +4849,31 @@ spu_expand_builtin_1 (struct spu_builtin_description *d,
       i++;
     }
 
+  if (d->fcode == SPU_MASK_FOR_LOAD)
+    {
+      enum machine_mode mode = insn_data[icode].operand[1].mode; /* NOTE(review): operand 1 of the spu_lvsr pattern is declared without a mode in spu.md; confirm what this yields.  */
+      tree arg;
+      rtx addr, op, pat;
+
+      /* Expand the pointer argument to RTL and pass it through memory_address.  */
+      arg = TREE_VALUE (arglist);
+      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
+      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+      addr = memory_address (mode, op);
+
+      /* Negate the address into a new register and wrap that register in a MEM operand.  */
+      op = gen_reg_rtx (GET_MODE (addr));
+      emit_insn (gen_rtx_SET (VOIDmode, op,
+                 gen_rtx_NEG (GET_MODE (addr), addr)));
+      op = gen_rtx_MEM (mode, op);
+
+      pat = GEN_FCN (icode) (target, op);
+      if (!pat)
+        return 0;
+      emit_insn (pat);
+      return target;
+    }
+
   /* Ignore align_hint, but still expand it's args in case they have
      side effects. */
   if (icode == CODE_FOR_spu_align_hint)
@@ -4962,3 +4993,11 @@ spu_expand_builtin (tree exp,
   abort ();
 }
 
+/* Implement targetm.vectorize.builtin_mask_for_load: return the fndecl stored for SPU_MASK_FOR_LOAD.  */
+static tree
+spu_builtin_mask_for_load (void)
+{
+  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
+  gcc_assert (d); /* NOTE(review): d is the address of a table entry, so this presumably can never fail.  */
+  return d->fndecl;
+}
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 9cb0dc7f400..60f00fb6cda 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -142,7 +142,10 @@
  (UNSPEC_MTSPR 45)
  (UNSPEC_RDCH 46)
  (UNSPEC_RCHCNT 47)
- (UNSPEC_WRCH 48)])
+ (UNSPEC_WRCH 48)
+ (UNSPEC_SPU_REALIGN_LOAD 49)
+ (UNSPEC_SPU_MASK_FOR_LOAD 50)
+])
 
 (include "predicates.md")
 (include "constraints.md")
@@ -3374,3 +3377,53 @@ selb\t%0,%4,%0,%3"
   emit_insn (gen_selb (operands[0], operands[1], operands[2], mask));
   DONE;
 }")
+
+(define_expand "vec_realign_load_<mode>"
+  [(set (match_operand:ALL 0 "register_operand" "=r")
+        (unspec:ALL [(match_operand:ALL 1 "register_operand" "r")
+                     (match_operand:ALL 2 "register_operand" "r")
+                     (match_operand:TI 3 "register_operand" "r")] UNSPEC_SPU_REALIGN_LOAD))]
+  ""
+  "
+{
+  emit_insn (gen_shufb (operands[0], operands[1], operands[2], operands[3]));
+  DONE;
+}")
+
+(define_expand "spu_lvsr"
+  [(set (match_operand:V16QI 0 "register_operand" "")
+        (unspec:V16QI [(match_operand 1 "memory_operand" "")] UNSPEC_SPU_MASK_FOR_LOAD))]
+  ""
+  "
+{
+  rtx addr;
+  rtx offset = gen_reg_rtx (V8HImode);
+  rtx addr_bits = gen_reg_rtx (SImode);
+  rtx addr_bits_vec = gen_reg_rtx (V8HImode);
+  rtx splatqi = gen_reg_rtx (TImode);
+  rtx result = gen_reg_rtx (V8HImode);
+  unsigned char arr[16] = {
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
+  unsigned char arr2[16] = {
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03};
+
+  emit_move_insn (offset, array_to_constant (V8HImode, arr));
+  emit_move_insn (splatqi, array_to_constant (TImode, arr2));
+
+  gcc_assert (GET_CODE (operands[1]) == MEM);
+  addr = force_reg (Pmode, XEXP (operands[1], 0));
+  emit_insn (gen_andsi3 (addr_bits, addr, GEN_INT (0xF)));
+  emit_insn (gen_shufb (addr_bits_vec, addr_bits, addr_bits, splatqi));
+
+  /* result = offset - (addr & 0xF), done as one V8HI (halfword) subtract.
+     A single sfh is safe because every byte of offset is > 15 and every
+     byte of the masked address vector is <= 15, so no byte needs a borrow.  */
+  emit_insn (gen_subv8hi3 (result, offset, addr_bits_vec));
+
+  result = simplify_gen_subreg (V16QImode, result, V8HImode, 0);
+  emit_move_insn (operands[0], result);
+
+  DONE;
+}") |