diff options
author | belagod <belagod@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-01-08 17:19:57 +0000 |
---|---|---|
committer | belagod <belagod@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-01-08 17:19:57 +0000 |
commit | 501336f7ede14521a9ce947f4dd924acf455446c (patch) | |
tree | c3af02e2f57cf5da176faefe1cd85d515a5e8f64 | |
parent | cc72d6e932e3cf059a3d02c5f86380b4b217782f (diff) | |
download | gcc-501336f7ede14521a9ce947f4dd924acf455446c.tar.gz |
2013-01-08 Tejas Belagod <tejas.belagod@arm.com>
* config/aarch64/aarch64-simd.md (vec_init<mode>): New.
* config/aarch64/aarch64-protos.h (aarch64_expand_vector_init): Declare.
* config/aarch64/aarch64.c (aarch64_simd_dup_constant,
aarch64_simd_make_constant, aarch64_expand_vector_init): New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@195027 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 11 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 160 |
4 files changed, 179 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 59d97edee7b..757ee869a4e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2013-01-08 Tejas Belagod <tejas.belagod@arm.com> + + * config/aarch64/aarch64-simd.md (vec_init<mode>): New. + * config/aarch64/aarch64-protos.h (aarch64_expand_vector_init): Declare. + * config/aarch64/aarch64.c (aarch64_simd_dup_constant, + aarch64_simd_make_constant, aarch64_expand_vector_init): New. + 2013-01-08 Jakub Jelinek <jakub@redhat.com> PR fortran/55341 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index ba96cd66618..a29e04ea13c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -184,6 +184,7 @@ void aarch64_elf_asm_named_section (const char *, unsigned, tree); void aarch64_expand_epilogue (bool); void aarch64_expand_mov_immediate (rtx, rtx); void aarch64_expand_prologue (void); +void aarch64_expand_vector_init (rtx, rtx); void aarch64_function_profiler (FILE *, int); void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, const_tree, unsigned); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index cbf8b885fb8..06eafc69c9f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3548,3 +3548,14 @@ DONE; }) +;; Standard pattern name vec_init<mode>. + +(define_expand "vec_init<mode>" + [(match_operand:VALL 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ba8287ff026..d10cfb80e0a 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -6548,6 +6548,166 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) return true; } +/* If VALS is a vector constant that can be loaded into a register + using DUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ +static rtx +aarch64_simd_dup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR) + return NULL_RTX; + + for (i = 1; i < n_elts; ++i) + { + x = CONST_VECTOR_ELT (vals, i); + if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0))) + all_same = false; + } + + if (!all_same) + return NULL_RTX; + + /* We can load this constant by using DUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ +rtx +aarch64_simd_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx const_dup; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. */ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL_RTX + && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL, + NULL, NULL, NULL)) + /* Load using MOVI/MVNI. */ + return const_vec; + else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) + /* Loaded using DUP. */ + return const_dup; + else if (const_vec != NULL_RTX) + /* Load from constant pool. We can not take advantage of single-cycle + LD1 because we need a PC-relative addressing mode. */ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +void +aarch64_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + x = XVECEXP (vals, 0, 0); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + n_var = 1, one_var = 0; + + for (i = 1; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + ++n_var, one_var = i; + + if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = aarch64_simd_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + enum insn_code icode; + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); + aarch64_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + icode = optab_handler (vec_set_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + emit_insn (GEN_FCN (icode) (target, x, index)); + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); + +} + static unsigned HOST_WIDE_INT aarch64_shift_truncation_mask (enum machine_mode mode) { |