author	belagod <belagod@138bc75d-0d04-0410-961f-82ee72b054a4>	2013-01-08 17:19:57 +0000
committer	belagod <belagod@138bc75d-0d04-0410-961f-82ee72b054a4>	2013-01-08 17:19:57 +0000
commit	501336f7ede14521a9ce947f4dd924acf455446c (patch)
tree	c3af02e2f57cf5da176faefe1cd85d515a5e8f64
parent	cc72d6e932e3cf059a3d02c5f86380b4b217782f (diff)
download	gcc-501336f7ede14521a9ce947f4dd924acf455446c.tar.gz
2013-01-08  Tejas Belagod  <tejas.belagod@arm.com>

	* config/aarch64/aarch64-simd.md (vec_init<mode>): New.
	* config/aarch64/aarch64-protos.h (aarch64_expand_vector_init): Declare.
	* config/aarch64/aarch64.c (aarch64_simd_dup_constant,
	aarch64_simd_make_constant, aarch64_expand_vector_init): New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@195027 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--	gcc/ChangeLog	7
-rw-r--r--	gcc/config/aarch64/aarch64-protos.h	1
-rw-r--r--	gcc/config/aarch64/aarch64-simd.md	11
-rw-r--r--	gcc/config/aarch64/aarch64.c	160
4 files changed, 179 insertions, 0 deletions
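As a quick orientation before the diff: the sketch below shows the kind of GNU C vector-extension code the new vec_init<mode> expander is meant to handle, one function per strategy implemented in aarch64_expand_vector_init. It is illustrative only; the typedef, function names, and compile flags (e.g. -O2 targeting AArch64) are hypothetical, and whether a given constructor actually reaches vec_init<mode> depends on how the middle end chooses to expand it.

/* Illustrative only: cases that roughly correspond to the strategies
   in aarch64_expand_vector_init below.  */

typedef int v4si __attribute__ ((vector_size (16)));

/* All elements constant: loadable as an immediate (MOVI/MVNI) or from
   the constant pool via aarch64_simd_make_constant.  */
v4si
all_const (void)
{
  return (v4si) { 1, 2, 3, 4 };
}

/* All elements the same non-constant value: the all_same path,
   a single DUP from a general register.  */
v4si
splat (int x)
{
  return (v4si) { x, x, x, x };
}

/* Exactly one variable lane: load the constant part first, then insert
   the variable element through the vec_set pattern (the n_var == 1 path).  */
v4si
one_var (int x)
{
  return (v4si) { 1, x, 3, 4 };
}

/* General case: built in a stack temporary one element at a time,
   then loaded as a whole vector.  */
v4si
general (int a, int b, int c, int d)
{
  return (v4si) { a, b, c, d };
}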
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 59d97edee7b..757ee869a4e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2013-01-08 Tejas Belagod <tejas.belagod@arm.com>
+
+ * config/aarch64/aarch64-simd.md (vec_init<mode>): New.
+ * config/aarch64/aarch64-protos.h (aarch64_expand_vector_init): Declare.
+ * config/aarch64/aarch64.c (aarch64_simd_dup_constant,
+ aarch64_simd_make_constant, aarch64_expand_vector_init): New.
+
2013-01-08 Jakub Jelinek <jakub@redhat.com>
PR fortran/55341
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ba96cd66618..a29e04ea13c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -184,6 +184,7 @@ void aarch64_elf_asm_named_section (const char *, unsigned, tree);
void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
void aarch64_expand_prologue (void);
+void aarch64_expand_vector_init (rtx, rtx);
void aarch64_function_profiler (FILE *, int);
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
const_tree, unsigned);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cbf8b885fb8..06eafc69c9f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3548,3 +3548,14 @@
DONE;
})
+;; Standard pattern name vec_init<mode>.
+
+(define_expand "vec_init<mode>"
+ [(match_operand:VALL 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SIMD"
+{
+ aarch64_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ba8287ff026..d10cfb80e0a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6548,6 +6548,166 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
return true;
}
+/* If VALS is a vector constant that can be loaded into a register
+ using DUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX. */
+static rtx
+aarch64_simd_dup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
+ if (GET_CODE (vals) != CONST_VECTOR)
+ return NULL_RTX;
+
+ for (i = 1; i < n_elts; ++i)
+ {
+ x = CONST_VECTOR_ELT (vals, i);
+ if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ return NULL_RTX;
+
+ /* We can load this constant by using DUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+ x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
+ return gen_rtx_VEC_DUPLICATE (mode, x);
+}
+
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that can not be converted into a CONST_VECTOR. */
+rtx
+aarch64_simd_make_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ rtx const_dup;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
+
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
+ {
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ }
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL_RTX
+ && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
+ NULL, NULL, NULL))
+ /* Load using MOVI/MVNI. */
+ return const_vec;
+ else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
+ /* Loaded using DUP. */
+ return const_dup;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. We can not take advantage of single-cycle
+ LD1 because we need a PC-relative addressing mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We can not construct an initializer. */
+ return NULL_RTX;
+}
+
+/* Expand a vector initialisation sequence, such that TARGET is
+   initialised to contain VALS.  */
+void
+aarch64_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
+
+ x = XVECEXP (vals, 0, 0);
+ if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
+ n_var = 1, one_var = 0;
+
+ for (i = 1; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
+ ++n_var, one_var = i;
+
+ if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ rtx constant = aarch64_simd_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtx index = GEN_INT (one_var);
+ enum insn_code icode;
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
+ aarch64_expand_vector_init (target, copy);
+
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ icode = optab_handler (vec_set_optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (icode) (target, x, index));
+ return;
+ }
+
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
+ emit_move_insn (target, mem);
+}
+
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{