summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog94
-rw-r--r--gcc/alias.c12
-rw-r--r--gcc/config/i386/i386.c15
-rw-r--r--gcc/config/rs6000/altivec.md58
-rw-r--r--gcc/config/rs6000/rs6000.c96
-rw-r--r--gcc/config/rs6000/rs6000.h2
-rw-r--r--gcc/emit-rtl.c33
-rw-r--r--gcc/expr.c34
-rw-r--r--gcc/fold-const.c8
-rw-r--r--gcc/genopinit.c4
-rw-r--r--gcc/gimplify.c3
-rw-r--r--gcc/optabs.c90
-rw-r--r--gcc/optabs.h10
-rw-r--r--gcc/print-rtl.c12
-rw-r--r--gcc/target-def.h12
-rw-r--r--gcc/target.h17
-rw-r--r--gcc/targhooks.c6
-rw-r--r--gcc/targhooks.h2
-rw-r--r--gcc/testsuite/ChangeLog35
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-13.c22
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-26.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-27.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-27a.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-28.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-29.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-29a.c50
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-40.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-41.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-42.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-43.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-44.c13
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-45.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-46.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-47.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-48.c12
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-48a.c58
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-49.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-50.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-51.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-52.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-53.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-54.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-55.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-56.c5
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-56a.c56
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-57.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-58.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-59.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-60.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-61.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-72.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-72a.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-75.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-76.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-77.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-77a.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-78.c2
-rw-r--r--gcc/tree-dump.c2
-rw-r--r--gcc/tree-eh.c2
-rw-r--r--gcc/tree-gimple.c6
-rw-r--r--gcc/tree-pretty-print.c27
-rw-r--r--gcc/tree-ssa-alias.c11
-rw-r--r--gcc/tree-ssa-dce.c4
-rw-r--r--gcc/tree-ssa-dom.c4
-rw-r--r--gcc/tree-ssa-loop-im.c6
-rw-r--r--gcc/tree-ssa-loop-ivopts.c29
-rw-r--r--gcc/tree-ssa-operands.c16
-rw-r--r--gcc/tree-vectorizer.c391
-rw-r--r--gcc/tree-vectorizer.h1
-rw-r--r--gcc/tree.c4
-rw-r--r--gcc/tree.def33
-rw-r--r--gcc/tree.h10
72 files changed, 1398 insertions, 167 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index aa7bb17cea7..65f8c94af6c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,97 @@
+2004-09-23 Dorit Naishlos <dorit@il.ibm.com>
+
+ * tree.def (ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF):
+ New tree-codes.
+ * tree.h (REF_ORIGINAL): Consider ALIGN_INDIRECT_REF and
+ MISALIGNED_INDIRECT_REF.
+ * alias.c (get_alias_set, nonoverlapping_memrefs_p): Likewise.
+ * emit-rtl.c (mem_expr_equal_p, set_mem_attributes_minus_bitpos):
+ Likewise.
+ * expr.c (safe_from_p, expand_expr_real_1, rewrite_address_base)
+ (find_interesting_uses_address): Likewise.
+ * fold-const.c (non_lvalue, operand_equal_p): Likewise.
+ (build_fold_addr_expr_with_type): Likewise.
+ * gimplify.c (gimplify_addr_expr, gimplify_expr): Likewise.
+ * print-rtl.c (print_mem_expr): Likewise.
+ * tree-dump.c (dequeue_and_dump): Likewise.
+ * tree-eh.c (tree_could_trap_p): Likewise.
+ * tree-gimple.c (is_gimple_addressable, get_base_address): Likewise.
+ * tree-pretty-print.c (op_prio, op_symbol, dump_generic_node): Likewise.
+ * tree-ssa-alias.c (find_ptr_dereference, ptr_is_dereferenced_by):
+ Likewise.
+ * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Likewise.
+ * tree-ssa-dom.c (record_equivalences_from_stmt): Likewise.
+ * tree-ssa-loop-im.c (for_each_index, is_call_clobbered_ref): Likewise.
+ * tree-ssa-loop-ivopts.c (find_interesting_uses_address): Likewise.
+ (add_address_candidates, rewrite_address_base): Likewise.
+ * tree-ssa-operands.c (get_expr_operands, get_indirect_ref_operands):
+ Likewise.
+ * tree.c (staticp, build1_stat): Likewise.
+
+ * tree.def (REALIGN_LOAD_EXPR, REALIGN_STORE_EXPR): New tree-codes.
+ * tree-pretty-print.c (dump_generic_node): Consider REALIGN_LOAD_EXPR.
+ * tree-ssa-operands.c (get_expr_operands): Likewise.
+ * expr.c (expand_expr_real_1): Likewise.
+
+ * optabs.h (vec_realign_store_optab, vec_realign_load_optab): New
+ optabs.
+ (OTI_vec_realign_store, OTI_vec_realign_load): New optab_index values
+ for the new optabs.
+ (expand_ternary_op): New function.
+ * genopinit.c (optabs): Handle the new optabs.
+ * optabs.c (optab_for_tree_code): Add cases for the new tree-codes.
+ (init_optabs): Initialize vec_realign_load_optab.
+ (expand_ternary_op): New function.
+
+ * target-def.h (TARGET_VECTORIZE): New member for struct gcc_target.
+ (TARGET_VECTORIZE_MISALIGNED_MEM_OK): New member for targetm.vectorize.
+ (TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD): Likewise.
+ (TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE): Likewise.
+ * target.h (struct vectorize): New member for struct gcc_target.
+ (misaligned_mem_ok): New member for targetm.vectorize.
+ (builtin_mask_for_load): Likewise.
+ (builtin_mask_for_store): Likewise.
+ * targhooks.c (default_vect_misaligned_mem_ok): New function.
+ * targhooks.h (default_vect_misaligned_mem_ok): Declare.
+
+ * config/rs6000/altivec.md (build_vector_mask_for_load): New
+ define_expand.
+ (vec_realign_load_v4si, vec_realign_load_v4sf, vec_realign_load_v8hi)
+ (vec_realign_load_v16qi): New define_insn.
+ * config/rs6000/rs6000.h (ALTIVEC_BUILTIN_MASK_FOR_LOAD):
+ (ALTIVEC_BUILTIN_MASK_FOR_STORE): New target builtins.
+ * config/rs6000/rs6000.c (altivec_builtin_mask_for_load):
+ (altivec_builtin_mask_for_store): New variables.
+ (rs6000_builtin_mask_for_load): New function. Implements
+ TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD.
+ (rs6000_builtin_mask_for_store): New function. Implements
+ TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE.
+ (rs6000_expand_builtin): Expand the target builtins
+ builtin_mask_for_load and builtin_mask_for_store.
+ (altivec_init_builtins): Initialize the new target builtins.
+ * config/i386/i386.c (ix86_misaligned_mem_ok): New function.
+ Implements the target hook TARGET_VECTORIZE_MISALIGNED_MEM_OK.
+
+ * tree-vectorizer.c (vect_create_data_ref): Renamed to
+ vect_create_data_ref_ptr. Returns a pointer instead of an array-ref.
+ (vect_create_addr_base_for_vector_ref): Additional argument (offset).
+ (vectorizable_store): Call vect_create_data_ref_ptr with additional
+ arguments, and create an indirect_ref with its return value data_ref.
+ Check aligned_access_p.
+ (vectorizable_load): Handle misaligned loads, using software-pipelined
+ scheme with REALIGN_LOAD_EXPR and ALIGN_INDIRECT_REF if
+ vec_realign_load_optab is supported, or using a scheme without
+ software-pipelining with MISALIGNED_INDIRECT_REF if the target hook
+ misaligned_mem_ok is supported.
+
+ (vect_finish_stmt_generation): Typo.
+ (vect_enhance_data_refs_alignment): Rename loop_vinfo to loop_info.
+ (vect_analyze_data_refs_alignment): Don't fail vectorization in the
+ presence of misaligned loads.
+ (vect_analyze_data_ref_access): Add check for constant init.
+ (vect_get_symbl_and_dr): Remove duplicate line.
+ * tree-vectorizer.h (DR_MISALIGNMENT): Add comment.
+
2004-09-23 Kazu Hirata <kazu@cs.umass.edu>
* builtins.c: Fix a comment typo.
diff --git a/gcc/alias.c b/gcc/alias.c
index b937cb13cb8..e096cbf6bc3 100644
--- a/gcc/alias.c
+++ b/gcc/alias.c
@@ -450,7 +450,9 @@ get_alias_set (tree t)
}
/* Check for accesses through restrict-qualified pointers. */
- if (TREE_CODE (inner) == INDIRECT_REF)
+ if (TREE_CODE (inner) == INDIRECT_REF
+ || TREE_CODE (inner) == ALIGN_INDIRECT_REF
+ || TREE_CODE (inner) == MISALIGNED_INDIRECT_REF)
{
tree decl = find_base_decl (TREE_OPERAND (inner, 0));
@@ -2006,7 +2008,9 @@ nonoverlapping_memrefs_p (rtx x, rtx y)
moffsetx = adjust_offset_for_component_ref (exprx, moffsetx);
exprx = t;
}
- else if (TREE_CODE (exprx) == INDIRECT_REF)
+ else if (TREE_CODE (exprx) == INDIRECT_REF
+ || TREE_CODE (exprx) == ALIGN_INDIRECT_REF
+ || TREE_CODE (exprx) == MISALIGNED_INDIRECT_REF)
{
exprx = TREE_OPERAND (exprx, 0);
if (flag_argument_noalias < 2
@@ -2023,7 +2027,9 @@ nonoverlapping_memrefs_p (rtx x, rtx y)
moffsety = adjust_offset_for_component_ref (expry, moffsety);
expry = t;
}
- else if (TREE_CODE (expry) == INDIRECT_REF)
+ else if (TREE_CODE (expry) == INDIRECT_REF
+ || TREE_CODE (expry) == ALIGN_INDIRECT_REF
+ || TREE_CODE (expry) == MISALIGNED_INDIRECT_REF)
{
expry = TREE_OPERAND (expry, 0);
if (flag_argument_noalias < 2
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 09db92e7971..de8411eb663 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -868,6 +868,7 @@ static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
+static bool ix86_misaligned_mem_ok (enum machine_mode);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
@@ -1014,6 +1015,9 @@ static void init_ext_80387_constants (void);
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
+#define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
+
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
@@ -11616,6 +11620,17 @@ ia32_multipass_dfa_lookahead (void)
}
+/* Target hook targetm.vectorize.misaligned_mem_ok for i386.  Return
+   true exactly when MMX is enabled and MODE is a valid MMX register
+   mode; return false for every other mode.  */
+
+static bool
+ix86_misaligned_mem_ok (enum machine_mode mode)
+{
+  return TARGET_MMX && VALID_MMX_REG_MODE (mode);
+}
+
/* Compute the alignment given to a constant that is being placed in memory.
EXP is the constant and ALIGN is the alignment that the object would
ordinarily have.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index ff58d8e3fa8..0eb29b1ea37 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1902,6 +1902,27 @@
"lvsr %0,%y1"
[(set_attr "type" "vecload")])
+;; Expander that builds a V16QI mask in operand 0 from the memory
+;; reference in operand 1.  It negates the address of operand 1 into a
+;; fresh pseudo and emits altivec_lvsr on a MEM of that negated address.
+;; It aborts if operand 1 is not a MEM.  Enabled under TARGET_ALTIVEC.
+(define_expand "build_vector_mask_for_load"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand 1 "memory_operand" "m")] 195))]
+ "TARGET_ALTIVEC"
+ "
+{
+ rtx addr;
+ rtx temp;
+
+ if (GET_CODE (operands[1]) != MEM)
+ abort ();
+
+ addr = XEXP (operands[1], 0);
+ temp = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, temp,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ emit_insn (gen_altivec_lvsr (operands[0],
+ gen_rtx_MEM (GET_MODE (operands[1]), temp)));
+ DONE;
+}")
+
;; Parallel some of the LVE* and STV*'s with unspecs because some have
;; identical rtl but different instructions-- and gcc gets confused.
@@ -2062,3 +2083,40 @@
"vspltisb %2,0\;vsubsws %3,%2,%1\;vmaxsw %0,%1,%3"
[(set_attr "type" "vecsimple")
(set_attr "length" "12")])
+
+;; Realigning load for V4SI: one vperm whose result (operand 0) is taken
+;; from the two V4SI registers in operands 1 and 2, with the V16QI
+;; register in operand 3 as the third vperm input.
+(define_insn "vec_realign_load_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")] 215))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+;; Realigning load for V4SF: one vperm whose result (operand 0) is taken
+;; from the two V4SF registers in operands 1 and 2, with the V16QI
+;; register in operand 3 as the third vperm input.
+(define_insn "vec_realign_load_v4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")] 216))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+;; Realigning load for V8HI: one vperm whose result (operand 0) is taken
+;; from the two V8HI registers in operands 1 and 2, with the V16QI
+;; register in operand 3 as the third vperm input.
+(define_insn "vec_realign_load_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
+ (match_operand:V8HI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")] 217))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
+;; Realigning load for V16QI: one vperm whose result (operand 0) is taken
+;; from the two V16QI registers in operands 1 and 2, with the V16QI
+;; register in operand 3 as the third vperm input.
+(define_insn "vec_realign_load_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
+ (match_operand:V16QI 2 "register_operand" "v")
+ (match_operand:V16QI 3 "register_operand" "v")] 218))]
+ "TARGET_ALTIVEC"
+ "vperm %0,%1,%2,%3"
+ [(set_attr "type" "vecperm")])
+
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index eba4a0672fb..7f448ee0c4b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -143,6 +143,11 @@ enum rs6000_dependence_cost rs6000_sched_costly_dep;
const char *rs6000_sched_insert_nops_str;
enum rs6000_nop_insertion rs6000_sched_insert_nops;
+/* Support targetm.vectorize.builtin_mask_for_load. */
+tree altivec_builtin_mask_for_load;
+/* Support targetm.vectorize.builtin_mask_for_store. */
+tree altivec_builtin_mask_for_store;
+
/* Size of long double */
const char *rs6000_long_double_size_string;
int rs6000_long_double_type_size;
@@ -681,6 +686,8 @@ static int redefine_groups (FILE *, int, rtx, rtx);
static int pad_groups (FILE *, int, rtx, rtx);
static void rs6000_sched_finish (FILE *, int);
static int rs6000_use_sched_lookahead (void);
+static tree rs6000_builtin_mask_for_load (void);
+static tree rs6000_builtin_mask_for_store (void);
static void rs6000_init_builtins (void);
static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx);
@@ -905,6 +912,12 @@ static const char alt_reg_names[][8] =
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
+
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE rs6000_builtin_mask_for_store
+
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
@@ -1536,6 +1549,26 @@ rs6000_override_options (const char *default_cpu)
}
}
+/* Target hook targetm.vectorize.builtin_mask_for_load.  Return the decl
+   stored in altivec_builtin_mask_for_load when AltiVec is enabled, and
+   a null tree otherwise.  */
+static tree
+rs6000_builtin_mask_for_load (void)
+{
+  return TARGET_ALTIVEC ? altivec_builtin_mask_for_load : 0;
+}
+
+/* Target hook targetm.vectorize.builtin_mask_for_store.  Return the decl
+   stored in altivec_builtin_mask_for_store when AltiVec is enabled, and
+   a null tree otherwise.  */
+static tree
+rs6000_builtin_mask_for_store (void)
+{
+  if (!TARGET_ALTIVEC)
+    return 0;
+
+  return altivec_builtin_mask_for_store;
+}
+
/* Handle generic options of the form -mfoo=yes/no.
NAME is the option name.
VALUE is the option value.
@@ -7202,6 +7235,48 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
rtx ret;
bool success;
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD
+ || fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ {
+ int icode = (int) CODE_FOR_altivec_lvsr;
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode = insn_data[icode].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ if (!TARGET_ALTIVEC)
+ abort ();
+
+ arg = TREE_VALUE (arglist);
+ if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE)
+ abort ();
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (VOIDmode, op,
+ gen_rtx_NEG (GET_MODE (addr), addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ /*pat = gen_altivec_lvsr (target, op);*/
+ pat = GEN_FCN (icode) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
if (TARGET_ALTIVEC)
{
ret = altivec_expand_builtin (exp, target, &success);
@@ -7691,6 +7766,9 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
pcchar_type_node, NULL_TREE);
+ tree id;
+ tree decl;
+
def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4sf", v4sf_ftype_pcfloat,
ALTIVEC_BUILTIN_LD_INTERNAL_4sf);
def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4sf", void_ftype_pfloat_v4sf,
@@ -7792,6 +7870,24 @@ altivec_init_builtins (void)
def_builtin (d->mask, d->name, type, d->code);
}
+
+ /* Initialize target builtin that implements
+ targetm.vectorize.builtin_mask_for_load. */
+ id = get_identifier ("__builtin_altivec_mask_for_load");
+ decl = build_decl (FUNCTION_DECL, id, v16qi_ftype_long_pcvoid);
+ DECL_BUILT_IN_CLASS (decl) = BUILT_IN_MD;
+ DECL_FUNCTION_CODE (decl) = ALTIVEC_BUILTIN_MASK_FOR_LOAD;
+ /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
+ altivec_builtin_mask_for_load = decl;
+
+ /* Initialize target builtin that implements
+ targetm.vectorize.builtin_mask_for_store. */
+ id = get_identifier ("__builtin_altivec_mask_for_store");
+ decl = build_decl (FUNCTION_DECL, id, v16qi_ftype_long_pcvoid);
+ DECL_BUILT_IN_CLASS (decl) = BUILT_IN_MD;
+ DECL_FUNCTION_CODE (decl) = ALTIVEC_BUILTIN_MASK_FOR_STORE;
+ /* Record the decl. Will be used by rs6000_builtin_mask_for_store. */
+ altivec_builtin_mask_for_store = decl;
}
static void
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 801d54fd93f..399f2e22d60 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2822,6 +2822,8 @@ enum rs6000_builtins
ALTIVEC_BUILTIN_ABS_V8HI,
ALTIVEC_BUILTIN_ABS_V16QI,
ALTIVEC_BUILTIN_COMPILETIME_ERROR,
+ ALTIVEC_BUILTIN_MASK_FOR_LOAD,
+ ALTIVEC_BUILTIN_MASK_FOR_STORE,
/* SPE builtins. */
SPE_BUILTIN_EVADDW,
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index ce597b086e6..2572b856410 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -1485,7 +1485,9 @@ mem_expr_equal_p (tree expr1, tree expr2)
&& mem_expr_equal_p (TREE_OPERAND (expr1, 1), /* field decl */
TREE_OPERAND (expr2, 1));
- if (TREE_CODE (expr1) == INDIRECT_REF)
+ if (TREE_CODE (expr1) == INDIRECT_REF
+ || TREE_CODE (expr1) == ALIGN_INDIRECT_REF
+ || TREE_CODE (expr1) == MISALIGNED_INDIRECT_REF)
return mem_expr_equal_p (TREE_OPERAND (expr1, 0),
TREE_OPERAND (expr2, 0));
@@ -1546,8 +1548,19 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
/* We can set the alignment from the type if we are making an object,
this is an INDIRECT_REF, or if TYPE_ALIGN_OK. */
- if (objectp || TREE_CODE (t) == INDIRECT_REF || TYPE_ALIGN_OK (type))
+ if (objectp || TREE_CODE (t) == INDIRECT_REF
+ || TREE_CODE (t) == ALIGN_INDIRECT_REF
+ || TYPE_ALIGN_OK (type))
align = MAX (align, TYPE_ALIGN (type));
+ else
+ if (TREE_CODE (t) == MISALIGNED_INDIRECT_REF)
+ {
+ if (integer_zerop (TREE_OPERAND (t, 1)))
+ /* We don't know anything about the alignment. */
+ align = BITS_PER_UNIT;
+ else
+ align = tree_low_cst (TREE_OPERAND (t, 1), 1);
+ }
/* If the size is known, we can set that. */
if (TYPE_SIZE_UNIT (type) && host_integerp (TYPE_SIZE_UNIT (type), 1))
@@ -1672,7 +1685,9 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
the size we got from the type? */
}
else if (flag_argument_noalias > 1
- && TREE_CODE (t2) == INDIRECT_REF
+ && (TREE_CODE (t2) == INDIRECT_REF
+ || TREE_CODE (t2) == ALIGN_INDIRECT_REF
+ || TREE_CODE (t2) == MISALIGNED_INDIRECT_REF)
&& TREE_CODE (TREE_OPERAND (t2, 0)) == PARM_DECL)
{
expr = t2;
@@ -1683,7 +1698,9 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
/* If this is a Fortran indirect argument reference, record the
parameter decl. */
else if (flag_argument_noalias > 1
- && TREE_CODE (t) == INDIRECT_REF
+ && (TREE_CODE (t) == INDIRECT_REF
+ || TREE_CODE (t) == ALIGN_INDIRECT_REF
+ || TREE_CODE (t) == MISALIGNED_INDIRECT_REF)
&& TREE_CODE (TREE_OPERAND (t, 0)) == PARM_DECL)
{
expr = t;
@@ -1701,6 +1718,14 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
size = plus_constant (size, apply_bitpos / BITS_PER_UNIT);
}
+ if (TREE_CODE (t) == ALIGN_INDIRECT_REF)
+ {
+ /* Force EXPR and OFFSET to NULL, since we don't know exactly what
+ we're overlapping. */
+ offset = NULL;
+ expr = NULL;
+ }
+
/* Now set the attributes we computed above. */
MEM_ATTRS (ref)
= get_mem_attrs (alias, expr, offset, size, align, GET_MODE (ref));
diff --git a/gcc/expr.c b/gcc/expr.c
index da8304a9708..96cab626f7b 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -5829,6 +5829,8 @@ safe_from_p (rtx x, tree exp, int top_p)
}
break;
+ case MISALIGNED_INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
if (MEM_P (x)
&& alias_sets_conflict_p (MEM_ALIAS_SET (x),
@@ -6745,11 +6747,17 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return target;
}
+ case MISALIGNED_INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
{
tree exp1 = TREE_OPERAND (exp, 0);
tree orig;
+ if (code == MISALIGNED_INDIRECT_REF
+ && !targetm.vectorize.misaligned_mem_ok (mode))
+ abort ();
+
if (modifier != EXPAND_WRITE)
{
tree t;
@@ -6761,6 +6769,14 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
op0 = expand_expr (exp1, NULL_RTX, VOIDmode, EXPAND_SUM);
op0 = memory_address (mode, op0);
+
+ if (code == ALIGN_INDIRECT_REF)
+ {
+ int align = TYPE_ALIGN_UNIT (type);
+ op0 = gen_rtx_AND (Pmode, op0, GEN_INT (-align));
+ op0 = memory_address (mode, op0);
+ }
+
temp = gen_rtx_MEM (mode, op0);
orig = REF_ORIGINAL (exp);
@@ -8203,6 +8219,24 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return expand_expr_real (TREE_OPERAND (exp, 0), original_target, tmode,
modifier, alt_rtl);
+ case REALIGN_LOAD_EXPR:
+ {
+ tree oprnd0 = TREE_OPERAND (exp, 0);
+ tree oprnd1 = TREE_OPERAND (exp, 1);
+ tree oprnd2 = TREE_OPERAND (exp, 2);
+ rtx op2;
+
+ this_optab = optab_for_tree_code (code, type);
+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, 0);
+ op2 = expand_expr (oprnd2, NULL_RTX, VOIDmode, 0);
+ temp = expand_ternary_op (mode, this_optab, op0, op1, op2,
+ target, unsignedp);
+ if (temp == 0)
+ abort ();
+ return temp;
+ }
+
+
default:
return lang_hooks.expand_expr (exp, original_target, tmode,
modifier, alt_rtl);
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index c762fada3fe..7049f4f9475 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -1990,6 +1990,8 @@ non_lvalue (tree x)
case COMPONENT_REF:
case INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case ARRAY_REF:
case ARRAY_RANGE_REF:
case BIT_FIELD_REF:
@@ -2466,6 +2468,8 @@ operand_equal_p (tree arg0, tree arg1, unsigned int flags)
switch (TREE_CODE (arg0))
{
case INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case REALPART_EXPR:
case IMAGPART_EXPR:
return operand_equal_p (TREE_OPERAND (arg0, 0),
@@ -10478,7 +10482,9 @@ build_fold_addr_expr_with_type (tree t, tree ptrtype)
if (TREE_CODE (t) == WITH_SIZE_EXPR)
t = TREE_OPERAND (t, 0);
- if (TREE_CODE (t) == INDIRECT_REF)
+ /* Note: doesn't apply to ALIGN_INDIRECT_REF */
+ if (TREE_CODE (t) == INDIRECT_REF
+ || TREE_CODE (t) == MISALIGNED_INDIRECT_REF)
{
t = TREE_OPERAND (t, 0);
if (TREE_TYPE (t) != ptrtype)
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 273a5fbb5a0..0d39f67ef75 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -169,7 +169,9 @@ static const char * const optabs[] =
"cmpmem_optab[$A] = CODE_FOR_$(cmpmem$a$)",
"vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)",
"vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)",
- "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)" };
+ "vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)",
+ "vec_realign_store_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_store_$a$)",
+ "vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)" };
static void gen_insn (rtx);
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 1bf14a738a3..27744bcb689 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -3052,6 +3052,7 @@ gimplify_addr_expr (tree *expr_p, tree *pre_p, tree *post_p)
switch (TREE_CODE (op0))
{
case INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
do_indirect_ref:
/* Check if we are dealing with an expression of the form '&*ptr'.
While the front end folds away '&*ptr' into 'ptr', these
@@ -3642,6 +3643,8 @@ gimplify_expr (tree *expr_p, tree *pre_p, tree *post_p,
recalculate_side_effects (*expr_p);
break;
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case INDIRECT_REF:
ret = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, post_p,
is_gimple_reg, fb_rvalue);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 0b75a81cd7f..abb69700a04 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -286,6 +286,12 @@ optab_for_tree_code (enum tree_code code, tree type)
case MIN_EXPR:
return TYPE_UNSIGNED (type) ? umin_optab : smin_optab;
+ case REALIGN_STORE_EXPR:
+ return vec_realign_store_optab;
+
+ case REALIGN_LOAD_EXPR:
+ return vec_realign_load_optab;
+
default:
break;
}
@@ -313,6 +319,88 @@ optab_for_tree_code (enum tree_code code, tree type)
}
}
+
+/* Generate code to perform an operation specified by TERNARY_OPTAB
+   on operands OP0, OP1 and OP2, with result having machine-mode MODE.
+
+   TERNARY_OPTAB must provide a handler for MODE; the function aborts
+   if it does not.
+
+   UNSIGNEDP is for the case where we have to widen the operands
+   to perform the operation.  It says to use zero-extension.
+
+   If TARGET is nonzero, the value
+   is generated there, if it is convenient to do so.
+   In all cases an rtx is returned for the locus of the value;
+   this may or may not be TARGET.  */
+
+rtx
+expand_ternary_op (enum machine_mode mode, optab ternary_optab, rtx op0,
+		   rtx op1, rtx op2, rtx target, int unsignedp)
+{
+  int icode = (int) ternary_optab->handlers[(int) mode].insn_code;
+  enum machine_mode mode0, mode1, mode2;
+  rtx temp;
+  rtx pat;
+  rtx xop0 = op0, xop1 = op1, xop2 = op2;
+
+  /* Reject a missing handler before ICODE is used to index insn_data;
+     CODE_FOR_nothing is not a valid index into that table.  */
+  if (icode == (int) CODE_FOR_nothing)
+    abort ();
+
+  mode0 = insn_data[icode].operand[1].mode;
+  mode1 = insn_data[icode].operand[2].mode;
+  mode2 = insn_data[icode].operand[3].mode;
+
+  /* Use TARGET for the result only if the insn's output predicate
+     accepts it; otherwise compute into a fresh pseudo.  */
+  if (!target
+      || ! (*insn_data[icode].operand[0].predicate) (target, mode))
+    temp = gen_reg_rtx (mode);
+  else
+    temp = target;
+
+  /* In case the insn wants input operands in modes different from
+     those of the actual operands, convert the operands.  It would
+     seem that we don't need to convert CONST_INTs, but we do, so
+     that they're properly zero-extended, sign-extended or truncated
+     for their mode.  */
+
+  if (GET_MODE (op0) != mode0 && mode0 != VOIDmode)
+    xop0 = convert_modes (mode0,
+			  GET_MODE (op0) != VOIDmode
+			  ? GET_MODE (op0)
+			  : mode,
+			  xop0, unsignedp);
+
+  if (GET_MODE (op1) != mode1 && mode1 != VOIDmode)
+    xop1 = convert_modes (mode1,
+			  GET_MODE (op1) != VOIDmode
+			  ? GET_MODE (op1)
+			  : mode,
+			  xop1, unsignedp);
+
+  if (GET_MODE (op2) != mode2 && mode2 != VOIDmode)
+    xop2 = convert_modes (mode2,
+			  GET_MODE (op2) != VOIDmode
+			  ? GET_MODE (op2)
+			  : mode,
+			  xop2, unsignedp);
+
+  /* Now, if insn's predicates don't allow our operands, put them into
+     pseudo regs.  */
+
+  if (! (*insn_data[icode].operand[1].predicate) (xop0, mode0)
+      && mode0 != VOIDmode)
+    xop0 = copy_to_mode_reg (mode0, xop0);
+
+  if (! (*insn_data[icode].operand[2].predicate) (xop1, mode1)
+      && mode1 != VOIDmode)
+    xop1 = copy_to_mode_reg (mode1, xop1);
+
+  if (! (*insn_data[icode].operand[3].predicate) (xop2, mode2)
+      && mode2 != VOIDmode)
+    xop2 = copy_to_mode_reg (mode2, xop2);
+
+  pat = GEN_FCN (icode) (temp, xop0, xop1, xop2);
+
+  emit_insn (pat);
+  return temp;
+}
+
+
/* Like expand_binop, but return a constant rtx if the result can be
calculated at compile time. The arguments and return value are
otherwise the same as for expand_binop. */
@@ -4657,6 +4745,8 @@ init_optabs (void)
vec_extract_optab = init_optab (UNKNOWN);
vec_set_optab = init_optab (UNKNOWN);
vec_init_optab = init_optab (UNKNOWN);
+ vec_realign_load_optab = init_optab (UNKNOWN);
+
/* Conversions. */
sext_optab = init_convert_optab (SIGN_EXTEND);
zext_optab = init_convert_optab (ZERO_EXTEND);
diff --git a/gcc/optabs.h b/gcc/optabs.h
index b5632c770fe..8e895f203c5 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -228,6 +228,10 @@ enum optab_index
OTI_vec_extract,
/* Initialize vector operand. */
OTI_vec_init,
+ /* Extract specified elements from vectors, for vector store. */
+ OTI_vec_realign_store,
+ /* Extract specified elements from vectors, for vector load. */
+ OTI_vec_realign_load,
OTI_MAX
};
@@ -330,6 +334,8 @@ extern GTY(()) optab optab_table[OTI_MAX];
#define vec_set_optab (optab_table[OTI_vec_set])
#define vec_extract_optab (optab_table[OTI_vec_extract])
#define vec_init_optab (optab_table[OTI_vec_init])
+#define vec_realign_store_optab (optab_table[OTI_vec_realign_store])
+#define vec_realign_load_optab (optab_table[OTI_vec_realign_load])
/* Conversion optabs have their own table and indexes. */
enum convert_optab_index
@@ -405,6 +411,10 @@ extern enum insn_code cmpmem_optab[NUM_MACHINE_MODES];
/* Define functions given in optabs.c. */
+extern rtx expand_ternary_op (enum machine_mode mode, optab ternary_optab,
+ rtx op0, rtx op1, rtx op2, rtx target,
+ int unsignedp);
+
/* Expand a binary operation given optab and rtx operands. */
extern rtx expand_binop (enum machine_mode, optab, rtx, rtx, rtx, int,
enum optab_methods);
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index b4b2ca36fb5..32ef01c8844 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -102,6 +102,18 @@ print_mem_expr (FILE *outfile, tree expr)
print_mem_expr (outfile, TREE_OPERAND (expr, 0));
fputs (")", outfile);
}
+ else if (TREE_CODE (expr) == ALIGN_INDIRECT_REF)
+ {
+ fputs (" (A*", outfile);
+ print_mem_expr (outfile, TREE_OPERAND (expr, 0));
+ fputs (")", outfile);
+ }
+ else if (TREE_CODE (expr) == MISALIGNED_INDIRECT_REF)
+ {
+ fputs (" (M*", outfile);
+ print_mem_expr (outfile, TREE_OPERAND (expr, 0));
+ fputs (")", outfile);
+ }
else if (TREE_CODE (expr) == RESULT_DECL)
fputs (" <result>", outfile);
else
diff --git a/gcc/target-def.h b/gcc/target-def.h
index a77c3be82b1..61adf14325a 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -273,6 +273,17 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
TARGET_SCHED_DFA_NEW_CYCLE, \
TARGET_SCHED_IS_COSTLY_DEPENDENCE}
+#ifndef TARGET_VECTORIZE_MISALIGNED_MEM_OK
+#define TARGET_VECTORIZE_MISALIGNED_MEM_OK default_vect_misaligned_mem_ok
+#endif
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE 0
+
+#define TARGET_VECTORIZE \
+ {TARGET_VECTORIZE_MISALIGNED_MEM_OK, \
+ TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
+ TARGET_VECTORIZE_BUILTIN_MASK_FOR_STORE}
+
/* In except.c */
#define TARGET_EH_RETURN_FILTER_MODE default_eh_return_filter_mode
@@ -466,6 +477,7 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
{ \
TARGET_ASM_OUT, \
TARGET_SCHED, \
+ TARGET_VECTORIZE, \
TARGET_EH_RETURN_FILTER_MODE, \
TARGET_MERGE_DECL_ATTRIBUTES, \
TARGET_MERGE_TYPE_ATTRIBUTES, \
diff --git a/gcc/target.h b/gcc/target.h
index 6824c13562e..9f766ae18dd 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -282,6 +282,23 @@ struct gcc_target
bool (* is_costly_dependence) (rtx, rtx, rtx, int, int);
} sched;
+ /* Functions relating to vectorization. */
+ struct vectorize
+ {
+ /* The following member value is a pointer to a function called
+ by the vectorizer, and when expanding a MISALIGNED_INDIRECT_REF
+ expression. If the hook returns true (false) then a move* pattern
+ to/from memory can (cannot) be generated for this mode even if the
+ memory location is unaligned. */
+ bool (* misaligned_mem_ok) (enum machine_mode);
+
+ /* The following member values are pointers to functions called
+ by the vectorizer. Each takes no arguments and returns the decl
+ of the corresponding target builtin function. */
+ tree (* builtin_mask_for_load) (void);
+ tree (* builtin_mask_for_store) (void);
+ } vectorize;
+
/* Return machine mode for filter value. */
enum machine_mode (* eh_return_filter_mode) (void);
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index c7170752444..48690a130cf 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -269,6 +269,12 @@ default_scalar_mode_supported_p (enum machine_mode mode)
}
bool
+default_vect_misaligned_mem_ok (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return !STRICT_ALIGNMENT;
+}
+
+bool
hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false (
CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 4ff0ea36af8..024a0808e0b 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -47,6 +47,8 @@ extern void default_unwind_emit (FILE *, rtx);
extern bool default_scalar_mode_supported_p (enum machine_mode);
+extern bool default_vect_misaligned_mem_ok (enum machine_mode);
+
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index fbd76080aec..536ccf5cef8 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,38 @@
+2004-09-23 Dorit Naishlos <dorit@il.ibm.com>
+
+ * gcc.dg/vect/vect-27.c: Now vectorized on altivec.
+ * gcc.dg/vect/vect-29.c: Now vectorized on altivec.
+ * gcc.dg/vect/vect-48.c: Now vectorized on altivec.
+ * gcc.dg/vect/vect-56.c: Now vectorized on altivec.
+ * gcc.dg/vect/vect-72.c: New test for altivec and sse2.
+ * gcc.dg/vect/vect-77.c: Now vectorized on altivec.
+
+ * gcc.dg/vect/vect-27a.c: New test for altivec and mmx.
+ * gcc.dg/vect/vect-29a.c: New test for altivec and mmx.
+ * gcc.dg/vect/vect-48a.c: New test for altivec and mmx.
+ * gcc.dg/vect/vect-56a.c: New test for altivec and mmx.
+ * gcc.dg/vect/vect-72a.c: New test for altivec and mmx.
+ * gcc.dg/vect/vect-77a.c: New test for altivec and mmx.
+
+ * gcc.dg/vect/vect-13.c: Change to run test instead of compile.
+
+ * gcc.dg/vect/vect-44.c: Check additional cases.
+ * gcc.dg/vect/vect-48.c: Check additional cases.
+
+ * gcc.dg/vect/vect-26.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-27.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-28.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-29.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-4?.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-75.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-76.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-77.c: Use sse2 instead of sse.
+ * gcc.dg/vect/vect-78.c: Use sse2 instead of sse.
+
+ * gcc.dg/vect/vect-5?.c: Use sse2 instead of sse. Add return 0.
+ * gcc.dg/vect/vect-60.c: Use sse2 instead of sse. Add return 0.
+ * gcc.dg/vect/vect-61.c: Use sse2 instead of sse. Add return 0.
+
2004-09-23 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* gcc.dg/tree-ssa/loop-6.c: New test.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-13.c b/gcc/testsuite/gcc.dg/vect/vect-13.c
index 21385b40ef8..052abe12d18 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-13.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-13.c
@@ -1,25 +1,41 @@
-/* { dg-do compile { target powerpc*-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-do run { target powerpc*-*-* i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
+#include <stdarg.h>
+#include "tree-vect.h"
#define N 16
int a[N];
int results[N] = {0,1,2,3,0,0,0,0,0,0,0,0,12,13,14,15};
-int main ()
+int main1()
{
int i;
int b[N] = {0,1,2,3,-4,-5,-6,-7,-8,-9,-10,-11,12,13,14,15};
- /* Not vectorizable yet (condition in loop). */
+ /* Max pattern. */
for (i = 0; i < N; i++)
{
a[i] = (b[i] >= 0 ? b[i] : 0);
}
+ /* Check results */
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != results[i])
+ abort ();
+ }
+
return 0;
}
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c
index bfeb76ece69..fd21beddb78 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-26.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-26.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c
index 35e8f418f5f..d2e8c944559 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-27.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-27.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -43,5 +43,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-27a.c b/gcc/testsuite/gcc.dg/vect/vect-27a.c
new file mode 100644
index 00000000000..9dd75498676
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-27a.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* unaligned load. */
+
+int main1 ()
+{
+ int i;
+ int ia[N];
+ int ib[N+1];
+
+ for (i=0; i < N; i++)
+ {
+ ib[i] = i;
+ }
+
+ for (i = 1; i <= N; i++)
+ {
+ ia[i-1] = ib[i];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i-1] != ib[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-28.c b/gcc/testsuite/gcc.dg/vect/vect-28.c
index b10cf136e17..cb8d7cb0c76 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-28.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-28.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c
index 80754f5fea5..c0383c7c8a6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-29.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-29.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -46,5 +46,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-29a.c b/gcc/testsuite/gcc.dg/vect/vect-29a.c
new file mode 100644
index 00000000000..13cd5c9d3f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-29a.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+#define OFF 3
+
+/* unaligned load. */
+
+int main1 (int off)
+{
+ int i;
+ int ia[N];
+ int ib[N+OFF];
+
+ for (i = 0; i < N+OFF; i++)
+ {
+ ib[i] = i;
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i+off];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != ib[i+off])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (0); /* aligned */
+ main1 (OFF); /* unaligned */
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-40.c b/gcc/testsuite/gcc.dg/vect/vect-40.c
index e3dd2778adb..2507575ba13 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-40.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-40.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-41.c b/gcc/testsuite/gcc.dg/vect/vect-41.c
index 0f9cae4ee8f..f54258fab3c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-41.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-41.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c
index 7f79d42de49..9d40a37113a 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-42.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-42.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-43.c b/gcc/testsuite/gcc.dg/vect/vect-43.c
index 370b5dd8bb7..eedef268bd5 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-43.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-43.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c
index 4068ab1d577..8aab2fe07e1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-44.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-44.c
@@ -1,13 +1,14 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 256
+typedef float afloat __attribute__ ((__aligned__(16)));
void bar (float *pa, float *pb, float *pc)
{
@@ -42,13 +43,17 @@ main1 (float * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc
int main (void)
{
int i;
- float a[N];
- float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
- float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
+ afloat a[N+4];
+ afloat b[N+4] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60,63,66,69};
+ afloat c[N+4] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23};
check_vect ();
main1 (a,b,c);
+ main1 (&a[1],b,c);
+ main1 (a,&b[1],c);
+ main1 (&a[1],&b[1],&c[1]);
+
return 0;
}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-45.c b/gcc/testsuite/gcc.dg/vect/vect-45.c
index c0b0029e12f..bc5d5c843cb 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-45.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-45.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-46.c b/gcc/testsuite/gcc.dg/vect/vect-46.c
index 1fb08a7eab5..7710c039d50 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-46.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-46.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-47.c b/gcc/testsuite/gcc.dg/vect/vect-47.c
index e1e1d38e6b6..ccbcccf880d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-47.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-47.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c
index c7485be3a0b..3dfdb957937 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-48.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-48.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -43,14 +43,16 @@ main1 (afloat * __restrict__ pa, float * __restrict__ pb, float * __restrict__ p
int main (void)
{
int i;
- float a[N];
- float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
- float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
+ afloat a[N];
+ afloat b[N+1] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60};
+ afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
check_vect ();
main1 (a,b,c);
+ main1 (a,&b[1],c);
+
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-48a.c b/gcc/testsuite/gcc.dg/vect/vect-48a.c
new file mode 100644
index 00000000000..0422edcbf6b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-48a.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 256
+
+typedef short ashort __attribute__ ((__aligned__(16)));
+
+void bar (short *pa, short *pb, short *pc)
+{
+ int i;
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (pa[i] != (pb[i] + pc[i]))
+ abort ();
+ }
+
+ return;
+}
+
+
+int
+main1 (ashort * __restrict__ pa, short * __restrict__ pb, short * __restrict__ pc)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ pa[i] = pb[i] + pc[i];
+ }
+
+ bar (pa,pb,pc);
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+ ashort a[N];
+ ashort b[N+1] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60};
+ ashort c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
+
+ check_vect ();
+
+ main1 (a,b,c);
+ main1 (a,&b[1],c);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-49.c b/gcc/testsuite/gcc.dg/vect/vect-49.c
index 8d8b212a66f..6a2669956a7 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-49.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-49.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c
index cd7cf5d4996..3b952a3a6c1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-50.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-50.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -49,6 +49,7 @@ int main (void)
check_vect ();
main1 (N,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-51.c b/gcc/testsuite/gcc.dg/vect/vect-51.c
index 65c0f402754..7804a4c8e8f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-51.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-51.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -49,6 +49,7 @@ int main (void)
check_vect ();
main1 (N,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c
index 32a584b2a0b..60d9b796296 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-52.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-52.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +51,7 @@ int main (void)
main1 (N,a,&b[1],c);
main1 (N,a,&b[1],&c[1]);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-53.c b/gcc/testsuite/gcc.dg/vect/vect-53.c
index 4b99304caf3..e8f17472be0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-53.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-53.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +51,7 @@ int main (void)
main1 (N,a,&b[1],c);
main1 (N,a,&b[1],&c[1]);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c
index 5ab9e026f42..172feae8d67 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-54.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-54.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -50,6 +50,7 @@ int main (void)
check_vect ();
main1 (a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-55.c b/gcc/testsuite/gcc.dg/vect/vect-55.c
index 2257a2312f3..e5c4ba2dbc2 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-55.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-55.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -50,6 +50,7 @@ int main (void)
check_vect ();
main1 (a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c
index 0e20137bebd..9170e49cd2f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-56.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-56.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -50,6 +50,7 @@ int main (void)
check_vect ();
main1 (a,b,c);
+ return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-56a.c b/gcc/testsuite/gcc.dg/vect/vect-56a.c
new file mode 100644
index 00000000000..5d8ed115f18
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-56a.c
@@ -0,0 +1,56 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 256
+
+typedef short ashort __attribute__ ((__aligned__(16)));
+
+void bar (ashort *pa, ashort *pb, ashort *pc)
+{
+ int i;
+
+ /* check results: */
+ for (i = 0; i < N/2; i++)
+ {
+ if (pa[i] != (pb[i+1] + pc[i+1]))
+ abort ();
+ }
+
+ return;
+}
+
+
+int
+main1 (ashort * __restrict__ pa, ashort * __restrict__ pb, ashort * __restrict__ pc)
+{
+ int i;
+
+ for (i = 0; i < N/2; i++)
+ {
+ pa[i] = pb[i+1] + pc[i+1];
+ }
+
+ bar (pa,pb,pc);
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+ ashort a[N];
+ ashort b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
+ ashort c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
+
+ check_vect ();
+
+ main1 (a,b,c);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-57.c b/gcc/testsuite/gcc.dg/vect/vect-57.c
index 0e675d7755c..ce2e74089a4 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-57.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-57.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -50,6 +50,7 @@ int main (void)
check_vect ();
main1 (a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c
index 57c27795f1c..c080c909d1b 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-58.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-58.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +51,7 @@ int main (void)
check_vect ();
main1 (n,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-59.c b/gcc/testsuite/gcc.dg/vect/vect-59.c
index 3dfbe19e988..4bdd7b401a0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-59.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-59.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +51,7 @@ int main (void)
check_vect ();
main1 (n,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c
index 276b7e222fb..e19d36e8fef 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-60.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-60.c
@@ -1,7 +1,8 @@
+
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +52,7 @@ int main (void)
check_vect ();
main1 (n,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-61.c b/gcc/testsuite/gcc.dg/vect/vect-61.c
index 257ef388fc5..6df22a61c40 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-61.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-61.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -51,6 +51,7 @@ int main (void)
check_vect ();
main1 (n,a,b,c);
+ return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c
new file mode 100644
index 00000000000..1a2ad070963
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-72.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* unaligned load. */
+
+int main1 ()
+{
+ int i;
+ char ia[N];
+ char ib[N+1];
+
+ for (i=0; i < N+1; i++)
+ {
+ ib[i] = i;
+ }
+
+ for (i = 1; i < N+1; i++)
+ {
+ ia[i-1] = ib[i];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i-1] != ib[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-72a.c b/gcc/testsuite/gcc.dg/vect/vect-72a.c
new file mode 100644
index 00000000000..71fda70badc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-72a.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* unaligned load. */
+
+int main1 ()
+{
+ int i;
+ char ia[N];
+ char ib[N+1];
+
+ for (i=0; i < N+1; i++)
+ {
+ ib[i] = i;
+ }
+
+ for (i = 1; i < N+1; i++)
+ {
+ ia[i-1] = ib[i];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i-1] != ib[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c
index 5fcb3abad86..f5fee582d2d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-75.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-75.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-76.c b/gcc/testsuite/gcc.dg/vect/vect-76.c
index 11b87e325a8..17d6ff7b52f 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-76.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-76.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-77.c b/gcc/testsuite/gcc.dg/vect/vect-77.c
index c5dacc5a881..9f5697d6035 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-77.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-77.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
@@ -43,5 +43,5 @@ int main (void)
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-77a.c b/gcc/testsuite/gcc.dg/vect/vect-77a.c
new file mode 100644
index 00000000000..afa0c494d3b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-77a.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -mmmx" { target i?86-*-* x86_64-*-* } } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+#define OFF 8
+
+typedef int aint __attribute__ ((__aligned__(16)));
+
+aint ib[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10, 14, 22, 26, 34};
+
+int main1 (aint *ib, int off)
+{
+ int i;
+ int ia[N];
+
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i+off];
+ }
+
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != ib[i+off])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (ib, 8);
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-78.c b/gcc/testsuite/gcc.dg/vect/vect-78.c
index 6a4eb8c4dc7..75ad3c29843 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-78.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-78.c
@@ -1,7 +1,7 @@
/* { dg-do run { target powerpc*-*-* } } */
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -maltivec" { target powerpc*-*-* } } */
-/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse" { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-stats -msse2" { target i?86-*-* x86_64-*-* } } */
#include <stdarg.h>
#include "tree-vect.h"
diff --git a/gcc/tree-dump.c b/gcc/tree-dump.c
index d8d370fc304..fed3bfc4a09 100644
--- a/gcc/tree-dump.c
+++ b/gcc/tree-dump.c
@@ -527,6 +527,8 @@ dequeue_and_dump (dump_info_p di)
case TRUTH_NOT_EXPR:
case ADDR_EXPR:
case INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case CLEANUP_POINT_EXPR:
case SAVE_EXPR:
case REALPART_EXPR:
diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c
index b7bc7c3221b..b7ed2edc387 100644
--- a/gcc/tree-eh.c
+++ b/gcc/tree-eh.c
@@ -1767,6 +1767,8 @@ tree_could_trap_p (tree expr)
return !in_array_bounds_p (expr);
case INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
return !TREE_THIS_NOTRAP (expr);
case ASM_EXPR:
diff --git a/gcc/tree-gimple.c b/gcc/tree-gimple.c
index d9fe0205b3c..fe707d5fc54 100644
--- a/gcc/tree-gimple.c
+++ b/gcc/tree-gimple.c
@@ -323,7 +323,9 @@ is_gimple_addressable (tree t)
return (is_gimple_id (t) || handled_component_p (t)
|| TREE_CODE (t) == REALPART_EXPR
|| TREE_CODE (t) == IMAGPART_EXPR
- || TREE_CODE (t) == INDIRECT_REF);
+ || TREE_CODE (t) == INDIRECT_REF
+ || TREE_CODE (t) == ALIGN_INDIRECT_REF
+ || TREE_CODE (t) == MISALIGNED_INDIRECT_REF);
}
/* Return true if T is function invariant. Or rather a restricted
@@ -563,6 +565,8 @@ get_base_address (tree t)
if (SSA_VAR_P (t)
|| TREE_CODE (t) == STRING_CST
|| TREE_CODE (t) == CONSTRUCTOR
+ || TREE_CODE (t) == MISALIGNED_INDIRECT_REF
+ || TREE_CODE (t) == ALIGN_INDIRECT_REF
|| TREE_CODE (t) == INDIRECT_REF)
return t;
else
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 71196fbe783..0bb2072f714 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1018,6 +1018,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
case ADDR_EXPR:
case PREDECREMENT_EXPR:
case PREINCREMENT_EXPR:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case INDIRECT_REF:
if (TREE_CODE (node) == ADDR_EXPR
&& (TREE_CODE (TREE_OPERAND (node, 0)) == STRING_CST
@@ -1034,6 +1036,13 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
}
else
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+
+ if (TREE_CODE (node) == MISALIGNED_INDIRECT_REF)
+ {
+ pp_string (buffer, "{misalignment: ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_character (buffer, '}');
+ }
break;
case POSTDECREMENT_EXPR:
@@ -1451,6 +1460,16 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
is_stmt = false;
break;
+ case REALIGN_LOAD_EXPR:
+ pp_string (buffer, "REALIGN_LOAD <");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
+ pp_string (buffer, ">");
+ break;
+
default:
NIY;
}
@@ -1710,6 +1729,8 @@ op_prio (tree op)
case PREINCREMENT_EXPR:
case PREDECREMENT_EXPR:
case NEGATE_EXPR:
+ case ALIGN_INDIRECT_REF:
+ case MISALIGNED_INDIRECT_REF:
case INDIRECT_REF:
case ADDR_EXPR:
case FLOAT_EXPR:
@@ -1838,6 +1859,12 @@ op_symbol (tree op)
case INDIRECT_REF:
return "*";
+ case ALIGN_INDIRECT_REF:
+ return "A*";
+
+ case MISALIGNED_INDIRECT_REF:
+ return "M*";
+
case TRUNC_DIV_EXPR:
case RDIV_EXPR:
return "/";
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 3d99d94e92e..b0cdddce968 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -495,14 +495,16 @@ collect_points_to_info_for (struct alias_info *ai, tree ptr)
/* Helper for ptr_is_dereferenced_by. Called by walk_tree to look for
- INDIRECT_REF nodes for the pointer passed in DATA. */
+ (ALIGN/MISALIGNED_)INDIRECT_REF nodes for the pointer passed in DATA. */
static tree
find_ptr_dereference (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
{
tree ptr = (tree) data;
- if (TREE_CODE (*tp) == INDIRECT_REF
+ if ((TREE_CODE (*tp) == INDIRECT_REF
+ || TREE_CODE (*tp) == ALIGN_INDIRECT_REF
+ || TREE_CODE (*tp) == MISALIGNED_INDIRECT_REF)
&& TREE_OPERAND (*tp, 0) == ptr)
return *tp;
@@ -510,8 +512,9 @@ find_ptr_dereference (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
}
-/* Return true if STMT contains INDIRECT_REF <PTR>. *IS_STORE is set
- to 'true' if the dereference is on the LHS of an assignment. */
+/* Return true if STMT contains (ALIGN/MISALIGNED_)INDIRECT_REF <PTR>.
+ *IS_STORE is set to 'true' if the dereference is on the LHS of an
+ assignment. */
static bool
ptr_is_dereferenced_by (tree ptr, tree stmt, bool *is_store)
diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
index 80357dcfc27..937a301f523 100644
--- a/gcc/tree-ssa-dce.c
+++ b/gcc/tree-ssa-dce.c
@@ -418,7 +418,9 @@ mark_stmt_if_obviously_necessary (tree stmt, bool aggressive)
if (is_global_var (lhs))
mark_stmt_necessary (stmt, true);
}
- else if (TREE_CODE (lhs) == INDIRECT_REF)
+ else if (TREE_CODE (lhs) == INDIRECT_REF
+ || TREE_CODE (lhs) == ALIGN_INDIRECT_REF
+ || TREE_CODE (lhs) == MISALIGNED_INDIRECT_REF)
{
tree ptr = TREE_OPERAND (lhs, 0);
struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr);
diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 057e72af6da..aed07e25f3b 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -2414,7 +2414,9 @@ record_equivalences_from_stmt (tree stmt,
t = TREE_OPERAND (t, 0);
/* Now see if this is a pointer dereference. */
- if (TREE_CODE (t) == INDIRECT_REF)
+ if (TREE_CODE (t) == INDIRECT_REF
+ || TREE_CODE (t) == ALIGN_INDIRECT_REF
+ || TREE_CODE (t) == MISALIGNED_INDIRECT_REF)
{
tree op = TREE_OPERAND (t, 0);
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index de8649173d0..4aafc815b84 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -137,6 +137,8 @@ for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
case SSA_NAME:
return cbck (*addr_p, addr_p, data);
+ case MISALIGNED_INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
nxt = &TREE_OPERAND (*addr_p, 0);
return cbck (*addr_p, nxt, data);
@@ -1101,7 +1103,9 @@ is_call_clobbered_ref (tree ref)
if (DECL_P (base))
return is_call_clobbered (base);
- if (TREE_CODE (base) == INDIRECT_REF)
+ if (TREE_CODE (base) == INDIRECT_REF
+ || TREE_CODE (base) == ALIGN_INDIRECT_REF
+ || TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
{
/* Check whether the alias tags associated with the pointer
are call clobbered. */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 078e8a36cf9..08a962d0932 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1269,7 +1269,9 @@ find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
|| zero_p (step))
goto fail;
- if (TREE_CODE (base) == INDIRECT_REF)
+ if (TREE_CODE (base) == INDIRECT_REF
+ || TREE_CODE (base) == ALIGN_INDIRECT_REF
+ || TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
base = TREE_OPERAND (base, 0);
else
base = build_addr (base);
@@ -1699,7 +1701,9 @@ add_address_candidates (struct ivopts_data *data,
if (base != TREE_OPERAND (iv->base, 0))
{
- if (TREE_CODE (base) == INDIRECT_REF)
+ if (TREE_CODE (base) == INDIRECT_REF
+ || TREE_CODE (base) == ALIGN_INDIRECT_REF
+ || TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
base = TREE_OPERAND (base, 0);
else
base = build_addr (base);
@@ -3826,13 +3830,16 @@ unshare_and_remove_ssa_names (tree ref)
static void
rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
{
- tree var = get_base_address (*op), new_var, new_name, copy, name;
+ tree bvar, var, new_var, new_name, copy, name;
tree orig;
+ var = bvar = get_base_address (*op);
+
if (!var || TREE_CODE (with) != SSA_NAME)
goto do_rewrite;
-
- if (TREE_CODE (var) == INDIRECT_REF)
+ if (TREE_CODE (var) == INDIRECT_REF
+ || TREE_CODE (var) == ALIGN_INDIRECT_REF
+ || TREE_CODE (var) == MISALIGNED_INDIRECT_REF)
var = TREE_OPERAND (var, 0);
if (TREE_CODE (var) == SSA_NAME)
{
@@ -3869,12 +3876,20 @@ rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
do_rewrite:
orig = NULL_TREE;
- if (TREE_CODE (*op) == INDIRECT_REF)
+ if (TREE_CODE (*op) == INDIRECT_REF
+ || TREE_CODE (*op) == ALIGN_INDIRECT_REF
+ || TREE_CODE (*op) == MISALIGNED_INDIRECT_REF)
orig = REF_ORIGINAL (*op);
if (!orig)
orig = unshare_and_remove_ssa_names (*op);
- *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
+ if (TREE_CODE (bvar) == ALIGN_INDIRECT_REF)
+ *op = build1 (ALIGN_INDIRECT_REF, TREE_TYPE (*op), with);
+ else if (TREE_CODE (bvar) == MISALIGNED_INDIRECT_REF)
+ *op = build2 (MISALIGNED_INDIRECT_REF, TREE_TYPE (*op), with, TREE_OPERAND (*op, 1));
+ else
+ *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
+
/* Record the original reference, for purposes of alias analysis. */
REF_ORIGINAL (*op) = orig;
}
diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index 8c0516998bc..55e6f8a1446 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -1009,6 +1009,11 @@ get_expr_operands (tree stmt, tree *expr_p, int flags)
add_stmt_operand (expr_p, stmt, flags);
return;
+ case MISALIGNED_INDIRECT_REF:
+ get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags);
+ /* fall through */
+
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
get_indirect_ref_operands (stmt, expr, flags);
return;
@@ -1162,6 +1167,14 @@ get_expr_operands (tree stmt, tree *expr_p, int flags)
return;
}
+ case REALIGN_LOAD_EXPR:
+ {
+ get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags);
+ get_expr_operands (stmt, &TREE_OPERAND (expr, 1), flags);
+ get_expr_operands (stmt, &TREE_OPERAND (expr, 2), flags);
+ return;
+ }
+
case BLOCK:
case FUNCTION_DECL:
case EXC_PTR_EXPR:
@@ -1274,7 +1287,8 @@ get_asm_expr_operands (tree stmt)
}
}
-/* A subroutine of get_expr_operands to handle INDIRECT_REF. */
+/* A subroutine of get_expr_operands to handle INDIRECT_REF,
+ ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF. */
static void
get_indirect_ref_operands (tree stmt, tree expr, int flags)
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 33f68ad56d4..b158278d0fd 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -179,7 +179,8 @@ static bool vect_compute_data_ref_alignment
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_get_first_index (tree, tree *);
static bool vect_can_force_dr_alignment_p (tree, unsigned int);
-static struct data_reference * vect_analyze_pointer_ref_access (tree, tree, bool);
+static struct data_reference * vect_analyze_pointer_ref_access
+ (tree, tree, bool);
static tree vect_get_base_and_bit_offset
(struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
static struct data_reference * vect_analyze_pointer_ref_access
@@ -193,9 +194,11 @@ static tree vect_get_symbl_and_dr
/* Utility functions for the code transformation. */
static tree vect_create_destination_var (tree, tree);
-static tree vect_create_data_ref (tree, block_stmt_iterator *);
-static tree vect_create_index_for_vector_ref (struct loop *, block_stmt_iterator *);
-static tree vect_create_addr_base_for_vector_ref (tree, tree *);
+static tree vect_create_data_ref_ptr
+ (tree, block_stmt_iterator *, tree, tree *, bool);
+static tree vect_create_index_for_vector_ref
+ (struct loop *, block_stmt_iterator *);
+static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree get_vectype_for_scalar_type (tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree);
@@ -702,8 +705,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
Input:
STMT: The statement containing the data reference.
- NEW_STMT_LIST: Must be initialized to NULL_TREE or a
- statement list.
+ NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
+ OFFSET: Optional. If supplied, it is added to the initial address.
Output:
1. Return an SSA_NAME whose value is the address of the memory location of the
@@ -715,7 +718,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
- tree *new_stmt_list)
+ tree *new_stmt_list,
+ tree offset)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct loop *loop = STMT_VINFO_LOOP (stmt_info);
@@ -776,7 +780,8 @@ vect_create_addr_base_for_vector_ref (tree stmt,
dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
add_referenced_tmp_var (dest);
- data_ref_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
+ data_ref_base =
+ force_gimple_operand (data_ref_base, &new_stmt, false, dest);
append_to_statement_list_force (new_stmt, new_stmt_list);
vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
@@ -794,6 +799,17 @@ vect_create_addr_base_for_vector_ref (tree stmt,
init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
append_to_statement_list_force (new_stmt, new_stmt_list);
+ if (offset)
+ {
+ tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
+ add_referenced_tmp_var (tmp);
+ vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
+ vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
+ init_val = make_ssa_name (tmp, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = init_val;
+ append_to_statement_list_force (vec_stmt, new_stmt_list);
+ }
+
array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
NULL_TREE, NULL_TREE);
addr_base = build_fold_addr_expr (array_ref);
@@ -806,6 +822,7 @@ vect_create_addr_base_for_vector_ref (tree stmt,
new_temp = make_ssa_name (addr_expr, vec_stmt);
TREE_OPERAND (vec_stmt, 0) = new_temp;
append_to_statement_list_force (vec_stmt, new_stmt_list);
+
return new_temp;
}
@@ -856,31 +873,56 @@ vect_align_data_ref (tree stmt)
}
-/* Function vect_create_data_ref.
+/* Function vect_create_data_ref_ptr.
Create a memory reference expression for vector access, to be used in a
- vector load/store stmt.
+ vector load/store stmt. The reference is based on a new pointer to vector
+ type (vp).
Input:
- STMT: a stmt that references memory. expected to be of the form
- MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
- BSI: block_stmt_iterator where new stmts can be added.
+ 1. STMT: a stmt that references memory. Expected to be of the form
+ MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
+ 2. BSI: block_stmt_iterator where new stmts can be added.
+ 3. OFFSET (optional): an offset to be added to the initial address accessed
+ by the data-ref in STMT.
+ 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
+ pointing to the initial address.
Output:
- 1. Declare a new ptr to vector_type, and have it point to the array base.
- For example, for vector of type V8HI:
- v8hi *p0;
- p0 = (v8hi *)&a;
- 2. Create a data-reference based on the new vector pointer p0, and using
- a new index variable 'idx'. Return the expression '(*p0)[idx]'.
+ 1. Declare a new ptr to vector_type, and have it point to the base of the
+ data reference (initial address accessed by the data reference).
+ For example, for vector of type V8HI, the following code is generated:
+
+ v8hi *vp;
+ vp = (v8hi *)initial_address;
+
+ if OFFSET is not supplied:
+ initial_address = &a[init];
+ if OFFSET is supplied:
+ initial_address = &a[init + OFFSET];
+
+ Return the initial_address in INITIAL_ADDRESS.
+
+ 2. Create a data-reference in the loop based on the new vector pointer vp,
+ and using a new index variable 'idx' as follows:
+
+ vp' = vp + update
+
+ where if ONLY_INIT is true:
+ update = zero
+ and otherwise
+ update = idx * vector_type_size
+
+ Return the pointer vp'.
+
FORNOW: handle only aligned and consecutive accesses. */
static tree
-vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
+vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
+ tree *initial_address, bool only_init)
{
- tree base_name, data_ref_base, data_ref_base_type;
- tree array_type;
+ tree base_name;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
struct loop *loop = STMT_VINFO_LOOP (stmt_info);
@@ -897,55 +939,44 @@ vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
tree vec_stmt;
tree new_stmt_list = NULL_TREE;
tree idx;
- tree new_base;
- tree data_ref;
- edge pe;
+ edge pe = loop_preheader_edge (loop);
basic_block new_bb;
-
- /* FORNOW: make sure the data reference is aligned. */
- vect_align_data_ref (stmt);
+ tree vect_ptr_init;
+ tree vectype_size;
+ tree ptr_update;
+ tree data_ref_ptr;
base_name = unshare_expr (DR_BASE_NAME (dr));
- data_ref_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
- data_ref_base_type = TREE_TYPE (data_ref_base);
-
- array_type = build_array_type (vectype, 0);
- TYPE_ALIGN (array_type) = TYPE_ALIGN (data_ref_base_type);
- vect_ptr_type = build_pointer_type (array_type);
-
if (vect_debug_details (NULL))
{
+ tree data_ref_base = base_name;
fprintf (dump_file, "create array_ref of type: ");
print_generic_expr (dump_file, vectype, TDF_SLIM);
- }
-
- /* Create: vectype *p; */
- vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
- get_name (base_name));
- add_referenced_tmp_var (vect_ptr);
-
- if (vect_debug_details (NULL))
- {
if (TREE_CODE (data_ref_base) == VAR_DECL)
- fprintf (dump_file, "vectorizing a one dimensional array ref: ");
+ fprintf (dump_file, "vectorizing a one dimensional array ref: ");
else if (TREE_CODE (data_ref_base) == ARRAY_REF)
- fprintf (dump_file, "vectorizing a multidimensional array ref: ");
+ fprintf (dump_file, "vectorizing a multidimensional array ref: ");
else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
- fprintf (dump_file, "vectorizing a record based array ref: ");
+ fprintf (dump_file, "vectorizing a record based array ref: ");
else if (TREE_CODE (data_ref_base) == SSA_NAME)
- fprintf (dump_file, "vectorizing a pointer ref: ");
- else if (TREE_CODE (data_ref_base) == ADDR_EXPR
- || TREE_CODE (data_ref_base) == PLUS_EXPR
- || TREE_CODE (data_ref_base) == MINUS_EXPR)
- fprintf (dump_file, "vectorizing an address expr: ");
+ fprintf (dump_file, "vectorizing a pointer ref: ");
print_generic_expr (dump_file, base_name, TDF_SLIM);
}
- /* Handle aliasing: */
+ /** (1) Create the new vector-pointer variable: **/
+
+ vect_ptr_type = build_pointer_type (vectype);
+ vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
+ get_name (base_name));
+ add_referenced_tmp_var (vect_ptr);
+
+
+ /** (2) Handle aliasing information of the new vector-pointer: **/
+
tag = STMT_VINFO_MEMTAG (stmt_info);
gcc_assert (tag);
get_var_ann (vect_ptr)->type_mem_tag = tag;
-
+
/* Mark for renaming all aliased variables
(i.e, the may-aliases of the type-mem-tag). */
nvuses = NUM_VUSES (vuses);
@@ -969,36 +1000,56 @@ vect_create_data_ref (tree stmt, block_stmt_iterator *bsi)
if (TREE_CODE (def) == SSA_NAME)
bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
}
-
- pe = loop_preheader_edge (loop);
- /* Create: (&(base[init_val]) */
- new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list);
- pe = loop_preheader_edge (loop);
- new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
- gcc_assert (!new_bb);
+ /** (3) Calculate the initial address of the vector-pointer, and set
+ the vector-pointer to point to it before the loop: **/
+
+ /* Create: &(base[init_val+offset]) in the loop preheader. */
+ new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
+ offset);
+ pe = loop_preheader_edge (loop);
+ new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
+ gcc_assert (!new_bb);
+ *initial_address = new_temp;
- /* p = (vectype_array *) addr_base */
+ /* Create: p = (vectype *) initial_base */
vec_stmt = fold_convert (vect_ptr_type, new_temp);
vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
new_temp = make_ssa_name (vect_ptr, vec_stmt);
TREE_OPERAND (vec_stmt, 0) = new_temp;
- new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
- gcc_assert (!new_bb);
+ new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
+ gcc_assert (!new_bb);
+ vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
+
+
+ /** (4) Handle the updating of the vector-pointer inside the loop: **/
+
+ if (only_init) /* No update in loop is required. */
+ return vect_ptr_init;
- /*** create data ref: '(*p)[idx]' ***/
idx = vect_create_index_for_vector_ref (loop, bsi);
- new_base = build_fold_indirect_ref (new_temp);
- data_ref = build4 (ARRAY_REF, vectype, new_base, idx, NULL_TREE, NULL_TREE);
- if (vect_debug_details (NULL))
- {
- fprintf (dump_file, "created new data-ref: ");
- print_generic_expr (dump_file, data_ref, TDF_SLIM);
- }
+ /* Create: update = idx * vectype_size */
+ ptr_update = create_tmp_var (integer_type_node, "update");
+ add_referenced_tmp_var (ptr_update);
+ vectype_size = build_int_cst (integer_type_node,
+ GET_MODE_SIZE (TYPE_MODE (vectype)));
+ vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
+ vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
+ new_temp = make_ssa_name (ptr_update, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
- return data_ref;
+ /* Create: data_ref_ptr = vect_ptr_init + update */
+ vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
+ vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
+ new_temp = make_ssa_name (vect_ptr, vec_stmt);
+ TREE_OPERAND (vec_stmt, 0) = new_temp;
+ bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+ data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
+
+ return data_ref_ptr;
}
@@ -1220,8 +1271,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
/* Make sure bsi points to the stmt that is being vectorized. */
- /* Assumption: any stmts created for the vectorization of smtmt S are
- inserted before S. BSI may point to S or some new stmt before it. */
+ /* Assumption: any stmts created for the vectorization of stmt S were
+ inserted before S. BSI is expected to point to S or some new stmt before S. */
while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
bsi_next (bsi);
@@ -1424,6 +1475,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
struct loop *loop = STMT_VINFO_LOOP (stmt_info);
enum machine_mode vec_mode;
+ tree dummy;
/* Is vectorizable store? */
@@ -1452,6 +1504,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+ return false;
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
@@ -1467,7 +1522,10 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
/* Handle def. */
- data_ref = vect_create_data_ref (stmt, bsi);
+ /* FORNOW: make sure the data reference is aligned. */
+ vect_align_data_ref (stmt);
+ data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
+ data_ref = build_fold_indirect_ref (data_ref);
/* Arguments are ready. create the new vector stmt. */
*vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
@@ -1493,9 +1551,17 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree data_ref = NULL;
tree op;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree new_temp;
- enum machine_mode vec_mode;
+ int mode;
+ tree init_addr;
+ tree new_stmt;
+ tree dummy;
+ basic_block new_bb;
+ struct loop *loop = STMT_VINFO_LOOP (stmt_info);
+ edge pe = loop_preheader_edge (loop);
+ bool software_pipeline_loads_p = false;
/* Is vectorizable load? */
@@ -1513,11 +1579,31 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- vec_mode = TYPE_MODE (vectype);
+ mode = (int) TYPE_MODE (vectype);
+
/* FORNOW. In some cases can vectorize even if data-type not supported
- (e.g. - data copies). */
- if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
- return false;
+ (e.g. - data copies). */
+ if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
+ {
+ if (vect_debug_details (loop))
+ fprintf (dump_file, "Aligned load, but unsupported type.");
+ return false;
+ }
+
+ if (!aligned_access_p (dr))
+ {
+ if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
+ && (!targetm.vectorize.builtin_mask_for_load
+ || targetm.vectorize.builtin_mask_for_load ()))
+ software_pipeline_loads_p = true;
+ else if (!targetm.vectorize.misaligned_mem_ok (mode))
+ {
+ /* Possibly unaligned access, and can't software pipeline the loads */
+ if (vect_debug_details (loop))
+ fprintf (dump_file, "Arbitrary load not supported.");
+ return false;
+ }
+ }
if (!vec_stmt) /* transformation not required. */
{
@@ -1530,19 +1616,130 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_debug_details (NULL))
fprintf (dump_file, "transform load.");
- /* Handle def. */
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ if (!software_pipeline_loads_p)
+ {
+ /* Create:
+ p = initial_addr;
+ indx = 0;
+ loop {
+ vec_dest = *(p);
+ indx = indx + 1;
+ }
+ */
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
+ if (aligned_access_p (dr))
+ data_ref = build_fold_indirect_ref (data_ref);
+ else
+ {
+ int mis = DR_MISALIGNMENT (dr);
+ tree tmis = (mis == -1 ?
+ integer_zero_node :
+ build_int_cst (integer_type_node, mis));
+ tmis = int_const_binop (MULT_EXPR, tmis,
+ build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
+ data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
+ }
+ new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ TREE_OPERAND (new_stmt, 0) = new_temp;
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+ }
+ else /* software-pipeline the loads */
+ {
+ /* Create:
+ p1 = initial_addr;
+ msq_init = *(floor(p1))
+ p2 = initial_addr + VS - 1;
+ magic = have_builtin ? builtin_result : initial_address;
+ indx = 0;
+ loop {
+ p2' = p2 + indx * vectype_size
+ lsq = *(floor(p2'))
+ vec_dest = realign_load (msq, lsq, magic)
+ indx = indx + 1;
+ msq = lsq;
+ }
+ */
+
+ tree offset;
+ tree magic;
+ tree phi_stmt;
+ tree msq_init;
+ tree msq, lsq;
+ tree dataref_ptr;
+ tree params;
+
+ /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
+ &init_addr, true);
+ data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
+ new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ TREE_OPERAND (new_stmt, 0) = new_temp;
+ new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
+ gcc_assert (!new_bb);
+ msq_init = TREE_OPERAND (new_stmt, 0);
+
+
+ /* <2> Create lsq = *(floor(p2')) in the loop */
+ offset = build_int_cst (integer_type_node,
+ GET_MODE_NUNITS (TYPE_MODE (vectype)));
+ offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
+ data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
+ new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ TREE_OPERAND (new_stmt, 0) = new_temp;
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+ lsq = TREE_OPERAND (new_stmt, 0);
+
+
+ /* <3> */
+ if (targetm.vectorize.builtin_mask_for_load)
+ {
+ /* Create permutation mask, if required, in loop preheader. */
+ tree builtin_decl;
+ params = build_tree_list (NULL_TREE, init_addr);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ builtin_decl = targetm.vectorize.builtin_mask_for_load ();
+ new_stmt = build_function_call_expr (builtin_decl, params);
+ new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ TREE_OPERAND (new_stmt, 0) = new_temp;
+ new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
+ gcc_assert (!new_bb);
+ magic = TREE_OPERAND (new_stmt, 0);
+ }
+ else
+ {
+ /* Use current address instead of init_addr for reduced reg pressure. */
+ magic = dataref_ptr;
+ }
- /* Handle use. */
- op = TREE_OPERAND (stmt, 1);
- data_ref = vect_create_data_ref (stmt, bsi);
- /* Arguments are ready. create the new vector stmt. */
- *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
- new_temp = make_ssa_name (vec_dest, *vec_stmt);
- TREE_OPERAND (*vec_stmt, 0) = new_temp;
- vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+ /* <4> Create msq = phi <msq_init, lsq> in loop */
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ msq = make_ssa_name (vec_dest, NULL_TREE);
+ phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
+ SSA_NAME_DEF_STMT (msq) = phi_stmt;
+ add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
+ add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
+
+ /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
+ new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ TREE_OPERAND (new_stmt, 0) = new_temp;
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+ }
+
+ *vec_stmt = new_stmt;
return true;
}
@@ -2726,7 +2923,7 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
FOR NOW: No transformation is actually performed. TODO. */
static void
-vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo ATTRIBUTE_UNUSED)
+vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
{
/*
This pass will require a cost model to guide it whether to apply peeling
@@ -2824,7 +3021,8 @@ static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
- varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+ /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
+
unsigned int i;
if (vect_debug_details (NULL))
@@ -2864,6 +3062,11 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
}
}
+ /* The vectorizer now supports misaligned loads, so we don't fail anymore
+ in the presence of a misaligned read dataref. For some targets however
+ it may be preferable not to vectorize in such a case as misaligned
+ accesses are very costly. This should be considered in the future. */
+/*
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
@@ -2875,6 +3078,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
return false;
}
}
+*/
return true;
}
@@ -3158,7 +3362,6 @@ vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
case ARRAY_REF:
offset = size_zero_node;
- array_base = TREE_OPERAND (memref, 0);
/* Store the array base in the stmt info.
For one dimensional array ref a[i], the base is a,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 405ecb24df2..37c57067914 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -119,6 +119,7 @@ vinfo_for_stmt (tree stmt)
/* Info on data references alignment. */
/*-----------------------------------------------------------------*/
+/* The misalignment of the memory access in bytes. */
#define DR_MISALIGNMENT(DR) (DR)->aux
static inline bool
diff --git a/gcc/tree.c b/gcc/tree.c
index 98ab92411f5..a0cb800470f 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -1493,6 +1493,8 @@ staticp (tree arg)
case BIT_FIELD_REF:
return NULL;
+ case MISALIGNED_INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
return TREE_CONSTANT (TREE_OPERAND (arg, 0)) ? arg : NULL;
@@ -2412,6 +2414,8 @@ build1_stat (enum tree_code code, tree type, tree node MEM_STAT_DECL)
TREE_READONLY (t) = 0;
break;
+ case MISALIGNED_INDIRECT_REF:
+ case ALIGN_INDIRECT_REF:
case INDIRECT_REF:
/* Whether a dereference is readonly has nothing to do with whether
its operand is readonly. */
diff --git a/gcc/tree.def b/gcc/tree.def
index fa58abafad7..4c1f9861f9d 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -366,6 +366,17 @@ DEFTREECODE (BIT_FIELD_REF, "bit_field_ref", tcc_reference, 3)
/* C unary `*' or Pascal `^'. One operand, an expression for a pointer. */
DEFTREECODE (INDIRECT_REF, "indirect_ref", tcc_reference, 1)
+/* Like above, but aligns the referenced address (i.e., if the address
+ in P is not aligned on a TYPE_ALIGN boundary, then &(*P) != P). */
+DEFTREECODE (ALIGN_INDIRECT_REF, "align_indirect_ref", tcc_reference, 1)
+
+/* Same as INDIRECT_REF, but also specifies the alignment of the referenced
+ address:
+ Operand 0 is the referenced address (a pointer);
+ Operand 1 is an INTEGER_CST which represents the alignment of the address,
+ or 0 if the alignment is unknown. */
+DEFTREECODE (MISALIGNED_INDIRECT_REF, "misaligned_indirect_ref", tcc_reference, 2)
+
/* Array indexing.
Operand 0 is the array; operand 1 is a (single) array index.
Operand 2, if present, is a copy of TYPE_MIN_VALUE of the index.
@@ -886,6 +897,28 @@ DEFTREECODE (TREE_BINFO, "tree_binfo", tcc_exceptional, 0)
Operand 1 is the size of the type in the expression. */
DEFTREECODE (WITH_SIZE_EXPR, "with_size_expr", tcc_expression, 2)
+/* Extract elements from two input vectors Operand 0 and Operand 1, each of
+ size VS, according to the offset OFF defined by Operand 2 as
+ follows:
+ If OFF > 0, the last VS - OFF elements of vector OP0 are concatenated to
+ the first OFF elements of the vector OP1.
+ If OFF == 0, then the returned vector is OP1.
+ On different targets OFF may take different forms; it can be an address, in
+ which case its low log2(VS)-1 bits define the offset, or it can be a mask
+ generated by the builtin that targetm.vectorize.builtin_mask_for_load
+ returns. */
+DEFTREECODE (REALIGN_LOAD_EXPR, "realign_load", tcc_expression, 3)
+
+/* Extract elements from two input vectors Operand 0 and Operand 1, each of
+ size VS, according to the offset OFF defined by Operand 2 as
+ follows:
+ If OFF > 0, the last OFF elements of vector OP0 are concatenated to
+ the first VS - OFF elements of the vector OP1.
+ If OFF == 0, then the returned vector is OP0.
+ On different targets OFF may take different forms; it can be an address, in
+ which case its low log2(VS)-1 bits define the offset, or it can be a mask
+ generated by the builtin targetm.vectorize.mask_for_store_builtin_decl. */
+DEFTREECODE (REALIGN_STORE_EXPR, "realign_store", tcc_expression, 3)
+
/*
Local variables:
mode:c
diff --git a/gcc/tree.h b/gcc/tree.h
index db2a4fb5fc4..a913d74c589 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -390,7 +390,7 @@ struct tree_common GTY(())
..._TYPE
TREE_THIS_NOTRAP in
- INDIRECT_REF, ARRAY_REF, ARRAY_RANGE_REF
+ (ALIGN/MISALIGNED_)INDIRECT_REF, ARRAY_REF, ARRAY_RANGE_REF
deprecated_flag:
@@ -901,7 +901,8 @@ extern void tree_operand_check_failed (int, enum tree_code,
/* Nonzero means this node will not trap. In an INDIRECT_REF, means
accessing the memory pointed to won't generate a trap. However,
this only applies to an object when used appropriately: it doesn't
- mean that writing a READONLY mem won't trap.
+ mean that writing a READONLY mem won't trap. Similarly for
+ ALIGN_INDIRECT_REF and MISALIGNED_INDIRECT_REF.
In ARRAY_REF and ARRAY_RANGE_REF means that we know that the index
(or slice of the array) always belongs to the range of the array.
@@ -1145,8 +1146,9 @@ struct tree_vec GTY(())
#define TREE_OPERAND(NODE, I) TREE_OPERAND_CHECK (NODE, I)
#define TREE_COMPLEXITY(NODE) (EXPR_CHECK (NODE)->exp.complexity)
-/* In INDIRECT_REF. */
-#define REF_ORIGINAL(NODE) TREE_CHAIN (TREE_CHECK (NODE, INDIRECT_REF))
+/* In INDIRECT_REF, ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF. */
+#define REF_ORIGINAL(NODE) TREE_CHAIN (TREE_CHECK3 (NODE, \
+ INDIRECT_REF, ALIGN_INDIRECT_REF, MISALIGNED_INDIRECT_REF))
/* In a LABELED_BLOCK_EXPR node. */
#define LABELED_BLOCK_LABEL(NODE) \