summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog34
-rw-r--r--gcc/config/ia64/ia64-modes.def8
-rw-r--r--gcc/config/ia64/ia64-protos.h7
-rw-r--r--gcc/config/ia64/ia64.c421
-rw-r--r--gcc/config/ia64/ia64.h47
-rw-r--r--gcc/config/ia64/ia64.md12
-rw-r--r--gcc/config/ia64/itanium1.md19
-rw-r--r--gcc/config/ia64/itanium2.md23
-rw-r--r--gcc/config/ia64/predicates.md14
-rw-r--r--gcc/config/ia64/vect.md996
-rw-r--r--gcc/testsuite/ChangeLog10
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-30.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-8.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect.exp2
-rw-r--r--gcc/testsuite/lib/target-supports.exp9
15 files changed, 1544 insertions, 66 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ec06ccbdf5a..ec51b646fe5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,39 @@
2005-01-03 Richard Henderson <rth@redhat.com>
+ * config/ia64/ia64.c (TARGET_VECTOR_MODE_SUPPORTED_P): New.
+ (ia64_const_ok_for_letter_p): New.
+ (ia64_const_double_ok_for_letter_p): New.
+ (ia64_extra_constraint): New.
+ (ia64_expand_vecint_compare): New.
+ (ia64_expand_vcondu_v2si): New.
+ (ia64_expand_vecint_cmov): New.
+ (ia64_expand_vecint_minmax): New.
+ (ia64_print_operand): Add 'v'.
+ (ia64_preferred_reload_class): New.
+ (ia64_vector_mode_supported_p): New.
+ * config/ia64/ia64.h (UNITS_PER_SIMD_WORD): New.
+ (PREFERRED_RELOAD_CLASS): Move to function.
+ (CONST_OK_FOR_LETTER_P): Move to function.
+ (CONST_DOUBLE_OK_FOR_LETTER_P): Move to function.
+ (CONSTRAINT_OK_FOR_Q, CONSTRAINT_OK_FOR_R): Remove.
+ (CONSTRAINT_OK_FOR_S, CONSTRAINT_OK_FOR_T): Remove.
+ (EXTRA_CONSTRAINT): Move to function.
+ * config/ia64/ia64.md: Include vect.md.
+ (itanium_class): Add mmalua.
+ (type): Handle it.
+ * config/ia64/itanium1.md (1_mmalua): New. Add it to bypasses.
+ (1b_mmalua): New.
+ * config/ia64/itanium2.md (2_mmalua, 2b_mmalua): Similarly.
+ * config/ia64/predicates.md (gr_reg_or_0_operand): Accept any
+ CONST0_RTX.
+ (const_int_2bit_operand): New.
+ (fr_reg_or_0_operand): New.
+ * config/ia64/ia64-modes.def: Add vector modes.
+ * config/ia64/ia64-protos.h: Update.
+ * config/ia64/vect.md: New file.
+
+2005-01-03 Richard Henderson <rth@redhat.com>
+
* simplify-rtx.c (simplify_binary_operation): Handle VEC_CONCAT.
2005-01-03 Uros Bizjak <uros@kss-loka.si>
diff --git a/gcc/config/ia64/ia64-modes.def b/gcc/config/ia64/ia64-modes.def
index 6e10ac05993..3611638a7c2 100644
--- a/gcc/config/ia64/ia64-modes.def
+++ b/gcc/config/ia64/ia64-modes.def
@@ -66,3 +66,11 @@ INT_MODE (OI, 32);
so that flow doesn't do something stupid. */
CC_MODE (CCI);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODE (INT, QI, 16);
+VECTOR_MODE (INT, HI, 8);
+VECTOR_MODE (INT, SI, 4);
+VECTOR_MODE (FLOAT, SF, 2);
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 30ea5e1ba65..5b4b935cdd1 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -34,6 +34,10 @@ extern int ia64_st_address_bypass_p (rtx, rtx);
extern int ia64_ld_address_bypass_p (rtx, rtx);
extern int ia64_produce_address_p (rtx);
+extern bool ia64_const_ok_for_letter_p (HOST_WIDE_INT, char);
+extern bool ia64_const_double_ok_for_letter_p (rtx, char);
+extern bool ia64_extra_constraint (rtx, char);
+
extern rtx ia64_expand_move (rtx, rtx);
extern int ia64_move_ok (rtx, rtx);
extern int addp4_optimize_ok (rtx, rtx);
@@ -42,6 +46,8 @@ extern int ia64_depz_field_mask (rtx, rtx);
extern void ia64_split_tmode_move (rtx[]);
extern rtx spill_xfmode_operand (rtx, int);
extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
+extern void ia64_expand_vecint_cmov (rtx[]);
+extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
extern void ia64_reload_gp (void);
@@ -57,6 +63,7 @@ extern int ia64_hard_regno_rename_ok (int, int);
extern void ia64_initialize_trampoline (rtx, rtx, rtx);
extern void ia64_print_operand_address (FILE *, rtx);
extern void ia64_print_operand (FILE *, rtx, int);
+extern enum reg_class ia64_preferred_reload_class (rtx, enum reg_class);
extern enum reg_class ia64_secondary_reload_class (enum reg_class,
enum machine_mode, rtx);
extern void ia64_output_dwarf_dtprel (FILE*, int, rtx);
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 68e6ddbba14..a6197c0ef45 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -279,6 +279,7 @@ static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
+static bool ia64_vector_mode_supported_p (enum machine_mode mode);
/* Table of valid machine attributes. */
@@ -423,6 +424,8 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
in an order different from the specified program order. */
@@ -472,7 +475,8 @@ ia64_get_addr_area (tree decl)
}
static tree
-ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+ia64_handle_model_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
ia64_addr_area addr_area = ADDR_AREA_NORMAL;
ia64_addr_area area;
@@ -554,6 +558,103 @@ ia64_encode_section_info (tree decl, rtx rtl, int first)
ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
+/* Implement CONST_OK_FOR_LETTER_P. */
+
+bool
+ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
+{
+ switch (c)
+ {
+ case 'I':
+ return CONST_OK_FOR_I (value);
+ case 'J':
+ return CONST_OK_FOR_J (value);
+ case 'K':
+ return CONST_OK_FOR_K (value);
+ case 'L':
+ return CONST_OK_FOR_L (value);
+ case 'M':
+ return CONST_OK_FOR_M (value);
+ case 'N':
+ return CONST_OK_FOR_N (value);
+ case 'O':
+ return CONST_OK_FOR_O (value);
+ case 'P':
+ return CONST_OK_FOR_P (value);
+ default:
+ return false;
+ }
+}
+
+/* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */
+
+bool
+ia64_const_double_ok_for_letter_p (rtx value, char c)
+{
+ switch (c)
+ {
+ case 'G':
+ return CONST_DOUBLE_OK_FOR_G (value);
+ default:
+ return false;
+ }
+}
+
+/* Implement EXTRA_CONSTRAINT. */
+
+bool
+ia64_extra_constraint (rtx value, char c)
+{
+ switch (c)
+ {
+ case 'Q':
+ /* Non-volatile memory for FP_REG loads/stores. */
+ return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
+
+ case 'R':
+ /* 1..4 for shladd arguments. */
+ return (GET_CODE (value) == CONST_INT
+ && INTVAL (value) >= 1 && INTVAL (value) <= 4);
+
+ case 'S':
+ /* Non-post-inc memory for asms and other unsavory creatures. */
+ return (GET_CODE (value) == MEM
+ && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
+ && (reload_in_progress || memory_operand (value, VOIDmode)));
+
+ case 'T':
+ /* Symbol ref to small-address-area. */
+ return (GET_CODE (value) == SYMBOL_REF
+ && SYMBOL_REF_SMALL_ADDR_P (value));
+
+ case 'U':
+ /* Vector zero. */
+ return value == CONST0_RTX (GET_MODE (value));
+
+ case 'W':
+ /* An integer vector, such that conversion to an integer yields a
+ value appropriate for an integer 'J' constraint. */
+ if (GET_CODE (value) == CONST_VECTOR
+ && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
+ {
+ value = simplify_subreg (DImode, value, GET_MODE (value), 0);
+ return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
+ }
+ return false;
+
+ case 'Y':
+ /* A V2SF vector containing elements that satisfy 'G'. */
+ return
+ (GET_CODE (value) == CONST_VECTOR
+ && GET_MODE (value) == V2SFmode
+ && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
+ && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
+
+ default:
+ return false;
+ }
+}
+
/* Return 1 if the operands of a move are ok. */
int
@@ -1166,6 +1267,264 @@ ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
+/* Generate an integral vector comparison. */
+
+static bool
+ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
+ rtx dest, rtx op0, rtx op1)
+{
+ bool negate = false;
+ rtx x;
+
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ break;
+
+ case NE:
+ code = EQ;
+ negate = true;
+ break;
+
+ case LE:
+ code = GT;
+ negate = true;
+ break;
+
+ case GE:
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ x = op0;
+ op0 = op1;
+ op1 = x;
+ code = GT;
+ break;
+
+ case GTU:
+ case GEU:
+ case LTU:
+ case LEU:
+ {
+ rtx w0h, w0l, w1h, w1l, ch, cl;
+ enum machine_mode wmode;
+ rtx (*unpack_l) (rtx, rtx, rtx);
+ rtx (*unpack_h) (rtx, rtx, rtx);
+ rtx (*pack) (rtx, rtx, rtx);
+
+ /* We don't have native unsigned comparisons, but we can generate
+ them better than generic code can. */
+
+ if (mode == V2SImode)
+ abort ();
+ else if (mode == V8QImode)
+ {
+ wmode = V4HImode;
+ pack = gen_pack2_sss;
+ unpack_l = gen_unpack1_l;
+ unpack_h = gen_unpack1_h;
+ }
+ else if (mode == V4HImode)
+ {
+ wmode = V2SImode;
+ pack = gen_pack4_sss;
+ unpack_l = gen_unpack2_l;
+ unpack_h = gen_unpack2_h;
+ }
+ else
+ abort ();
+
+ /* Unpack into wider vectors, zero extending the elements. */
+
+ w0l = gen_reg_rtx (wmode);
+ w0h = gen_reg_rtx (wmode);
+ w1l = gen_reg_rtx (wmode);
+ w1h = gen_reg_rtx (wmode);
+ emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode)));
+ emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode)));
+ emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode)));
+ emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode)));
+
+ /* Compare in the wider mode. */
+
+ cl = gen_reg_rtx (wmode);
+ ch = gen_reg_rtx (wmode);
+ code = signed_condition (code);
+ ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l);
+ negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h);
+
+ /* Repack into a single narrower vector. */
+
+ emit_insn (pack (dest, cl, ch));
+ }
+ return negate;
+
+ default:
+ abort ();
+ }
+
+ x = gen_rtx_fmt_ee (code, mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return negate;
+}
+
+static void
+ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[])
+{
+ rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x;
+
+ /* In this case, we extract the two SImode quantities and generate
+ normal comparisons for each of them. */
+
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ op4l = gen_lowpart (SImode, operands[4]);
+ op5l = gen_lowpart (SImode, operands[5]);
+
+ op1h = gen_reg_rtx (SImode);
+ op2h = gen_reg_rtx (SImode);
+ op4h = gen_reg_rtx (SImode);
+ op5h = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h),
+ gen_lowpart (DImode, operands[1]), GEN_INT (32)));
+ emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h),
+ gen_lowpart (DImode, operands[2]), GEN_INT (32)));
+ emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h),
+ gen_lowpart (DImode, operands[4]), GEN_INT (32)));
+ emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h),
+ gen_lowpart (DImode, operands[5]), GEN_INT (32)));
+
+ bl = gen_reg_rtx (BImode);
+ x = gen_rtx_fmt_ee (code, BImode, op4l, op5l);
+ emit_insn (gen_rtx_SET (VOIDmode, bl, x));
+
+ bh = gen_reg_rtx (BImode);
+ x = gen_rtx_fmt_ee (code, BImode, op4h, op5h);
+ emit_insn (gen_rtx_SET (VOIDmode, bh, x));
+
+ /* With the results of the comparisons, emit conditional moves. */
+
+ dl = gen_reg_rtx (SImode);
+ x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l);
+ emit_insn (gen_rtx_SET (VOIDmode, dl, x));
+
+ dh = gen_reg_rtx (SImode);
+ x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h);
+ emit_insn (gen_rtx_SET (VOIDmode, dh, x));
+
+ /* Merge the two partial results back into a vector. */
+
+ x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+}
+
+/* Emit an integral vector conditional move. */
+
+void
+ia64_expand_vecint_cmov (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate;
+ rtx cmp, x, ot, of;
+
+ /* Since we don't have unsigned V2SImode comparisons, it's more efficient
+ to special-case them entirely. */
+ if (mode == V2SImode
+ && (code == GTU || code == GEU || code == LEU || code == LTU))
+ {
+ ia64_expand_vcondu_v2si (code, operands);
+ return;
+ }
+
+ cmp = gen_reg_rtx (mode);
+ negate = ia64_expand_vecint_compare (code, mode, cmp,
+ operands[4], operands[5]);
+
+ ot = operands[1+negate];
+ of = operands[2-negate];
+
+ if (ot == CONST0_RTX (mode))
+ {
+ if (of == CONST0_RTX (mode))
+ {
+ emit_move_insn (operands[0], ot);
+ return;
+ }
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, of);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+ else if (of == CONST0_RTX (mode))
+ {
+ x = gen_rtx_AND (mode, cmp, ot);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+ else
+ {
+ rtx t, f;
+
+ t = gen_reg_rtx (mode);
+ x = gen_rtx_AND (mode, cmp, operands[1+negate]);
+ emit_insn (gen_rtx_SET (VOIDmode, t, x));
+
+ f = gen_reg_rtx (mode);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, operands[2-negate]);
+ emit_insn (gen_rtx_SET (VOIDmode, f, x));
+
+ x = gen_rtx_IOR (mode, t, f);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ }
+}
+
+/* Emit an integral vector min or max operation. Return true if all done. */
+
+bool
+ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx xops[5];
+
+ /* These four combinations are supported directly. */
+ if (mode == V8QImode && (code == UMIN || code == UMAX))
+ return false;
+ if (mode == V4HImode && (code == SMIN || code == SMAX))
+ return false;
+
+ /* Everything else implemented via vector comparisons. */
+ xops[0] = operands[0];
+ xops[4] = xops[1] = operands[1];
+ xops[5] = xops[2] = operands[2];
+
+ switch (code)
+ {
+ case UMIN:
+ code = LTU;
+ break;
+ case UMAX:
+ code = GTU;
+ break;
+ case SMIN:
+ code = LT;
+ break;
+ case SMAX:
+ code = GT;
+ break;
+ default:
+ abort ();
+ }
+ xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
+
+ ia64_expand_vecint_cmov (xops);
+ return true;
+}
+
/* Emit the appropriate sequence for a call. */
void
@@ -3613,7 +3972,9 @@ ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
U Print an 8-bit sign extended number (K) as a 64-bit unsigned number
for Intel assembler.
r Print register name, or constant 0 as r0. HP compatibility for
- Linux kernel. */
+ Linux kernel.
+ v Print vector constant value as an 8-byte integer value. */
+
void
ia64_print_operand (FILE * file, rtx x, int code)
{
@@ -3771,6 +4132,11 @@ ia64_print_operand (FILE * file, rtx x, int code)
output_operand_lossage ("invalid %%r value");
return;
+ case 'v':
+ gcc_assert (GET_CODE (x) == CONST_VECTOR);
+ x = simplify_subreg (DImode, x, GET_MODE (x), 0);
+ break;
+
case '+':
{
const char *which;
@@ -3994,6 +4360,39 @@ ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
return 2;
}
+/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS
+ to use when copying X into that class. */
+
+enum reg_class
+ia64_preferred_reload_class (rtx x, enum reg_class class)
+{
+ switch (class)
+ {
+ case FR_REGS:
+ /* Don't allow volatile mem reloads into floating point registers.
+ This is defined to force reload to choose the r/m case instead
+ of the f/f case when reloading (set (reg fX) (mem/v)). */
+ if (MEM_P (x) && MEM_VOLATILE_P (x))
+ return NO_REGS;
+
+ /* Force all unrecognized constants into the constant pool. */
+ if (CONSTANT_P (x))
+ return NO_REGS;
+ break;
+
+ case AR_M_REGS:
+ case AR_I_REGS:
+ if (!OBJECT_P (x))
+ return NO_REGS;
+ break;
+
+ default:
+ break;
+ }
+
+ return class;
+}
+
/* This function returns the register class required for a secondary
register when copying between one of the registers in CLASS, and X,
using MODE. A return value of NO_REGS means that no secondary register
@@ -8587,4 +8986,22 @@ ia64_scalar_mode_supported_p (enum machine_mode mode)
}
}
+static bool
+ia64_vector_mode_supported_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ return true;
+
+ case V2SFmode:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
#include "gt-ia64.h"
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index c38854b8bbb..21f9dc3b091 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -338,6 +338,8 @@ extern const char *ia64_tune_string;
#define UNITS_PER_WORD 8
+#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
+
#define POINTER_SIZE (TARGET_ILP32 ? 32 : 64)
/* A C expression whose value is zero if pointers that need to be extended
@@ -1024,18 +1026,8 @@ enum reg_class
The value is a register class; perhaps CLASS, or perhaps another, smaller
class. */
-/* Don't allow volatile mem reloads into floating point registers. This
- is defined to force reload to choose the r/m case instead of the f/f case
- when reloading (set (reg fX) (mem/v)).
-
- Do not reload expressions into AR regs. */
-
#define PREFERRED_RELOAD_CLASS(X, CLASS) \
- (CLASS == FR_REGS && GET_CODE (X) == MEM && MEM_VOLATILE_P (X) ? NO_REGS \
- : CLASS == FR_REGS && GET_CODE (X) == CONST_DOUBLE ? NO_REGS \
- : !OBJECT_P (X) \
- && (CLASS == AR_M_REGS || CLASS == AR_I_REGS) ? NO_REGS \
- : CLASS)
+ ia64_preferred_reload_class (X, CLASS)
/* You should define this macro to indicate to the reload phase that it may
need to allocate at least one register for a reload in addition to the
@@ -1106,15 +1098,7 @@ enum reg_class
#define CONST_OK_FOR_P(VALUE) ((VALUE) == 0 || (VALUE) == -1)
#define CONST_OK_FOR_LETTER_P(VALUE, C) \
-((C) == 'I' ? CONST_OK_FOR_I (VALUE) \
- : (C) == 'J' ? CONST_OK_FOR_J (VALUE) \
- : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \
- : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \
- : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \
- : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \
- : (C) == 'O' ? CONST_OK_FOR_O (VALUE) \
- : (C) == 'P' ? CONST_OK_FOR_P (VALUE) \
- : 0)
+ ia64_const_ok_for_letter_p (VALUE, C)
/* A C expression that defines the machine-dependent operand constraint letters
(`G', `H') that specify particular ranges of `const_double' values. */
@@ -1125,33 +1109,14 @@ enum reg_class
|| (VALUE) == CONST1_RTX (GET_MODE (VALUE)))
#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
- ((C) == 'G' ? CONST_DOUBLE_OK_FOR_G (VALUE) : 0)
+ ia64_const_double_ok_for_letter_p (VALUE, C)
/* A C expression that defines the optional machine-dependent constraint
letters (`Q', `R', `S', `T', `U') that can be used to segregate specific
types of operands, usually memory references, for the target machine. */
-/* Non-volatile memory for FP_REG loads/stores. */
-#define CONSTRAINT_OK_FOR_Q(VALUE) \
- (memory_operand((VALUE), VOIDmode) && ! MEM_VOLATILE_P (VALUE))
-/* 1..4 for shladd arguments. */
-#define CONSTRAINT_OK_FOR_R(VALUE) \
- (GET_CODE (VALUE) == CONST_INT && INTVAL (VALUE) >= 1 && INTVAL (VALUE) <= 4)
-/* Non-post-inc memory for asms and other unsavory creatures. */
-#define CONSTRAINT_OK_FOR_S(VALUE) \
- (GET_CODE (VALUE) == MEM \
- && GET_RTX_CLASS (GET_CODE (XEXP ((VALUE), 0))) != RTX_AUTOINC \
- && (reload_in_progress || memory_operand ((VALUE), VOIDmode)))
-/* Symbol ref to small-address-area: */
-#define CONSTRAINT_OK_FOR_T(VALUE) \
- (GET_CODE (VALUE) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (VALUE))
-
#define EXTRA_CONSTRAINT(VALUE, C) \
- ((C) == 'Q' ? CONSTRAINT_OK_FOR_Q (VALUE) \
- : (C) == 'R' ? CONSTRAINT_OK_FOR_R (VALUE) \
- : (C) == 'S' ? CONSTRAINT_OK_FOR_S (VALUE) \
- : (C) == 'T' ? CONSTRAINT_OK_FOR_T (VALUE) \
- : 0)
+ ia64_extra_constraint (VALUE, C)
/* Basic Stack Layout */
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 15650c5c718..8ce878d0c43 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -121,9 +121,9 @@
(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,
fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,
- chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,
- syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,nop_b,nop_f,
- nop_i,nop_m,nop_x,lfetch,pre_cycle"
+ chk_s,long_i,mmalua,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,
+ st,syst_m0, syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,
+ nop_b,nop_f,nop_i,nop_m,nop_x,lfetch,pre_cycle"
(const_string "unknown"))
;; chk_s has an I and an M form; use type A for convenience.
@@ -132,7 +132,8 @@
(eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
(eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
(eq_attr "itanium_class" "lfetch") (const_string "M")
- (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
+ (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog,mmalua")
+ (const_string "A")
(eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
(eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
(eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I")
@@ -6036,3 +6037,6 @@
"addp4_optimize_ok (operands[1], operands[2])"
"addp4 %0 = %1, %2"
[(set_attr "itanium_class" "ialu")])
+
+;; Vector operations
+(include "vect.md")
diff --git a/gcc/config/ia64/itanium1.md b/gcc/config/ia64/itanium1.md
index 4f01560620a..f4fcd1c2fd6 100644
--- a/gcc/config/ia64/itanium1.md
+++ b/gcc/config/ia64/itanium1.md
@@ -589,6 +589,11 @@
(and (and (eq_attr "cpu" "itanium")
(eq_attr "itanium_class" "ilog"))
(eq (symbol_ref "bundling_p") (const_int 0))) "1_A")
+(define_insn_reservation "1_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium")
+ (eq_attr "itanium_class" "mmalua"))
+ (eq (symbol_ref "bundling_p") (const_int 0)))
+ "1_A")
(define_insn_reservation "1_ishf" 1
(and (and (eq_attr "cpu" "itanium")
(eq_attr "itanium_class" "ishf"))
@@ -920,7 +925,7 @@
;; There is only one insn `mov ar.pfs =' for toar_i.
(define_bypass 0 "1_tobr,1_topr,1_toar_i" "1_br,1_scall")
-(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf")
+(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf,1_mmalua")
;; ??? howto describe ialu for I slot only. We use ialu_addr for that
;;(define_bypass 2 "1_ialu" "1_ld" "ia64_ld_address_bypass_p")
;; ??? howto describe ialu st/address for I slot only. We use ialu_addr
@@ -940,7 +945,7 @@
;; Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
;; but HP engineers say any non-MM operation.
-(define_bypass 4 "1_mmmul,1_mmshf"
+(define_bypass 4 "1_mmmul,1_mmshf,1_mmalua"
"1_br,1_fcmp,1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\
1_frbr,1_frfr,1_frpr,1_ialu,1_icmp,1_ilog,1_ishf,1_ld,1_chk_s,\
1_long_i,1_rse_m,1_sem,1_stf,1_st,1_syst_m0,1_syst_m,\
@@ -958,13 +963,13 @@
;; We don't use here fcmp because scall may be predicated.
(define_bypass 0 "1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\
1_frbr,1_frfr,1_frpr,1_ialu,1_ialu_addr,1_ilog,1_ishf,\
- 1_ld,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,1_tofr,\
- 1_xmpy,1_xtd" "1_scall")
+ 1_ld,1_long_i,2_mmalua,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,\
+ 1_tofr,1_xmpy,1_xtd" "1_scall")
(define_bypass 0 "1_unknown,1_ignore,1_stop_bit,1_br,1_fcmp,1_fcvtfx,\
1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,1_frbr,1_frfr,\
1_frpr,1_ialu,1_ialu_addr,1_icmp,1_ilog,1_ishf,1_ld,\
- 1_chk_s,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_nop,\
+ 1_chk_s,1_long_i,1_mmalua,1_mmmul,1_mmshf,1_mmshfi,1_nop,\
1_nop_b,1_nop_f,1_nop_i,1_nop_m,1_nop_x,1_rse_m,1_scall,\
1_sem,1_stf,1_st,1_syst_m0,1_syst_m,1_tbit,1_toar_i,\
1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd,1_lfetch"
@@ -1458,6 +1463,10 @@
(and (and (eq_attr "cpu" "itanium")
(eq_attr "itanium_class" "ilog"))
(ne (symbol_ref "bundling_p") (const_int 0))) "1b_A")
+(define_insn_reservation "1b_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium")
+ (eq_attr "itanium_class" "mmalua"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A")
(define_insn_reservation "1b_ishf" 1
(and (and (eq_attr "cpu" "itanium")
(eq_attr "itanium_class" "ishf"))
diff --git a/gcc/config/ia64/itanium2.md b/gcc/config/ia64/itanium2.md
index b78b6165354..ad24d19931e 100644
--- a/gcc/config/ia64/itanium2.md
+++ b/gcc/config/ia64/itanium2.md
@@ -786,6 +786,10 @@
(and (and (eq_attr "cpu" "itanium2")
(eq_attr "itanium_class" "ilog"))
(eq (symbol_ref "bundling_p") (const_int 0))) "2_A")
+(define_insn_reservation "2_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmalua"))
+ (eq (symbol_ref "bundling_p") (const_int 0))) "2_A")
;; Latency time ???
(define_insn_reservation "2_ishf" 1
(and (and (eq_attr "cpu" "itanium2")
@@ -1016,23 +1020,24 @@
(define_bypass 0 "2_tbit" "2_br,2_scall")
(define_bypass 2 "2_ld" "2_ld" "ia64_ld_address_bypass_p")
(define_bypass 2 "2_ld" "2_st" "ia64_st_address_bypass_p")
-(define_bypass 2 "2_ld" "2_mmmul,2_mmshf")
-(define_bypass 3 "2_ilog" "2_mmmul,2_mmshf")
-(define_bypass 3 "2_ialu" "2_mmmul,2_mmshf")
-(define_bypass 3 "2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld")
+(define_bypass 2 "2_ld" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_ilog" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_ialu" "2_mmalua,2_mmmul,2_mmshf")
+(define_bypass 3 "2_mmalua,2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld")
(define_bypass 6 "2_tofr" "2_frfr,2_stf")
(define_bypass 7 "2_fmac" "2_frfr,2_stf")
;; We don't use here fcmp because scall may be predicated.
(define_bypass 0 "2_fcvtfx,2_fld,2_fmac,2_fmisc,2_frar_i,2_frar_m,\
2_frbr,2_frfr,2_frpr,2_ialu,2_ilog,2_ishf,2_ld,2_long_i,\
- 2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,2_xmpy,2_xtd"
+ 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,\
+ 2_xmpy,2_xtd"
"2_scall")
(define_bypass 0 "2_unknown,2_ignore,2_stop_bit,2_br,2_fcmp,2_fcvtfx,2_fld,\
2_fmac,2_fmisc,2_frar_i,2_frar_m,2_frbr,2_frfr,2_frpr,\
- 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,\
- 2_long_i,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\
+ 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,2_long_i,\
+ 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\
2_nop_i,2_nop_m,2_nop_x,2_rse_m,2_scall,2_sem,2_stf,2_st,\
2_syst_m0,2_syst_m,2_tbit,2_toar_i,2_toar_m,2_tobr,2_tofr,\
2_topr,2_xmpy,2_xtd,2_lfetch" "2_ignore")
@@ -1586,6 +1591,10 @@
(and (and (eq_attr "cpu" "itanium2")
(eq_attr "itanium_class" "ilog"))
(ne (symbol_ref "bundling_p") (const_int 0))) "2b_A")
+(define_insn_reservation "2b_mmalua" 2
+ (and (and (eq_attr "cpu" "itanium2")
+ (eq_attr "itanium_class" "mmalua"))
+ (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A")
;; Latency time ???
(define_insn_reservation "2b_ishf" 1
(and (and (eq_attr "cpu" "itanium2")
diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md
index 6166612ecad..7c9a76f1dce 100644
--- a/gcc/config/ia64/predicates.md
+++ b/gcc/config/ia64/predicates.md
@@ -236,8 +236,8 @@
;; True if OP is a GR register operand, or zero.
(define_predicate "gr_reg_or_0_operand"
(ior (match_operand 0 "gr_register_operand")
- (and (match_code "const_int")
- (match_test "op == const0_rtx"))))
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
;; True if OP is a GR register operand, or a 5 bit immediate operand.
(define_predicate "gr_reg_or_5bit_operand"
@@ -320,6 +320,10 @@
INTVAL (op) == 1 || INTVAL (op) == 4 ||
INTVAL (op) == 8 || INTVAL (op) == 16")))
+;; True if OP is 0..3.
+(define_predicate "const_int_2bit_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3")))
;; True if OP is a floating-point constant zero, one, or a register.
(define_predicate "fr_reg_or_fp01_operand"
@@ -332,6 +336,12 @@
(and (match_operand 0 "fr_reg_or_fp01_operand")
(not (match_code "subreg"))))
+;; True if OP is a constant zero, or a register.
+(define_predicate "fr_reg_or_0_operand"
+ (ior (match_operand 0 "fr_register_operand")
+ (and (match_code "const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
+
;; True if this is a comparison operator, which accepts a normal 8-bit
;; signed immediate operand.
(define_predicate "normal_comparison_operator"
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
new file mode 100644
index 00000000000..a42ad2a2dc1
--- /dev/null
+++ b/gcc/config/ia64/vect.md
@@ -0,0 +1,996 @@
+;; IA-64 machine description for vector operations.
+;; Copyright (C) 2004
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+
+;; Integer vector operations
+
+(define_mode_macro VECINT [V8QI V4HI V2SI])
+(define_mode_macro VECINT12 [V8QI V4HI])
+(define_mode_macro VECINT24 [V4HI V2SI])
+(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VECINT 0 "general_operand" "")
+ (match_operand:VECINT 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:VECINT 0 "destination_operand"
+ "=r,r,r,r,m ,*f ,*f,Q ,r ,*f")
+ (match_operand:VECINT 1 "move_operand"
+ "rU,W,i,m,rU,U*f,Q ,*f,*f,r "))]
+ "ia64_move_ok (operands[0], operands[1])"
+ "@
+ mov %0 = %r1
+ addl %0 = %v1, r0
+ movl %0 = %v1
+ ld8%O1 %0 = %1%P1
+ st8%Q0 %0 = %r1%P0
+ mov %0 = %F1
+ ldf8 %0 = %1%P1
+ stf8 %0 = %1%P0
+ getf.sig %0 = %1
+ setf.sig %0 = %1"
+ [(set_attr "itanium_class" "ialu,ialu,long_i,ld,st,fmisc,fld,stf,frfr,tofr")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (not:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))]
+ ""
+ "andcm %0 = -1, %1"
+ [(set_attr "itanium_class" "ilog")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (and:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ and %0 = %2, %1
+ fand %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "*andnot<mode>"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (and:VECINT
+ (not:VECINT (match_operand:VECINT 1 "grfr_register_operand" "r,*f"))
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ andcm %0 = %2, %1
+ fandcm %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (ior:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ or %0 = %2, %1
+ for %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f")
+ (xor:VECINT
+ (match_operand:VECINT 1 "grfr_register_operand" "r,*f")
+ (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))]
+ ""
+ "@
+ xor %0 = %2, %1
+ fxor %0 = %2, %1"
+ [(set_attr "itanium_class" "ilog,fmisc")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (neg:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize> %0 = r0, %1"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (plus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")
+ (match_operand:VECINT 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*ssadd<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (ss_plus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize>.sss %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*usadd<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (us_plus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "padd<vecsize>.uuu %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (minus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")
+ (match_operand:VECINT 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*sssub<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (ss_minus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize>.sss %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*ussub<mode>3"
+ [(set (match_operand:VECINT12 0 "gr_register_operand" "=r")
+ (us_minus:VECINT12
+ (match_operand:VECINT12 1 "gr_register_operand" "r")
+ (match_operand:VECINT12 2 "gr_register_operand" "r")))]
+ ""
+ "psub<vecsize>.uuu %0 = %1, %2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_expand "mulv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "")
+ (mult:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+{
+ rtx l1, h1, l2, h2, lm, hm, lz, hz;
+
+ l1 = gen_reg_rtx (V4HImode);
+ h1 = gen_reg_rtx (V4HImode);
+ l2 = gen_reg_rtx (V4HImode);
+ h2 = gen_reg_rtx (V4HImode);
+
+ /* Zero-extend the QImode elements into two words of HImode elements. */
+ emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1),
+ operands[1], CONST0_RTX (V8QImode)));
+ emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2),
+ operands[2], CONST0_RTX (V8QImode)));
+ emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1),
+ operands[1], CONST0_RTX (V8QImode)));
+ emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2),
+ operands[2], CONST0_RTX (V8QImode)));
+
+ /* Multiply. */
+ lm = gen_reg_rtx (V4HImode);
+ hm = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mulv4hi3 (lm, l1, l2));
+ emit_insn (gen_mulv4hi3 (hm, h1, h2));
+
+ /* Zap the high order bytes of the HImode elements. There are several
+ ways that this could be done. On Itanium2, there's 1 cycle latency
+ moving between the ALU units and the PALU units, so using AND would
+ be 3 cycles latency into the eventual pack insn, whereas using MIX
+ is only 2 cycles. */
+ lz = gen_reg_rtx (V4HImode);
+ hz = gen_reg_rtx (V4HImode);
+ emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
+ gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
+ emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
+ gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
+
+ /* Repack the HImode elements as QImode elements. */
+ emit_insn (gen_pack2_sss (operands[0], lz, hz));
+ DONE;
+})
+
+(define_insn "mulv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r")
+ (match_operand:V4HI 2 "gr_register_operand" "r")))]
+ ""
+ "pmpyshr2 %0 = %1, %2, 0"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_expand "umax<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (smax:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:VECINT 2 "gr_register_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (UMAX, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (smax:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (SMAX, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (umin:VECINT (match_operand:VECINT 1 "gr_register_operand" "")
+ (match_operand:VECINT 2 "gr_register_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (UMIN, <MODE>mode, operands))
+ DONE;
+})
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (smin:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ if (ia64_expand_vecint_minmax (SMIN, <MODE>mode, operands))
+ DONE;
+})
+
+(define_insn "*umaxv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (umax:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+ "pmax1.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*smaxv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (smax:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pmax2 %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*uminv8qi3"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (umin:V8QI (match_operand:V8QI 1 "gr_register_operand" "r")
+ (match_operand:V8QI 2 "gr_register_operand" "r")))]
+ ""
+ "pmin1.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*sminv4hi3"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (smin:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pmin2 %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (ashift:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshl<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (ashiftrt:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshr<vecsize> %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:VECINT24 0 "gr_register_operand" "=r")
+ (lshiftrt:VECINT24
+ (match_operand:VECINT24 1 "gr_register_operand" "r")
+ (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))]
+ ""
+ "pshr<vecsize>.u %0 = %1, %2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (if_then_else:VECINT
+ (match_operator 3 ""
+ [(match_operand:VECINT 4 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 5 "gr_reg_or_0_operand" "")])
+ (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ ia64_expand_vecint_cmov (operands);
+ DONE;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "")
+ (if_then_else:VECINT
+ (match_operator 3 ""
+ [(match_operand:VECINT 4 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 5 "gr_reg_or_0_operand" "")])
+ (match_operand:VECINT 1 "gr_reg_or_0_operand" "")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))]
+ ""
+{
+ ia64_expand_vecint_cmov (operands);
+ DONE;
+})
+
+(define_insn "*cmpeq_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (eq:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pcmp<vecsize>.eq %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "*cmpgt_<mode>"
+ [(set (match_operand:VECINT 0 "gr_register_operand" "=r")
+ (gt:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))]
+ ""
+ "pcmp<vecsize>.gt %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmalua")])
+
+(define_insn "pack2_sss"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+ "pack2.sss %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*pack2_uss"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_concat:V8QI
+ (us_truncate:V4QI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+ "pack2.uss %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "pack4_sss"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
+ ""
+ "pack4.sss %0 = %r1, %r2"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "unpack1_l"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 8)
+ (const_int 9)
+ (const_int 10)
+ (const_int 11)])))]
+ ""
+ "unpack1.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "unpack1_h"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)
+ (const_int 12)
+ (const_int 13)
+ (const_int 14)
+ (const_int 15)])))]
+ ""
+ "unpack1.h %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "mix1_r"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0)
+ (const_int 8)
+ (const_int 2)
+ (const_int 10)
+ (const_int 4)
+ (const_int 12)
+ (const_int 6)
+ (const_int 14)])))]
+ ""
+ "mix1.r %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mix1_l"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1)
+ (const_int 9)
+ (const_int 3)
+ (const_int 11)
+ (const_int 5)
+ (const_int 13)
+ (const_int 7)
+ (const_int 15)])))]
+ ""
+ "mix1.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_rev"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 7)
+ (const_int 6)
+ (const_int 5)
+ (const_int 4)
+ (const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))]
+ ""
+ "mux1 %0 = %1, @rev"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_mix"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 2)
+ (const_int 6)
+ (const_int 1)
+ (const_int 5)
+ (const_int 3)
+ (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @mix"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_shuf"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)
+ (const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @shuf"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_alt"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))]
+ ""
+ "mux1 %0 = %1, @alt"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_brcst_v8qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_select:V8QI
+ (match_operand:V8QI 1 "gr_register_operand" "r")
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)])))]
+ ""
+ "mux1 %0 = %1, @brcst"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux1_brcst_qi"
+ [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
+ (vec_duplicate:V8QI
+ (match_operand:QI 1 "gr_register_operand" "r")))]
+ ""
+ "mux1 %0 = %1, @brcst"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "unpack2_l"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 1)
+ (const_int 5)])))]
+ ""
+ "unpack2.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "unpack2_h"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 3)
+ (const_int 7)])))]
+ ""
+ "unpack2.h %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mix2_r"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 2)
+ (const_int 6)])))]
+ ""
+ "mix2.r %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mix2_l"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 3)
+ (const_int 7)])))]
+ ""
+ "mix2.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux2"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "gr_register_operand" "r")
+ (parallel [(match_operand 2 "const_int_2bit_operand" "")
+ (match_operand 3 "const_int_2bit_operand" "")
+ (match_operand 4 "const_int_2bit_operand" "")
+ (match_operand 5 "const_int_2bit_operand" "")])))]
+ ""
+{
+ int mask;
+ mask = INTVAL (operands[2]);
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+ return "%,mux2 %0 = %1, %2";
+}
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_insn "*mux2_brcst_hi"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
+ (vec_duplicate:V4HI
+ (match_operand:HI 1 "gr_register_operand" "r")))]
+ ""
+ "mux2 %0 = %1, 0"
+ [(set_attr "itanium_class" "mmshf")])
+
+;; Note that mix4.r performs the exact same operation.
+(define_insn "*unpack4_l"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ ""
+ "unpack4.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+;; Note that mix4.l performs the exact same operation.
+(define_insn "*unpack4_h"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
+ (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ ""
+ "unpack4.h %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+(define_expand "vec_initv2si"
+ [(match_operand:V2SF 0 "gr_register_operand" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ rtx op1 = XVECEXP (operands[1], 0, 0);
+ rtx op2 = XVECEXP (operands[1], 0, 1);
+ rtx x;
+
+ if (GET_CODE (op1) == CONST_INT && GET_CODE (op2) == CONST_INT)
+ {
+ x = gen_rtx_CONST_VECTOR (V2SImode, XVEC (operands[1], 0));
+ emit_move_insn (operands[0], x);
+ DONE;
+ }
+
+ if (!gr_reg_or_0_operand (op1, SImode))
+ op1 = force_reg (SImode, op1);
+ if (!gr_reg_or_0_operand (op2, SImode))
+ op2 = force_reg (SImode, op2);
+
+ x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*vecinit_v2si"
+ [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "gr_reg_or_0_operand" "rO")
+ (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
+ ""
+ "unpack4.l %0 = %r2, %r1"
+ [(set_attr "itanium_class" "mmshf")])
+
+;; Missing operations
+;; padd.uus
+;; pavg
+;; pavgsub
+;; pmpy
+;; pmpyshr, general form
+;; psad
+;; pshladd
+;; pshradd
+;; psub.uus
+;; vec_set<mode>
+;; vec_extract<mode>
+;; vec_init<mode>
+
+;; Floating point vector operations
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "general_operand" "")
+ (match_operand:V2SF 1 "general_operand" ""))]
+ ""
+{
+ rtx op1 = ia64_expand_move (operands[0], operands[1]);
+ if (!op1)
+ DONE;
+ operands[1] = op1;
+})
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "destination_operand"
+ "=f,f,f,Q,*r ,*r,*r,*r,m ,f ,*r")
+ (match_operand:V2SF 1 "move_operand"
+ "fU,Y,Q,f,U*r,W ,i ,m ,*r,*r,f "))]
+ "ia64_move_ok (operands[0], operands[1])"
+{
+ static const char * const alt[] = {
+ "%,mov %0 = %F1",
+ "%,fpack %0 = %F2, %F1",
+ "%,ldf8 %0 = %1%P1",
+ "%,stf8 %0 = %1%P0",
+ "%,mov %0 = %r1",
+ "%,addl %0 = %v1, r0",
+ "%,movl %0 = %v1",
+ "%,ld8%O1 %0 = %1%P1",
+ "%,st8%Q0 %0 = %r1%P0",
+ "%,setf.sig %0 = %1",
+ "%,getf.sig %0 = %1"
+ };
+
+ if (which_alternative == 1)
+ {
+ operands[2] = XVECEXP (operands[1], 0, 1);
+ operands[1] = XVECEXP (operands[1], 0, 0);
+ }
+
+ return alt[which_alternative];
+}
+ [(set_attr "itanium_class" "fmisc,fmisc,fld,stf,ialu,ialu,long_i,ld,st,tofr,frfr")])
+
+(define_insn "absv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))]
+ ""
+ "fpabs %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "negv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))]
+ ""
+ "fpneg %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*negabsv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF
+ (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f"))))]
+ ""
+ "fpnegabs %0 = %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "addv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (plus:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "")
+ (match_dup 3))
+ (match_operand:V2SF 2 "fr_register_operand" "")))]
+ ""
+{
+ rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode));
+ operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v));
+})
+
+(define_expand "subv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (minus:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "")
+ (match_dup 3))
+ (match_operand:V2SF 2 "fr_register_operand" "")))]
+ ""
+{
+ rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode));
+ operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v));
+})
+
+(define_insn "mulv2sf3"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmpy %0 = %1, %2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fpma"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (plus:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f"))
+ (match_operand:V2SF 3 "fr_register_operand" "f")))]
+ ""
+ "fpma %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fpms"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (minus:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f"))
+ (match_operand:V2SF 3 "fr_register_operand" "f")))]
+ ""
+ "fpms %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fpnmpy"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (neg:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f"))))]
+ ""
+ "fpnmpy %0 = %1, %2"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "*fpnma"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (plus:V2SF
+ (neg:V2SF
+ (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))
+ (match_operand:V2SF 3 "fr_register_operand" "f")))]
+ ""
+ "fpnma %0 = %1, %2, %3"
+ [(set_attr "itanium_class" "fmac")])
+
+(define_insn "smaxv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (smax:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmax %0 = %1, %2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "sminv2sf2"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (smin:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_register_operand" "f")))]
+ ""
+ "fpmin %0 = %1, %2"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "vcondv2sf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "")
+ (if_then_else:V2SF
+ (match_operator 3 ""
+ [(match_operand:V2SF 4 "fr_reg_or_0_operand" "")
+ (match_operand:V2SF 5 "fr_reg_or_0_operand" "")])
+ (match_operand:V2SF 1 "fr_reg_or_0_operand" "")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "")))]
+ ""
+{
+ rtx x, cmp;
+
+ PUT_MODE (operands[3], V2SFmode);
+ switch (GET_CODE (operands[3]))
+ {
+ case EQ:
+ case NE:
+ case LT:
+ case LE:
+ case UNORDERED:
+ case ORDERED:
+ break;
+
+ case GT:
+ case GE:
+ x = XEXP (operands[3], 0);
+ XEXP (operands[3], 0) = XEXP (operands[3], 1);
+ XEXP (operands[3], 1) = x;
+ PUT_CODE (operands[3], swap_condition (GET_CODE (operands[3])));
+ break;
+
+ default:
+ abort ();
+ }
+
+ cmp = gen_reg_rtx (V2SFmode);
+ emit_insn (gen_rtx_SET (VOIDmode, cmp, operands[3]));
+
+ x = gen_rtx_IF_THEN_ELSE (V2SFmode, cmp, operands[1], operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*fpcmp"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (if_then_else:V2SF
+ (match_operand:V2SF 1 "fr_register_operand" "f")
+ (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU")
+ (match_operand:V2SF 3 "fr_reg_or_0_operand" "fU")))]
+ ""
+ "fselect %0 = %2, %3, %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "fr_register_operand" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ rtx op1 = XVECEXP (operands[1], 0, 0);
+ rtx op2 = XVECEXP (operands[1], 0, 1);
+ rtx x;
+
+ if (GET_CODE (op1) == CONST_DOUBLE && GET_CODE (op2) == CONST_DOUBLE)
+ {
+ x = gen_rtx_CONST_VECTOR (V2SFmode, XVEC (operands[1], 0));
+ emit_move_insn (operands[0], x);
+ DONE;
+ }
+
+ if (!fr_reg_or_fp01_operand (op1, SFmode))
+ op1 = force_reg (SFmode, op1);
+ if (!fr_reg_or_fp01_operand (op2, SFmode))
+ op2 = force_reg (SFmode, op2);
+
+ x = gen_rtx_VEC_CONCAT (V2SFmode, op1, op2);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+ DONE;
+})
+
+(define_insn "*fpack_sfsf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ ""
+ "fpack %0 = %F2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*fpack_sfxf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
+ (float_truncate:SF
+ (match_operand 2 "fr_register_operand" "f"))))]
+ "GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode"
+ "fpack %0 = %2, %F1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*fpack_xfsf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_concat:V2SF
+ (float_truncate:SF
+ (match_operand 1 "fr_register_operand" "f"))
+ (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
+ "GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode"
+ "fpack %0 = %F2, %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*fpack_xfxf"
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+ (vec_concat:V2SF
+ (float_truncate:SF
+ (match_operand 1 "fr_register_operand" "f"))
+ (float_truncate:SF
+ (match_operand 2 "fr_register_operand" "f"))))]
+ "(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode)
+ && (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)"
+ "fpack %0 = %2, %1"
+ [(set_attr "itanium_class" "fmisc")])
+
+;; Missing operations
+;; fprcpa
+;; fpsqrta
+;; vec_setv2sf
+;; vec_extractv2sf
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8c4ea6a4951..56e4aed962d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,4 +1,14 @@
+2005-01-03 Richard Henderson <rth@redhat.com>
+
+ * gcc.dg/vect/vect.exp: Enable for ia64.
+ * lib/target-supports.exp (check_effective_target_vect_int): Likewise.
+ (check_effective_target_vect_float): Likewise.
+ (check_effective_target_vect_no_align): Likewise.
+ * gcc.dg/vect/vect-30.c: XFAIL for vect_no_align.
+ * gcc.dg/vect/vect-8.c: Likewise.
+
2005-01-03 Uros Bizjak <uros@kss-loka.si>
+
PR target/19235
* gcc.dg/pr19236-1.c: New test case.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-30.c b/gcc/testsuite/gcc.dg/vect/vect-30.c
index 056d689c81c..c6f03ed2121 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-30.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-30.c
@@ -59,4 +59,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* Need misalignment support, or cgraph to delay emitting the arrays until
+ after vectorization can force-align them. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-8.c b/gcc/testsuite/gcc.dg/vect/vect-8.c
index 960eb34c0df..7712a02cfab 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-8.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-8.c
@@ -34,4 +34,6 @@ int main (void)
return main1 (N);
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* Need misalignment support, or cgraph to delay emitting the arrays until
+ after vectorization can force-align them. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
index 14e459783a9..94fd56c588d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -65,6 +65,8 @@ if [istarget "powerpc*-*-*"] {
} else {
set dg-do-what-default compile
}
+} elseif [istarget "ia64-*-*"] {
+ set dg-do-what-default run
} else {
return
}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 19d95afa271..defb4d4e75d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -455,7 +455,8 @@ proc check_effective_target_vect_int { } {
|| [istarget powerpc*-*-*]
|| [istarget x86_64-*-*]
|| [istarget sparc*-*-*]
- || [istarget alpha*-*-*] } {
+ || [istarget alpha*-*-*]
+ || [istarget ia64-*-*] } {
set et_vect_int_saved 1
}
}
@@ -496,7 +497,8 @@ proc check_effective_target_vect_float { } {
if { [istarget i?86-*-*]
|| [istarget powerpc*-*-*]
|| [istarget mipsisa64*-*-*]
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || [istarget ia64-*-*] } {
set et_vect_float_saved 1
}
}
@@ -583,7 +585,8 @@ proc check_effective_target_vect_no_align { } {
} else {
set et_vect_no_align_saved 0
if { [istarget mipsisa64*-*-*]
- || [istarget sparc*-*-*] } {
+ || [istarget sparc*-*-*]
+ || [istarget ia64-*-*] } {
set et_vect_no_align_saved 1
}
}