author    bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-09-02 21:27:57 +0000
committer bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-09-02 21:27:57 +0000
commit    d93d495ba54e9746659d91d69119157f038a815f (patch)
tree      bfc67c88ff54c4880beb663c32eedf5f95f15123 /gcc/config
parent    e827aa43033edbfc6bac3fa2ff04421737b78421 (diff)
2008-09-02 Basile Starynkevitch <basile@starynkevitch.net>
	MELT branch merged with trunk r139912 after graphite merge into trunk.
	graphite uses PPL & CLOOG...

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@139915 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/alpha/alpha.c          | 196
-rw-r--r--  gcc/config/alpha/alpha.h          |   2
-rw-r--r--  gcc/config/arm/arm-cores.def      |   1
-rw-r--r--  gcc/config/arm/arm-protos.h       |   1
-rw-r--r--  gcc/config/arm/arm-tune.md        |   2
-rw-r--r--  gcc/config/arm/arm.c              | 114
-rw-r--r--  gcc/config/arm/arm.h              |  13
-rw-r--r--  gcc/config/arm/arm.md             |  48
-rw-r--r--  gcc/config/arm/arm.opt            |   4
-rw-r--r--  gcc/config/arm/arm1020e.md        |   4
-rw-r--r--  gcc/config/arm/bpabi.h            |   6
-rw-r--r--  gcc/config/arm/cortex-a8-neon.md  |   4
-rw-r--r--  gcc/config/arm/cortex-a9.md       |  65
-rw-r--r--  gcc/config/arm/cortex-r4.md       |  36
-rw-r--r--  gcc/config/arm/cortex-r4f.md      | 161
-rw-r--r--  gcc/config/arm/symbian.h          |   2
-rw-r--r--  gcc/config/arm/thumb2.md          |   5
-rw-r--r--  gcc/config/arm/uclinux-elf.h      |   2
-rw-r--r--  gcc/config/arm/vfp.md             |  59
-rw-r--r--  gcc/config/arm/vfp11.md           |   4
-rw-r--r--  gcc/config/arm/vxworks.h          |   3
-rw-r--r--  gcc/config/bfin/bfin.h            |   2
-rw-r--r--  gcc/config/cris/cris.h            |   2
-rw-r--r--  gcc/config/h8300/h8300.h          |   4
-rw-r--r--  gcc/config/i386/i386.c            |  45
-rw-r--r--  gcc/config/i386/i386.h            |  10
-rw-r--r--  gcc/config/ia64/ia64.c            |  13
-rw-r--r--  gcc/config/m68hc11/m68hc11.h      |   2
-rw-r--r--  gcc/config/mips/mips.h            |  12
-rw-r--r--  gcc/config/mn10300/mn10300.h      |   2
-rw-r--r--  gcc/config/pa/pa-hpux11.h         |   5
-rw-r--r--  gcc/config/pa/pa.h                |   2
-rw-r--r--  gcc/config/pa/pa.md               |   8
-rw-r--r--  gcc/config/pa/pa64-hpux.h         |  22
-rw-r--r--  gcc/config/rs6000/rs6000.c        |  95
-rw-r--r--  gcc/config/s390/s390.h            |   2
-rw-r--r--  gcc/config/sparc/sparc.h          |   2
-rw-r--r--  gcc/config/spu/spu.c              |   2
-rw-r--r--  gcc/config/spu/spu.h              |   2
-rw-r--r--  gcc/config/spu/spu.md             |  52
-rw-r--r--  gcc/config/v850/v850.h            |   2
41 files changed, 731 insertions(+), 287 deletions(-)
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index cad90e1adce..a4d3bf1f150 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -5803,38 +5803,34 @@ alpha_build_builtin_va_list (void)
/* Helper function for alpha_stdarg_optimize_hook. Skip over casts
and constant additions. */
-static tree
+static gimple
va_list_skip_additions (tree lhs)
{
- tree rhs, stmt;
-
- if (TREE_CODE (lhs) != SSA_NAME)
- return lhs;
+ gimple stmt;
for (;;)
{
+ enum tree_code code;
+
stmt = SSA_NAME_DEF_STMT (lhs);
- if (TREE_CODE (stmt) == PHI_NODE)
+ if (gimple_code (stmt) == GIMPLE_PHI)
return stmt;
- if (TREE_CODE (stmt) != MODIFY_EXPR
- || TREE_OPERAND (stmt, 0) != lhs)
- return lhs;
-
- rhs = TREE_OPERAND (stmt, 1);
- if (TREE_CODE (rhs) == WITH_SIZE_EXPR)
- rhs = TREE_OPERAND (rhs, 0);
+ if (!is_gimple_assign (stmt)
+ || gimple_assign_lhs (stmt) != lhs)
+ return NULL;
- if (((!CONVERT_EXPR_P (rhs))
- && ((TREE_CODE (rhs) != PLUS_EXPR
- && TREE_CODE (rhs) != POINTER_PLUS_EXPR)
- || TREE_CODE (TREE_OPERAND (rhs, 1)) != INTEGER_CST
- || !host_integerp (TREE_OPERAND (rhs, 1), 1)))
- || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
- return rhs;
+ if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
+ return stmt;
+ code = gimple_assign_rhs_code (stmt);
+ if (!CONVERT_EXPR_CODE_P (code)
+ && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
+ || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
+ || !host_integerp (gimple_assign_rhs2 (stmt), 1)))
+ return stmt;
- lhs = TREE_OPERAND (rhs, 0);
+ lhs = gimple_assign_rhs1 (stmt);
}
}
@@ -5859,36 +5855,49 @@ va_list_skip_additions (tree lhs)
static bool
alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
{
- tree base, offset, arg1, arg2;
+ tree base, offset, rhs;
int offset_arg = 1;
+ gimple base_stmt;
-#if 1
- /* FIXME tuples. */
- (void) si;
- (void) stmt;
- return false;
-#else
+ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
+ != GIMPLE_SINGLE_RHS)
+ return false;
+
+ rhs = gimple_assign_rhs1 (stmt);
while (handled_component_p (rhs))
rhs = TREE_OPERAND (rhs, 0);
if (TREE_CODE (rhs) != INDIRECT_REF
|| TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
return false;
- lhs = va_list_skip_additions (TREE_OPERAND (rhs, 0));
- if (lhs == NULL_TREE
- || TREE_CODE (lhs) != POINTER_PLUS_EXPR)
+ stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
+ if (stmt == NULL
+ || !is_gimple_assign (stmt)
+ || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
return false;
- base = TREE_OPERAND (lhs, 0);
+ base = gimple_assign_rhs1 (stmt);
if (TREE_CODE (base) == SSA_NAME)
- base = va_list_skip_additions (base);
+ {
+ base_stmt = va_list_skip_additions (base);
+ if (base_stmt
+ && is_gimple_assign (base_stmt)
+ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
+ base = gimple_assign_rhs1 (base_stmt);
+ }
if (TREE_CODE (base) != COMPONENT_REF
|| TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
{
- base = TREE_OPERAND (lhs, 0);
+ base = gimple_assign_rhs2 (stmt);
if (TREE_CODE (base) == SSA_NAME)
- base = va_list_skip_additions (base);
+ {
+ base_stmt = va_list_skip_additions (base);
+ if (base_stmt
+ && is_gimple_assign (base_stmt)
+ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
+ base = gimple_assign_rhs1 (base_stmt);
+ }
if (TREE_CODE (base) != COMPONENT_REF
|| TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
@@ -5902,55 +5911,88 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
|| !bitmap_bit_p (si->va_list_vars, DECL_UID (base)))
return false;
- offset = TREE_OPERAND (lhs, offset_arg);
+ offset = gimple_op (stmt, 1 + offset_arg);
if (TREE_CODE (offset) == SSA_NAME)
- offset = va_list_skip_additions (offset);
-
- if (TREE_CODE (offset) == PHI_NODE)
{
- HOST_WIDE_INT sub;
-
- if (PHI_NUM_ARGS (offset) != 2)
- goto escapes;
+ gimple offset_stmt = va_list_skip_additions (offset);
- arg1 = va_list_skip_additions (PHI_ARG_DEF (offset, 0));
- arg2 = va_list_skip_additions (PHI_ARG_DEF (offset, 1));
- if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
+ if (offset_stmt
+ && gimple_code (offset_stmt) == GIMPLE_PHI)
{
- tree tem = arg1;
- arg1 = arg2;
- arg2 = tem;
+ HOST_WIDE_INT sub;
+ gimple arg1_stmt, arg2_stmt;
+ tree arg1, arg2;
+ enum tree_code code1, code2;
- if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR)
+ if (gimple_phi_num_args (offset_stmt) != 2)
goto escapes;
- }
- if (!host_integerp (TREE_OPERAND (arg2, 1), 0))
- goto escapes;
- sub = tree_low_cst (TREE_OPERAND (arg2, 1), 0);
- if (TREE_CODE (arg2) == MINUS_EXPR)
- sub = -sub;
- if (sub < -48 || sub > -32)
- goto escapes;
+ arg1_stmt
+ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
+ arg2_stmt
+ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
+ if (arg1_stmt == NULL
+ || !is_gimple_assign (arg1_stmt)
+ || arg2_stmt == NULL
+ || !is_gimple_assign (arg2_stmt))
+ goto escapes;
- arg2 = va_list_skip_additions (TREE_OPERAND (arg2, 0));
- if (arg1 != arg2)
- goto escapes;
+ code1 = gimple_assign_rhs_code (arg1_stmt);
+ code2 = gimple_assign_rhs_code (arg2_stmt);
+ if (code1 == COMPONENT_REF
+ && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
+ /* Do nothing. */;
+ else if (code2 == COMPONENT_REF
+ && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
+ {
+ gimple tem = arg1_stmt;
+ code2 = code1;
+ arg1_stmt = arg2_stmt;
+ arg2_stmt = tem;
+ }
+ else
+ goto escapes;
- if (TREE_CODE (arg1) == SSA_NAME)
- arg1 = va_list_skip_additions (arg1);
+ if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0))
+ goto escapes;
- if (TREE_CODE (arg1) != COMPONENT_REF
- || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
- || get_base_address (arg1) != base)
- goto escapes;
+ sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0);
+ if (code2 == MINUS_EXPR)
+ sub = -sub;
+ if (sub < -48 || sub > -32)
+ goto escapes;
- /* Need floating point regs. */
- cfun->va_list_fpr_size |= 2;
+ arg1 = gimple_assign_rhs1 (arg1_stmt);
+ arg2 = gimple_assign_rhs1 (arg2_stmt);
+ if (TREE_CODE (arg2) == SSA_NAME)
+ {
+ arg2_stmt = va_list_skip_additions (arg2);
+ if (arg2_stmt == NULL
+ || !is_gimple_assign (arg2_stmt)
+ || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
+ goto escapes;
+ arg2 = gimple_assign_rhs1 (arg2_stmt);
+ }
+ if (arg1 != arg2)
+ goto escapes;
+
+ if (TREE_CODE (arg1) != COMPONENT_REF
+ || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
+ || get_base_address (arg1) != base)
+ goto escapes;
+
+ /* Need floating point regs. */
+ cfun->va_list_fpr_size |= 2;
+ return false;
+ }
+ if (offset_stmt
+ && is_gimple_assign (offset_stmt)
+ && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
+ offset = gimple_assign_rhs1 (offset_stmt);
}
- else if (TREE_CODE (offset) != COMPONENT_REF
- || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
- || get_base_address (offset) != base)
+ if (TREE_CODE (offset) != COMPONENT_REF
+ || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
+ || get_base_address (offset) != base)
goto escapes;
else
/* Need general regs. */
@@ -5960,7 +6002,6 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
escapes:
si->va_list_escapes = true;
return false;
-#endif
}
#endif
@@ -6126,10 +6167,11 @@ alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
}
static tree
-alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset,
+alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
gimple_seq *pre_p)
{
- tree type_size, ptr_type, addend, t, addr, internal_post;
+ tree type_size, ptr_type, addend, t, addr;
+ gimple_seq internal_post;
/* If the type could not be passed in registers, skip the block
reserved for the registers. */
@@ -6177,7 +6219,7 @@ alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset,
fold_convert (sizetype, addend));
internal_post = NULL;
gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
- append_to_statement_list (internal_post, pre_p);
+ gimple_seq_add_seq (pre_p, internal_post);
/* Update the offset field. */
type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
@@ -6230,7 +6272,7 @@ alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
/* Stuff the offset temporary back into its field. */
- gimplify_assign (offset_field,
+ gimplify_assign (unshare_expr (offset_field),
fold_convert (TREE_TYPE (offset_field), offset), pre_p);
if (indirect)
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index 8e022d6a5f8..c462f71fb1d 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -1079,7 +1079,7 @@ do { \
Without byte/word accesses, we want no more than four instructions;
with, several single byte accesses are better. */
-#define MOVE_RATIO (TARGET_BWX ? 7 : 2)
+#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2)
/* Largest number of bytes of an object that can be placed in a register.
On the Alpha we have plenty of registers, so use TImode. */
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index c7e425b0c7f..fe2f2b53792 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -116,6 +116,7 @@ ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e)
ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e)
ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e)
ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e)
+ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e)
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index d0e408ccf1d..ab08ef446f6 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -123,6 +123,7 @@ extern const char *fp_immediate_constant (rtx);
extern void arm_emit_call_insn (rtx, rtx);
extern const char *output_call (rtx *);
extern const char *output_call_mem (rtx *);
+void arm_emit_movpair (rtx, rtx);
extern const char *output_mov_long_double_fpa_from_arm (rtx *);
extern const char *output_mov_long_double_arm_from_fpa (rtx *);
extern const char *output_mov_long_double_arm_from_arm (rtx *);
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index ee5606b04cb..beb8f9f4173 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
- "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexr4f,cortexm3,cortexm1"
+ "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1"
(const (symbol_ref "arm_tune")))
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6a4b3ef11cd..ec28f79446f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -544,6 +544,9 @@ int arm_tune_xscale = 0;
This typically means an ARM6 or ARM7 with MMU or MPU. */
int arm_tune_wbuf = 0;
+/* Nonzero if tuning for Cortex-A9. */
+int arm_tune_cortex_a9 = 0;
+
/* Nonzero if generating Thumb instructions. */
int thumb_code = 0;
@@ -1186,12 +1189,30 @@ arm_override_options (void)
tune_flags = all_cores[(int)arm_tune].flags;
+ if (target_abi_name)
+ {
+ for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
+ {
+ if (streq (arm_all_abis[i].name, target_abi_name))
+ {
+ arm_abi = arm_all_abis[i].abi_type;
+ break;
+ }
+ }
+ if (i == ARRAY_SIZE (arm_all_abis))
+ error ("invalid ABI option: -mabi=%s", target_abi_name);
+ }
+ else
+ arm_abi = ARM_DEFAULT_ABI;
+
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
if (TARGET_ARM && !(insn_flags & FL_NOTM))
error ("target CPU does not support ARM mode");
- if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
+ /* BPABI targets use linker tricks to allow interworking on cores
+ without thumb support. */
+ if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
{
warning (0, "target CPU does not support interworking" );
target_flags &= ~MASK_INTERWORK;
@@ -1271,6 +1292,7 @@ arm_override_options (void)
arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
+ arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
/* If we are not using the default (ARM mode) section anchor offset
ranges, then set the correct ranges now. */
@@ -1304,22 +1326,6 @@ arm_override_options (void)
if (arm_arch5)
target_flags &= ~MASK_INTERWORK;
- if (target_abi_name)
- {
- for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
- {
- if (streq (arm_all_abis[i].name, target_abi_name))
- {
- arm_abi = arm_all_abis[i].abi_type;
- break;
- }
- }
- if (i == ARRAY_SIZE (arm_all_abis))
- error ("invalid ABI option: -mabi=%s", target_abi_name);
- }
- else
- arm_abi = ARM_DEFAULT_ABI;
-
if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
@@ -1924,14 +1930,22 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
{
/* Currently SET is the only monadic value for CODE, all
the rest are diadic. */
- emit_set_insn (target, GEN_INT (val));
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (target, GEN_INT (val));
+ else
+ emit_set_insn (target, GEN_INT (val));
+
return 1;
}
else
{
rtx temp = subtargets ? gen_reg_rtx (mode) : target;
- emit_set_insn (temp, GEN_INT (val));
+ if (TARGET_USE_MOVT)
+ arm_emit_movpair (temp, GEN_INT (val));
+ else
+ emit_set_insn (temp, GEN_INT (val));
+
/* For MINUS, the value is subtracted from, since we never
have subtraction of a constant. */
if (code == MINUS)
@@ -4903,7 +4917,15 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
|| (GET_CODE (XEXP (x, 0)) == SUBREG
&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
? 0 : 8));
- return (1 + ((GET_CODE (XEXP (x, 0)) == REG
+
+ extra_cost = 1;
+ /* Increase the cost of complex shifts because they aren't any faster,
+ and reduce dual issue opportunities. */
+ if (arm_tune_cortex_a9
+ && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
+ extra_cost++;
+
+ return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG
|| (GET_CODE (XEXP (x, 0)) == SUBREG
&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
? 0 : 4)
@@ -5018,7 +5040,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
&& (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
- && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
+ && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))
+ && !arm_tune_cortex_a9)
|| GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
? 0 : 4));
@@ -5115,6 +5138,10 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
case SYMBOL_REF:
return 6;
+ case HIGH:
+ case LO_SUM:
+ return (outer == SET) ? 1 : -1;
+
case CONST_DOUBLE:
if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
return outer == SET ? 2 : -1;
@@ -5341,6 +5368,13 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
*total = COSTS_N_INSNS (4);
return true;
+ case HIGH:
+ case LO_SUM:
+ /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
+ cost of these slightly. */
+ *total = COSTS_N_INSNS (1) + 1;
+ return true;
+
default:
if (mode != VOIDmode)
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
@@ -9889,6 +9923,14 @@ output_mov_long_double_arm_from_arm (rtx *operands)
}
+/* Emit a MOVW/MOVT pair. */
+void arm_emit_movpair (rtx dest, rtx src)
+{
+ emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
+ emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
+}
+
+
/* Output a move from arm registers to an fpa registers.
OPERANDS[0] is an fpa register.
OPERANDS[1] is the first registers of an arm register pair. */
@@ -12904,10 +12946,21 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
- /* An integer without a preceding # sign. */
+ /* An integer or symbol address without a preceding # sign. */
case 'c':
- gcc_assert (GET_CODE (x) == CONST_INT);
- fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case SYMBOL_REF:
+ output_addr_const (stream, x);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
return;
case 'B':
@@ -18235,8 +18288,15 @@ arm_no_early_mul_dep (rtx producer, rtx consumer)
op = XVECEXP (op, 0, 0);
op = XEXP (op, 1);
- return (GET_CODE (op) == PLUS
- && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
+ if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
+ {
+ if (GET_CODE (XEXP (op, 0)) == MULT)
+ return !reg_overlap_mentioned_p (value, XEXP (op, 0));
+ else
+ return !reg_overlap_mentioned_p (value, XEXP (op, 1));
+ }
+
+ return 0;
}
/* We can't rely on the caller doing the proper promotion when
@@ -19017,7 +19077,9 @@ arm_issue_rate (void)
switch (arm_tune)
{
case cortexr4:
+ case cortexr4f:
case cortexa8:
+ case cortexa9:
return 2;
default:
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 4132b06b024..f83aabac038 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -241,6 +241,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
#define TARGET_INT_SIMD \
(TARGET_32BIT && arm_arch6 && arm_arch_notm)
+/* Should MOVW/MOVT be used in preference to a constant pool. */
+#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size)
+
/* We could use unified syntax for arm mode, but for now we just use it
for Thumb-2. */
#define TARGET_UNIFIED_ASM TARGET_THUMB2
@@ -404,6 +407,9 @@ extern int arm_tune_xscale;
/* Nonzero if tuning for stores via the write buffer. */
extern int arm_tune_wbuf;
+/* Nonzero if tuning for Cortex-A9. */
+extern int arm_tune_cortex_a9;
+
/* Nonzero if we should define __THUMB_INTERWORK__ in the
preprocessor.
XXX This is a bit of a hack, it's intended to help work around
@@ -1962,6 +1968,11 @@ typedef struct
SYMBOL's section. */
#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0
+/* Nonzero if the target requires all absolute relocations to be R_ARM_ABS32. */
+#ifndef TARGET_DEFAULT_WORD_RELOCATIONS
+#define TARGET_DEFAULT_WORD_RELOCATIONS 0
+#endif
+
/* Nonzero if the constant value X is a legitimate general operand.
It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.
@@ -2244,7 +2255,7 @@ do { \
#define MOVE_MAX 4
#undef MOVE_RATIO
-#define MOVE_RATIO (arm_tune_xscale ? 4 : 2)
+#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0ef91c6a003..1c279095707 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -157,7 +157,7 @@
; Floating Point Unit. If we only have floating point emulation, then there
; is no point in scheduling the floating point insns. (Well, for best
; performance we should try and group them together).
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
(const (symbol_ref "arm_fpu_attr")))
; LENGTH of an instruction (in bytes)
@@ -239,7 +239,7 @@
;
(define_attr "type"
- "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult"
+ "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys"
(if_then_else
(eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
(const_string "mult")
@@ -331,18 +331,26 @@
;; Processor type. This is created automatically from arm-cores.def.
(include "arm-tune.md")
+(define_attr "tune_cortexr4" "yes,no"
+ (const (if_then_else
+ (eq_attr "tune" "cortexr4,cortexr4f")
+ (const_string "yes")
+ (const_string "no"))))
+
;; True if the generic scheduling description should be used.
(define_attr "generic_sched" "yes,no"
(const (if_then_else
- (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4")
+ (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9")
+ (eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
(define_attr "generic_vfp" "yes,no"
(const (if_then_else
(and (eq_attr "fpu" "vfp")
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa8"))
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9")
+ (eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@@ -352,7 +360,9 @@
(include "arm1026ejs.md")
(include "arm1136jfs.md")
(include "cortex-a8.md")
+(include "cortex-a9.md")
(include "cortex-r4.md")
+(include "cortex-r4f.md")
(include "vfp11.md")
@@ -4814,6 +4824,14 @@
optimize && can_create_pseudo_p ());
DONE;
}
+
+ if (TARGET_USE_MOVT && !target_word_relocations
+ && GET_CODE (operands[1]) == SYMBOL_REF
+ && !flag_pic && !arm_tls_referenced_p (operands[1]))
+ {
+ arm_emit_movpair (operands[0], operands[1]);
+ DONE;
+ }
}
else /* TARGET_THUMB1... */
{
@@ -4874,6 +4892,28 @@
"
)
+;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and
+;; LO_SUM adds in the high bits. Fortunately these are opaque operations
+;; so this does not matter.
+(define_insn "*arm_movt"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "general_operand" "i")))]
+ "TARGET_32BIT"
+ "movt%?\t%0, #:upper16:%c2"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4")]
+)
+
+(define_insn "*arm_movw"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+ (high:SI (match_operand:SI 1 "general_operand" "i")))]
+ "TARGET_32BIT"
+ "movw%?\t%0, #:lower16:%c1"
+ [(set_attr "predicable" "yes")
+ (set_attr "length" "4")]
+)
+
(define_insn "*arm_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
(match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))]
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 2b005e42fa4..c8bdcf80f48 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -156,3 +156,7 @@ Assume big endian bytes, little endian words
mvectorize-with-neon-quad
Target Report Mask(NEON_VECTORIZE_QUAD)
Use Neon quad-word (rather than double-word) registers for vectorization
+
+mword-relocations
+Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
+Only generate absolute relocations on word sized values.
diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md
index ed170c4b170..397ddd5f97c 100644
--- a/gcc/config/arm/arm1020e.md
+++ b/gcc/config/arm/arm1020e.md
@@ -269,12 +269,12 @@
;; first execute state. We model this by using 1020a_e in the first cycle.
(define_insn_reservation "v10_ffarith" 5
(and (eq_attr "vfp10" "yes")
- (eq_attr "type" "ffarith"))
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
"1020a_e+v10_fmac")
(define_insn_reservation "v10_farith" 5
(and (eq_attr "vfp10" "yes")
- (eq_attr "type" "farith"))
+ (eq_attr "type" "faddd,fadds"))
"1020a_e+v10_fmac")
(define_insn_reservation "v10_cvt" 5
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index e28d9ead45e..38be1da261a 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -51,9 +51,11 @@
/* The BPABI integer comparison routines return { -1, 0, 1 }. */
#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI
+#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}"
+
/* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC
-#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}"
+#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}" TARGET_FIX_V4BX_SPEC
#ifndef SUBTARGET_EXTRA_LINK_SPEC
#define SUBTARGET_EXTRA_LINK_SPEC ""
@@ -63,7 +65,7 @@
#define BPABI_LINK_SPEC \
"%{mbig-endian:-EB} %{mlittle-endian:-EL} " \
"%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \
- "-X" SUBTARGET_EXTRA_LINK_SPEC
+ "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC
#undef LINK_SPEC
#define LINK_SPEC BPABI_LINK_SPEC
diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md
index dd7ac25ccaa..93453b618db 100644
--- a/gcc/config/arm/cortex-a8-neon.md
+++ b/gcc/config/arm/cortex-a8-neon.md
@@ -134,7 +134,7 @@
(define_insn_reservation "cortex_a8_vfp_add_sub" 10
(and (eq_attr "tune" "cortexa8")
- (eq_attr "type" "farith"))
+ (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
"cortex_a8_vfp,cortex_a8_vfplite*9")
(define_insn_reservation "cortex_a8_vfp_muls" 12
@@ -172,7 +172,7 @@
;; take four cycles, we pick that latency.
(define_insn_reservation "cortex_a8_vfp_farith" 4
(and (eq_attr "tune" "cortexa8")
- (eq_attr "type" "ffarith"))
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd"))
"cortex_a8_vfp,cortex_a8_vfplite*3")
(define_insn_reservation "cortex_a8_vfp_cvt" 7
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
new file mode 100644
index 00000000000..121fd2da747
--- /dev/null
+++ b/gcc/config/arm/cortex-a9.md
@@ -0,0 +1,65 @@
+;; ARM Cortex-A9 VFP pipeline description
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a9")
+
+;; FIXME: We model a single pipeline for all instructions.
+;; Is dual-issue possible, and do we have other pipelines?
+(define_cpu_unit "cortex_a9_vfp" "cortex_a9")
+
+(define_insn_reservation "cortex_a9_ffarith" 1
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd"))
+ "cortex_a9_vfp")
+
+(define_insn_reservation "cortex_a9_fadd" 4
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fadds,faddd,f_cvt"))
+ "cortex_a9_vfp")
+
+(define_insn_reservation "cortex_a9_fmuls" 5
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmuls"))
+ "cortex_a9_vfp")
+
+(define_insn_reservation "cortex_a9_fmuld" 6
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmuld"))
+ "cortex_a9_vfp*2")
+
+(define_insn_reservation "cortex_a9_fmacs" 8
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmacs"))
+ "cortex_a9_vfp")
+
+(define_insn_reservation "cortex_a9_fmacd" 8
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fmacd"))
+ "cortex_a9_vfp*2")
+
+(define_insn_reservation "cortex_a9_fdivs" 15
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fdivs"))
+ "cortex_a9_vfp*10")
+
+(define_insn_reservation "cortex_a9_fdivd" 25
+ (and (eq_attr "tune" "cortexa9")
+ (eq_attr "type" "fdivd"))
+ "cortex_a9_vfp*20")
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
index 34467345acb..e26c3d45d5e 100644
--- a/gcc/config/arm/cortex-r4.md
+++ b/gcc/config/arm/cortex-r4.md
@@ -77,24 +77,24 @@
;; Data processing instructions. Moves without shifts are kept separate
;; for the purposes of the dual-issue constraints above.
(define_insn_reservation "cortex_r4_alu" 2
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(and (eq_attr "type" "alu")
(not (eq_attr "insn" "mov"))))
"cortex_r4_alu")
(define_insn_reservation "cortex_r4_mov" 2
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(and (eq_attr "type" "alu")
(eq_attr "insn" "mov")))
"cortex_r4_mov")
(define_insn_reservation "cortex_r4_alu_shift" 2
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "alu_shift"))
"cortex_r4_alu")
(define_insn_reservation "cortex_r4_alu_shift_reg" 2
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "alu_shift_reg"))
"cortex_r4_alu_shift_reg")
@@ -127,32 +127,32 @@
;; Multiplication instructions.
(define_insn_reservation "cortex_r4_mul_4" 4
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "mul,smmul"))
"cortex_r4_mul_2")
(define_insn_reservation "cortex_r4_mul_3" 3
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_mla_4" 4
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "mla,smmla"))
"cortex_r4_mul_2")
(define_insn_reservation "cortex_r4_mla_3" 3
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_smlald" 3
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "smlald,smlsld"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_mull" 4
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "smull,umull,umlal,umaal"))
"cortex_r4_mul_2")
@@ -195,19 +195,19 @@
;; is performed with B having ten more leading zeros than A.
;; This gives a latency of nine for udiv and ten for sdiv.
(define_insn_reservation "cortex_r4_udiv" 9
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "udiv"))
"cortex_r4_div_9")
(define_insn_reservation "cortex_r4_sdiv" 10
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "insn" "sdiv"))
"cortex_r4_div_10")
;; Branches. We assume correct prediction.
(define_insn_reservation "cortex_r4_branch" 0
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "branch"))
"cortex_r4_branch")
@@ -215,7 +215,7 @@
;; number is used as "positive infinity" so that everything should be
;; finished by the time of return.
(define_insn_reservation "cortex_r4_call" 32
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "call"))
"nothing")
@@ -226,12 +226,12 @@
;; accesses following are correctly aligned.
(define_insn_reservation "cortex_r4_load_1_2" 3
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "load1,load2"))
"cortex_r4_load_store")
(define_insn_reservation "cortex_r4_load_3_4" 4
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "load3,load4"))
"cortex_r4_load_store_2")
@@ -281,12 +281,12 @@
;; Store instructions.
(define_insn_reservation "cortex_r4_store_1_2" 0
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "store1,store2"))
"cortex_r4_load_store")
(define_insn_reservation "cortex_r4_store_3_4" 0
- (and (eq_attr "tune" "cortexr4")
+ (and (eq_attr "tune_cortexr4" "yes")
(eq_attr "type" "store3,store4"))
"cortex_r4_load_store_2")
diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md
new file mode 100644
index 00000000000..8982bc068eb
--- /dev/null
+++ b/gcc/config/arm/cortex-r4f.md
@@ -0,0 +1,161 @@
+;; ARM Cortex-R4F VFP pipeline description
+;; Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; With the exception of simple VMOV <freg>, <freg> instructions and
+;; the accumulate operand of a multiply-accumulate instruction, all
+;; registers are early registers. Thus base latencies are 1 more than
+;; those listed in the TRM.
+
+;; We use the A, B and C units from the integer core, plus two additional
+;; units to enforce VFP dual issue constraints.
+
+;; A B C V1 VMLA
+;; fcpy 1 2
+;; farith 1 2 1
+;; fmrc 1 2
+;; fconst 1 2 * *
+;; ffarith 1 2 * *
+;; fmac 1 2 1 2
+;; fdiv 1 2 *
+;; f_loads * * *
+;; f_stores * * *
+
+(define_cpu_unit "cortex_r4_v1" "cortex_r4")
+
+(define_cpu_unit "cortex_r4_vmla" "cortex_r4")
+
+(define_reservation "cortex_r4_issue_ab"
+ "(cortex_r4_issue_a|cortex_r4_issue_b)")
+(define_reservation "cortex_r4_single_issue"
+ "cortex_r4_issue_a+cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fcpys" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fcpys"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_ffariths" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "ffariths,fconsts,fcmps"))
+ "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fariths" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fadds,fmuls"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fmacs" 6
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fmacs"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)")
+
+(define_insn_reservation "cortex_r4_fdivs" 17
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fdivs"))
+ "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_floads" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_loads"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fstores" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_stores"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla")
+
+(define_insn_reservation "cortex_r4_mcr" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "r_2_f"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_mrc" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_2_r"))
+ "cortex_r4_issue_ab")
+
+;; Bypasses for normal (not early) regs.
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+ "cortex_r4_fcpys")
+(define_bypass 2 "cortex_r4_fariths"
+ "cortex_r4_fcpys")
+(define_bypass 5 "cortex_r4_fmacs"
+ "cortex_r4_fcpys")
+(define_bypass 16 "cortex_r4_fdivs"
+ "cortex_r4_fcpys")
+
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+(define_bypass 2 "cortex_r4_fariths"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+;; mac->mac has an extra forwarding path.
+(define_bypass 3 "cortex_r4_fmacs"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+(define_bypass 16 "cortex_r4_fdivs"
+ "cortex_r4_fmacs"
+ "arm_no_early_mul_dep")
+
+;; Double precision operations. These can not dual issue.
+
+(define_insn_reservation "cortex_r4_fmacd" 20
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fmacd"))
+ "cortex_r4_single_issue*13")
+
+(define_insn_reservation "cortex_r4_farith" 10
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "faddd,fmuld"))
+ "cortex_r4_single_issue*3")
+
+;; FIXME: The short cycle count suggests these instructions complete
+;; out of order. Chances are this is not a pipelined operation.
+(define_insn_reservation "cortex_r4_fdivd" 97
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fdivd"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_ffarithd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "ffarithd,fconstd"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_fcmpd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "fcmpd"))
+ "cortex_r4_single_issue*2")
+
+(define_insn_reservation "cortex_r4_f_cvt" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_cvt"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_f_memd" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_loadd,f_stored"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_f_flag" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+ (eq_attr "type" "f_stores"))
+ "cortex_r4_single_issue")
+
diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h
index 3e583b3a912..af92c72b7ba 100644
--- a/gcc/config/arm/symbian.h
+++ b/gcc/config/arm/symbian.h
@@ -101,3 +101,5 @@
/* SymbianOS cannot merge entities with vague linkage at runtime. */
#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false
+
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index f33d8206a1f..2417650adbb 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -755,15 +755,12 @@
(clobber (reg:CC CC_REGNUM))]
"TARGET_THUMB2"
"*
- if (GET_CODE (operands[3]) == LT && operands[3] == const0_rtx)
+ if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx)
return \"asr\\t%0, %1, #31\";
if (GET_CODE (operands[3]) == NE)
return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\";
- if (GET_CODE (operands[3]) == GT)
- return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, %0, asr #31\";
-
output_asm_insn (\"cmp\\t%1, %2\", operands);
output_asm_insn (\"ite\\t%D3\", operands);
output_asm_insn (\"mov%D3\\t%0, #0\", operands);
diff --git a/gcc/config/arm/uclinux-elf.h b/gcc/config/arm/uclinux-elf.h
index 98a78505631..89b96f257ba 100644
--- a/gcc/config/arm/uclinux-elf.h
+++ b/gcc/config/arm/uclinux-elf.h
@@ -83,3 +83,5 @@
"%{pthread:-lpthread} \
%{shared:-lc} \
%{!shared:%{profile:-lc_p}%{!profile:-lc}}"
+
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 64bb9564d9c..737f81ccb27 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -24,8 +24,15 @@
)
;; The VFP "type" attributes differ from those used in the FPA model.
-;; ffarith Fast floating point insns, e.g. abs, neg, cpy, cmp.
-;; farith Most arithmetic insns.
+;; fcpys Single precision cpy.
+;; ffariths Single precision abs, neg.
+;; ffarithd Double precision abs, neg, cpy.
+;; fadds Single precision add/sub.
+;; faddd Double precision add/sub.
+;; fconsts Single precision load immediate.
+;; fconstd Double precision load immediate.
+;; fcmps Single precision comparison.
+;; fcmpd Double precision comparison.
;; fmuls Single precision multiply.
;; fmuld Double precision multiply.
;; fmacs Single precision multiply-accumulate.
@@ -74,7 +81,7 @@
}
"
[(set_attr "predicable" "yes")
- (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_loads,f_stores")
+ (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
(set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
)
@@ -111,7 +118,7 @@
}
"
[(set_attr "predicable" "yes")
- (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_load,f_store")
+ (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store")
(set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")]
)
@@ -145,7 +152,7 @@
gcc_unreachable ();
}
"
- [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_loadd,f_stored")
+ [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
(set_attr "length" "8,8,8,4,4,4,4,4")
(set_attr "pool_range" "*,1020,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")]
@@ -172,7 +179,7 @@
abort ();
}
"
- [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_load,f_store")
+ [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store")
(set_attr "length" "8,8,8,4,4,4,4,4")
(set_attr "pool_range" "*,4096,*,*,*,*,1020,*")
(set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")]
@@ -214,7 +221,7 @@
"
[(set_attr "predicable" "yes")
(set_attr "type"
- "r_2_f,f_2_r,farith,f_loads,f_stores,load1,store1,ffarith,*")
+ "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
(set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
)
@@ -250,7 +257,7 @@
"
[(set_attr "predicable" "yes")
(set_attr "type"
- "r_2_f,f_2_r,farith,f_load,f_store,load1,store1,ffarith,*")
+ "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*")
(set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
)
@@ -288,7 +295,7 @@
}
"
[(set_attr "type"
- "r_2_f,f_2_r,farith,f_loadd,f_stored,load2,store2,ffarith,*")
+ "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
(set_attr "length" "4,4,4,8,8,4,4,4,8")
(set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
(set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
@@ -320,7 +327,7 @@
}
"
[(set_attr "type"
- "r_2_f,f_2_r,farith,load2,store2,f_load,f_store,ffarith,*")
+ "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*")
(set_attr "length" "4,4,4,8,8,4,4,4,8")
(set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
(set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
@@ -349,7 +356,7 @@
fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
[(set_attr "conds" "use")
(set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
)
(define_insn "*thumb2_movsfcc_vfp"
@@ -372,7 +379,7 @@
ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
[(set_attr "conds" "use")
(set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
)
(define_insn "*movdfcc_vfp"
@@ -395,7 +402,7 @@
fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
[(set_attr "conds" "use")
(set_attr "length" "4,4,8,4,4,8,4,4,8")
- (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
)
(define_insn "*thumb2_movdfcc_vfp"
@@ -418,7 +425,7 @@
ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
[(set_attr "conds" "use")
(set_attr "length" "6,6,10,6,6,10,6,6,10")
- (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+ (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
)
@@ -430,7 +437,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fabss%?\\t%0, %1"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "ffariths")]
)
(define_insn "*absdf2_vfp"
@@ -439,7 +446,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fabsd%?\\t%P0, %P1"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "ffarithd")]
)
(define_insn "*negsf2_vfp"
@@ -450,7 +457,7 @@
fnegs%?\\t%0, %1
eor%?\\t%0, %1, #-2147483648"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "ffariths")]
)
(define_insn_and_split "*negdf2_vfp"
@@ -496,7 +503,7 @@
"
[(set_attr "predicable" "yes")
(set_attr "length" "4,4,8")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "ffarithd")]
)
@@ -509,7 +516,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fadds%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
- (set_attr "type" "farith")]
+ (set_attr "type" "fadds")]
)
(define_insn "*adddf3_vfp"
@@ -519,7 +526,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"faddd%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
- (set_attr "type" "farith")]
+ (set_attr "type" "faddd")]
)
@@ -530,7 +537,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsubs%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
- (set_attr "type" "farith")]
+ (set_attr "type" "fadds")]
)
(define_insn "*subdf3_vfp"
@@ -540,7 +547,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsubd%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
- (set_attr "type" "farith")]
+ (set_attr "type" "faddd")]
)
@@ -909,7 +916,7 @@
fcmps%?\\t%0, %1
fcmpzs%?\\t%0"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "fcmps")]
)
(define_insn "*cmpsf_trap_vfp"
@@ -921,7 +928,7 @@
fcmpes%?\\t%0, %1
fcmpezs%?\\t%0"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "fcmpd")]
)
(define_insn "*cmpdf_vfp"
@@ -933,7 +940,7 @@
fcmpd%?\\t%P0, %P1
fcmpzd%?\\t%P0"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "fcmpd")]
)
(define_insn "*cmpdf_trap_vfp"
@@ -945,7 +952,7 @@
fcmped%?\\t%P0, %P1
fcmpezd%?\\t%P0"
[(set_attr "predicable" "yes")
- (set_attr "type" "ffarith")]
+ (set_attr "type" "fcmpd")]
)
diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md
index 59699739539..8f863fd70cd 100644
--- a/gcc/config/arm/vfp11.md
+++ b/gcc/config/arm/vfp11.md
@@ -51,12 +51,12 @@
(define_insn_reservation "vfp_ffarith" 4
(and (eq_attr "generic_vfp" "yes")
- (eq_attr "type" "ffarith"))
+ (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
"fmac")
(define_insn_reservation "vfp_farith" 8
(and (eq_attr "generic_vfp" "yes")
- (eq_attr "type" "farith,f_cvt,fmuls,fmacs"))
+ (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs"))
"fmac")
(define_insn_reservation "vfp_fmul" 9
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index 441655776e9..a7610acca5d 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -113,3 +113,6 @@ along with GCC; see the file COPYING3. If not see
cannot allow arbitrary offsets for shared libraries either. */
#undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1
+
+#undef TARGET_DEFAULT_WORD_RELOCATIONS
+#define TARGET_DEFAULT_WORD_RELOCATIONS 1
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 826e60b9e02..4ac369f10ba 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -998,7 +998,7 @@ do { \
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead. */
-#define MOVE_RATIO 5
+#define MOVE_RATIO(speed) 5
/* STORAGE LAYOUT: target machine storage layout
Define this macro as a C expression which is nonzero if accessing
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index b8a66e96a4b..ed1ec3deb06 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -1242,7 +1242,7 @@ struct cum_args {int regs;};
word-length sizes will be emitted. The "9" will translate to
(9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions
(8 instruction sequences) or less. */
-#define MOVE_RATIO 9
+#define MOVE_RATIO(speed) 9
/* Node: Sections */
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index 7305fc32f85..4edbb2f6ba2 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -1189,10 +1189,8 @@ struct cum_arg
#define FINAL_PRESCAN_INSN(insn, operand, nop) \
final_prescan_insn (insn, operand, nop)
-#define MOVE_RATIO 3
extern int h8300_move_ratio;
-#undef MOVE_RATIO
-#define MOVE_RATIO h8300_move_ratio
+#define MOVE_RATIO(speed) h8300_move_ratio
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 025eee6a99c..2c016328e4c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1631,9 +1631,6 @@ rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
-/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
-
/* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
@@ -6312,14 +6309,24 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
int i;
int regparm = ix86_regparm;
- if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+ if (cum->call_abi != DEFAULT_ABI)
regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
- if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
- return;
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+ else
+ ix86_varargs_gpr_size = 0;
+
+ /* FPR size of varargs save area. We don't need it if we don't pass
+ anything in SSE registers. */
+ if (cum->sse_nregs && cfun->va_list_fpr_size)
+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+ else
+ ix86_varargs_fpr_size = 0;
- /* Indicate to allocate space on the stack for varargs save area. */
- ix86_save_varrargs_registers = 1;
+ if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+ return;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
@@ -6337,7 +6344,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
x86_64_int_parameter_registers[i]));
}
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (ix86_varargs_fpr_size)
{
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
@@ -6382,7 +6389,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
- 8 * X86_64_REGPARM_MAX + 127)));
+ ix86_varargs_gpr_size + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
@@ -6438,7 +6445,7 @@ ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
- if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
else
setup_incoming_varargs_64 (&next_cum);
@@ -6501,7 +6508,7 @@ ix86_va_start (tree valist, rtx nextarg)
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- if (cfun->va_list_fpr_size)
+ if (TARGET_SSE && cfun->va_list_fpr_size)
{
type = TREE_TYPE (fpr);
t = build2 (MODIFY_EXPR, type, fpr,
@@ -6520,12 +6527,15 @@ ix86_va_start (tree valist, rtx nextarg)
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
{
/* Find the register save area.
Prologue of the function save it right above stack frame. */
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
+ if (!ix86_varargs_gpr_size)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (-8 * X86_64_REGPARM_MAX));
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
@@ -7500,13 +7510,8 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
offset += frame->nregs * UNITS_PER_WORD;
/* Va-arg area */
- if (ix86_save_varrargs_registers)
- {
- offset += X86_64_VARARGS_SIZE;
- frame->va_arg_size = X86_64_VARARGS_SIZE;
- }
- else
- frame->va_arg_size = 0;
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+ offset += frame->va_arg_size;
/* Align start of frame for local function. */
frame->padding1 = ((offset + stack_alignment_needed - 1)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d933c5e2389..6f6529a252e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1906,12 +1906,12 @@ do { \
If you don't define this, a reasonable default is used. */
-#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio)
+#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3)
/* If a clear memory operation would take CLEAR_RATIO or more simple
move-instruction sequences, we will do a clrmem or libcall instead. */
-#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio))
+#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
/* Define if shifts truncate the shift count
which implies one can omit a sign-extension or zero-extension
@@ -2390,7 +2390,8 @@ struct machine_function GTY(())
{
struct stack_local_entry *stack_locals;
const char *some_ld_name;
- int save_varrargs_registers;
+ int varargs_gpr_size;
+ int varargs_fpr_size;
int accesses_prev_frame;
int optimize_mode_switching[MAX_386_ENTITIES];
int needs_cld;
@@ -2416,7 +2417,8 @@ struct machine_function GTY(())
};
#define ix86_stack_locals (cfun->machine->stack_locals)
-#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
+#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
+#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size)
#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
#define ix86_tls_descriptor_calls_expanded_in_cfun \
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index c16ecc7e3c3..28abf27d1bf 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -382,8 +382,8 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
-#undef TARGET_SCHED_GEN_CHECK
-#define TARGET_SCHED_GEN_CHECK ia64_gen_check
+#undef TARGET_SCHED_GEN_SPEC_CHECK
+#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_check
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
@@ -408,7 +408,7 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_0
+#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
@@ -6278,10 +6278,6 @@ static rtx dfa_stop_insn;
static rtx last_scheduled_insn;
-/* The following variable value is size of the DFA state. */
-
-static size_t dfa_state_size;
-
/* The following variable value is pointer to a DFA state used as
temporary variable. */
@@ -6857,6 +6853,8 @@ ia64_set_sched_flags (spec_info_t spec_info)
mask |= BE_IN_CONTROL;
}
+ spec_info->mask = mask;
+
if (mask)
{
*flags |= USE_DEPS_LIST | DO_SPECULATION;
@@ -6864,7 +6862,6 @@ ia64_set_sched_flags (spec_info_t spec_info)
if (mask & BE_IN_SPEC)
*flags |= NEW_BBS;
- spec_info->mask = mask;
spec_info->flags = 0;
if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
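
Two things happen in the ia64 hunks: the file-local dfa_state_size is dropped (presumably in favour of the scheduler's shared definition), and spec_info->mask is now assigned before the if (mask) guard, so a zero mask is still recorded for later readers. A standalone sketch of the hoist (names are illustrative):

    struct spec_info_sketch { int mask; int flags; };

    static void
    set_sched_flags_sketch (struct spec_info_sketch *si, int mask)
    {
      si->mask = mask;    /* published even when mask == 0 */
      if (mask)
        si->flags |= 1;   /* stand-in for USE_DEPS_LIST | DO_SPECULATION */
    }
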
diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h
index cb789e1a54f..302c414bb5c 100644
--- a/gcc/config/m68hc11/m68hc11.h
+++ b/gcc/config/m68hc11/m68hc11.h
@@ -1505,7 +1505,7 @@ do { \
/* MOVE_RATIO is the number of move instructions that is better than a
block move. Make this small on 6811, since the code size grows very
large with each move. */
-#define MOVE_RATIO 3
+#define MOVE_RATIO(speed) 3
/* Define if shifts truncate the shift count which implies one can omit
a sign-extension or zero-extension of a shift count. */
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index e008e804781..86b886a373f 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2940,7 +2940,7 @@ while (0)
we'll have to generate a load/store pair for each, halve the
value of MIPS_CALL_RATIO to take that into account. */
-#define MOVE_RATIO \
+#define MOVE_RATIO(speed) \
(HAVE_movmemsi \
? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
: MIPS_CALL_RATIO / 2)
@@ -2961,20 +2961,20 @@ while (0)
? (SIZE) < UNITS_PER_WORD \
: (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
: (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
- < (unsigned int) MOVE_RATIO))
+ < (unsigned int) MOVE_RATIO (false)))
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
-#define CLEAR_RATIO \
- (optimize_size ? MIPS_CALL_RATIO : 15)
+#define CLEAR_RATIO(speed) \
+ ((speed) ? 15 : MIPS_CALL_RATIO)
/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when
optimizing for size adjust the ratio to account for the overhead of
loading the constant and replicating it across the word. */
-#define SET_RATIO \
- (optimize_size ? MIPS_CALL_RATIO - 2 : 15)
+#define SET_RATIO(speed) \
+ ((speed) ? 15 : MIPS_CALL_RATIO - 2)
/* STORE_BY_PIECES_P can be used when copying a constant string, but
in that case each word takes 3 insns (lui, ori, sw), or more in
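
The MIPS ratios follow the same conversion; note that MOVE_BY_PIECES_P deliberately passes false, which is harmless here because the MIPS MOVE_RATIO body ignores its argument. A compilable rendering of the new CLEAR_RATIO/SET_RATIO selection (the MIPS_CALL_RATIO value below is a placeholder):

    #define MIPS_CALL_RATIO 8  /* placeholder; the real value lives in mips.h */
    #define CLEAR_RATIO(speed) ((speed) ? 15 : MIPS_CALL_RATIO)
    #define SET_RATIO(speed)   ((speed) ? 15 : MIPS_CALL_RATIO - 2)

    static int
    clear_by_pieces_ok (unsigned ninsns, int speed)
    {
      /* Inline stores only when cheaper than the estimated memset call.  */
      return ninsns < (unsigned) CLEAR_RATIO (speed);
    }
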
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
index 07035fbb06b..c1c80579427 100644
--- a/gcc/config/mn10300/mn10300.h
+++ b/gcc/config/mn10300/mn10300.h
@@ -814,7 +814,7 @@ while (0)
/* According expr.c, a value of around 6 should minimize code size, and
for the MN10300 series, that's our primary concern. */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
#define TEXT_SECTION_ASM_OP "\t.section .text"
#define DATA_SECTION_ASM_OP "\t.section .data"
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
index ae9e4d31f01..8d4a807cbe3 100644
--- a/gcc/config/pa/pa-hpux11.h
+++ b/gcc/config/pa/pa-hpux11.h
@@ -122,8 +122,9 @@ along with GCC; see the file COPYING3. If not see
#undef LIB_SPEC
#define LIB_SPEC \
"%{!shared:\
- %{mt|pthread:-lpthread} -lc \
- %{static:%{!nolibdld:-a shared -ldld -a archive -lpthread -lc}}}\
+ %{static|mt|pthread:%{fopenmp:%{static:-a archive_shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
+ %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}}}\
%{shared:%{mt|pthread:-lpthread}}"
#undef STARTFILE_SPEC
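
For readers not fluent in spec syntax, a hedged gloss of the constructs the new LIB_SPEC uses (my reading, not verified against gcc's spec processor):

    %{static|mt|pthread:X}  -- substitute X when -static, -mt, or -pthread is given
    %{fopenmp:X}            -- substitute X when -fopenmp is given (nesting ANDs them)
    -a archive_shared / -a archive
                            -- HP-UX ld mode switches bracketing -lrt, so librt
                               can resolve against a shared library even in an
                               otherwise archive-mode link

On this reading, the net effect is that -fopenmp links librt and that plain -static now also pulls in -lpthread.
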
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 5e272a2f346..2966a42ddb2 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -1506,7 +1506,7 @@ do { \
arguments passed in registers to avoid infinite recursion during argument
setup for a function call. Why? Consider how we copy the stack slots
reserved for parameters when they may be trashed by a call. */
-#define MOVE_RATIO (TARGET_64BIT ? 8 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index c3d686db5c4..b50ab4cb6e3 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -3487,7 +3487,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block move pattern. */
@@ -3675,7 +3675,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block move pattern. */
@@ -3842,7 +3842,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block clear pattern. */
@@ -3956,7 +3956,7 @@
FAIL;
/* This does happen, but not often enough to worry much about. */
- if (size / align < MOVE_RATIO)
+ if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
FAIL;
/* Fall through means we're going to use our block clear pattern. */
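
All four pa.md expanders now gate their block move/clear patterns on the per-insn speed predicate. A standalone sketch of the new call site (PA values from pa.h above; on PA the flag is ignored, so the change is mechanical here, but targets such as i386 and sparc do branch on it):

    #define TARGET_64BIT 0                              /* placeholder */
    #define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)    /* from pa.h */

    static int optimize_insn_for_speed_p (void) { return 1; }  /* stub */

    static int
    use_block_pattern_p (int size, int align)
    {
      /* The expanders FAIL for anything smaller, leaving the copy to
         move_by_pieces or a libcall.  */
      return size / align >= MOVE_RATIO (optimize_insn_for_speed_p ());
    }
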
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index ef1122d6b3b..bad5b41c2e3 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -57,25 +57,35 @@ along with GCC; see the file COPYING3. If not see
#if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD)
#define LIB_SPEC \
"%{!shared:\
- %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\
+ %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
- -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+ -lprof %{static:-a archive}\
+ %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
- -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+ -lgprof %{static:-a archive}\
+ %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{shared:%{mt|pthread:-lpthread}}"
#else
#define LIB_SPEC \
"%{!shared:\
- %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\
+ %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
- -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+ -lprof %{static:-a archive}\
+ %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
- -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+ -lgprof %{static:-a archive}\
+ %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+ %{static:-a archive}} -lpthread} -lc\
%{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
%{shared:%{mt|pthread:-lpthread}}"
#endif
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d3821a8cfa9..6ab34969d02 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -857,6 +857,10 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int rs6000_use_sched_lookahead (void);
static int rs6000_use_sched_lookahead_guard (rtx);
+static void * rs6000_alloc_sched_context (void);
+static void rs6000_init_sched_context (void *, bool);
+static void rs6000_set_sched_context (void *);
+static void rs6000_free_sched_context (void *);
static tree rs6000_builtin_reciprocal (unsigned int, bool, bool);
static tree rs6000_builtin_mask_for_load (void);
static tree rs6000_builtin_mul_widen_even (tree);
@@ -1131,6 +1135,15 @@ static const char alt_reg_names[][8] =
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
+#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
+#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
+#undef TARGET_SCHED_INIT_SCHED_CONTEXT
+#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
+#undef TARGET_SCHED_SET_SCHED_CONTEXT
+#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
+#undef TARGET_SCHED_FREE_SCHED_CONTEXT
+#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
+
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
@@ -19476,7 +19489,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
for (i=pos; i<*pn_ready-1; i++)
ready[i] = ready[i + 1];
ready[*pn_ready-1] = tmp;
- if INSN_PRIORITY_KNOWN (tmp)
+
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
INSN_PRIORITY (tmp)++;
break;
}
@@ -19493,7 +19507,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_load_insn (ready[pos])
- && INSN_PRIORITY_KNOWN (ready[pos]))
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
{
INSN_PRIORITY (ready[pos])++;
@@ -19535,8 +19550,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
for (i=pos; i<*pn_ready-1; i++)
ready[i] = ready[i + 1];
ready[*pn_ready-1] = tmp;
- if INSN_PRIORITY_KNOWN (tmp)
+
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
INSN_PRIORITY (tmp)++;
+
first_store_pos = -1;
break;
@@ -19555,7 +19572,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
for (i=first_store_pos; i<*pn_ready-1; i++)
ready[i] = ready[i + 1];
ready[*pn_ready-1] = tmp;
- if INSN_PRIORITY_KNOWN (tmp)
+ if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
INSN_PRIORITY (tmp)++;
}
}
@@ -19569,7 +19586,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
while (pos >= 0)
{
if (is_store_insn (ready[pos])
- && INSN_PRIORITY_KNOWN (ready[pos]))
+ && !sel_sched_p ()
+ && INSN_PRIORITY_KNOWN (ready[pos]))
{
INSN_PRIORITY (ready[pos])++;
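
Each of these rs6000_sched_reorder2 hunks adds the same guard, suggesting that the haifa priority fields are simply not maintained when the selective scheduler is driving. A standalone model of the repeated pattern (all names are stand-ins for sel_sched_p, INSN_PRIORITY_KNOWN, and INSN_PRIORITY):

    #include <stdbool.h>

    struct insn_model { bool priority_known; int priority; };

    static bool sel_sched_active;   /* models sel_sched_p () */

    static void
    bump_priority (struct insn_model *in)
    {
      /* Priorities are consulted only when the scheduler that computes
         them (haifa) is the one running.  */
      if (!sel_sched_active && in->priority_known)
        in->priority++;
    }
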
@@ -20071,7 +20089,7 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
if (group_end)
{
/* If the scheduler had marked group termination at this location
- (between insn and next_indn), and neither insn nor next_insn will
+ (between insn and next_insn), and neither insn nor next_insn will
force group termination, pad the group with nops to force group
termination. */
if (can_issue_more
@@ -20125,6 +20143,10 @@ rs6000_sched_finish (FILE *dump, int sched_verbose)
if (reload_completed && rs6000_sched_groups)
{
+ /* Do not run sched_finish hook when selective scheduling is enabled. */
+ if (sel_sched_p ())
+ return;
+
if (rs6000_sched_insert_nops == sched_finish_none)
return;
@@ -20145,6 +20167,67 @@ rs6000_sched_finish (FILE *dump, int sched_verbose)
}
}
}
+
+struct _rs6000_sched_context
+{
+ short cached_can_issue_more;
+ rtx last_scheduled_insn;
+ int load_store_pendulum;
+};
+
+typedef struct _rs6000_sched_context rs6000_sched_context_def;
+typedef rs6000_sched_context_def *rs6000_sched_context_t;
+
+/* Allocate storage for a new scheduling context. */
+static void *
+rs6000_alloc_sched_context (void)
+{
+ return xmalloc (sizeof (rs6000_sched_context_def));
+}
+
+/* If CLEAN_P is true, initialize _SC with clean data;
+ otherwise initialize it from the global context. */
+static void
+rs6000_init_sched_context (void *_sc, bool clean_p)
+{
+ rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+ if (clean_p)
+ {
+ sc->cached_can_issue_more = 0;
+ sc->last_scheduled_insn = NULL_RTX;
+ sc->load_store_pendulum = 0;
+ }
+ else
+ {
+ sc->cached_can_issue_more = cached_can_issue_more;
+ sc->last_scheduled_insn = last_scheduled_insn;
+ sc->load_store_pendulum = load_store_pendulum;
+ }
+}
+
+/* Sets the global scheduling context to the one pointed to by _SC. */
+static void
+rs6000_set_sched_context (void *_sc)
+{
+ rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+ gcc_assert (sc != NULL);
+
+ cached_can_issue_more = sc->cached_can_issue_more;
+ last_scheduled_insn = sc->last_scheduled_insn;
+ load_store_pendulum = sc->load_store_pendulum;
+}
+
+/* Free _SC. */
+static void
+rs6000_free_sched_context (void *_sc)
+{
+ gcc_assert (_sc != NULL);
+
+ free (_sc);
+}
+
/* Length in units of the trampoline for entering a nested function. */
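
The four new functions give selective scheduling a way to checkpoint and restore the port's private state (cached_can_issue_more, last_scheduled_insn, load_store_pendulum). A hedged, self-contained model of the call order the scheduler uses; the real bodies are the rs6000_* functions above, reached through targetm.sched:

    #include <stdbool.h>

    struct sched_hooks
    {
      void *(*alloc_sched_context) (void);
      void (*init_sched_context) (void *, bool);
      void (*set_sched_context) (void *);
      void (*free_sched_context) (void *);
    };

    static void
    checkpoint_and_restore (const struct sched_hooks *h)
    {
      void *ctx = h->alloc_sched_context ();
      h->init_sched_context (ctx, false);  /* snapshot the live globals */
      /* ... try an alternative schedule, clobbering the globals ... */
      h->set_sched_context (ctx);          /* restore the snapshot */
      h->free_sched_context (ctx);
    }
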
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index a31efd24a23..32ec03624bb 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -872,7 +872,7 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte
in tree-sra with UNITS_PER_WORD to make a decision so we adjust it
here to compensate for that factor since mvc costs exactly the same
on 31 and 64 bit. */
-#define MOVE_RATIO (TARGET_64BIT? 2 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT? 2 : 4)
/* Sections. */
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index c4910a20a31..2e39082e1f6 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -2099,7 +2099,7 @@ do { \
/* If a memory-to-memory move would take MOVE_RATIO or more simple
move-instruction pairs, we will do a movmem or libcall instead. */
-#define MOVE_RATIO (optimize_size ? 3 : 8)
+#define MOVE_RATIO(speed) ((speed) ? 8 : 3)
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index eaece4c1863..0b74a9c18e4 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -663,7 +663,7 @@ spu_expand_block_move (rtx ops[])
int i;
if (GET_CODE (ops[2]) != CONST_INT
|| GET_CODE (ops[3]) != CONST_INT
- || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
+ || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
return 0;
bytes = INTVAL (ops[2]);
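
On SPU, MOVE_RATIO is a flat 32 regardless of the flag, so this gate still caps inline block moves at 32 * 8 = 256 bytes; only the macro's interface changed. A minimal restatement (constants from the diff; the helper is illustrative):

    static int
    spu_inline_move_ok (long nbytes)
    {
      /* 32 is SPU's speed-independent MOVE_RATIO; the expander multiplies
         it by 8 before comparing against the byte count.  */
      return nbytes <= 32 * 8;
    }
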
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index f78eb73c429..9839822885e 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -438,7 +438,7 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
#define SLOW_BYTE_ACCESS 0
-#define MOVE_RATIO 32
+#define MOVE_RATIO(speed) 32
#define NO_FUNCTION_CSE
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 89f2109ceb3..7b4b743fc8d 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -1864,58 +1864,6 @@
DONE;
})
-;; Taken from STI's gcc
-;; Does not correctly handle INF or NAN.
-(define_expand "divdf3"
- [(set (match_operand:DF 0 "register_operand" "=r")
- (div:DF (match_operand:DF 1 "register_operand" "r")
- (match_operand:DF 2 "register_operand" "r")))]
- "flag_finite_math_only"
- "{
- /*
- double
- divdf3 (double x, double y)
- {
- float x0;
- float y_f = (float) y;
- double x1, x2;
-
- x0 = spu_extract(spu_re(spu_promote(y_f, 0)), 0);
- x1 = (double)(x0 * (2.0f - y_f * x0));
- x2 = x1 * (2.0 - y * x1);
- return (x * x2 * (2.0 - y * x2));
- }
- */
-
- rtx dst = operands[0];
- rtx x = operands[1];
- rtx y = operands[2];
- rtx y_f = gen_reg_rtx(SFmode);
- rtx x0_f = gen_reg_rtx(SFmode);
- rtx x1_f = gen_reg_rtx(SFmode);
- rtx x1 = gen_reg_rtx(DFmode);
- rtx x2 = gen_reg_rtx(DFmode);
- rtx t1_f = gen_reg_rtx(SFmode);
- rtx t1 = gen_reg_rtx(DFmode);
- rtx two = gen_reg_rtx(DFmode);
- rtx two_f = gen_reg_rtx(SFmode);
-
- emit_insn (gen_truncdfsf2 (y_f, y));
- emit_insn (gen_frest_sf (x0_f, y_f));
- emit_insn (gen_fi_sf (x0_f, y_f, x0_f));
- emit_insn (gen_movsf (two_f, spu_float_const(\"2.0\",SFmode)));
- emit_insn (gen_fnms_sf (t1_f, y_f, x0_f, two_f));
- emit_insn (gen_mulsf3 (x1_f, t1_f, x0_f));
- emit_insn (gen_extendsfdf2 (x1, x1_f));
- emit_insn (gen_extendsfdf2 (two, two_f));
- emit_insn (gen_movdf (t1, two));
- emit_insn (gen_fnms_df (t1, y, x1, t1));
- emit_insn (gen_muldf3 (x2, x1, t1));
- emit_insn (gen_fnms_df (two, y, x2, two));
- emit_insn (gen_muldf3 (dst, x2, two));
- emit_insn (gen_muldf3 (dst, dst, x));
- DONE;
-}")
;; sqrt
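
The deleted divdf3 expander implemented Newton-Raphson reciprocal refinement, x_{n+1} = x_n * (2 - y * x_n), starting from the single-precision estimate produced by frest/fi. The C outline embedded in its comment is reproduced runnable below; spu_re is replaced by a plain reciprocal, so this mirrors only the arithmetic, not the instruction sequence:

    double
    divdf3_sketch (double x, double y)
    {
      float y_f = (float) y;
      float x0 = 1.0f / y_f;                          /* stand-in for spu_re */
      double x1 = (double) (x0 * (2.0f - y_f * x0));  /* first refinement */
      double x2 = x1 * (2.0 - y * x1);                /* second refinement */
      return x * x2 * (2.0 - y * x2);                 /* third, fused with x */
    }
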
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
index c897b121380..65e731fef79 100644
--- a/gcc/config/v850/v850.h
+++ b/gcc/config/v850/v850.h
@@ -865,7 +865,7 @@ do { \
/* According expr.c, a value of around 6 should minimize code size, and
for the V850 series, that's our primary concern. */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
/* Indirect calls are expensive, never turn a direct call
into an indirect call. */