author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-09-02 21:27:57 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2008-09-02 21:27:57 +0000
commit     d93d495ba54e9746659d91d69119157f038a815f (patch)
tree       bfc67c88ff54c4880beb663c32eedf5f95f15123 /gcc/config
parent     e827aa43033edbfc6bac3fa2ff04421737b78421 (diff)
download   gcc-d93d495ba54e9746659d91d69119157f038a815f.tar.gz
2008-09-02 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk r139912 after the graphite merge into trunk;
graphite uses PPL & CLooG...
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@139915 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
41 files changed, 731 insertions, 287 deletions
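The largest hunk below rewrites va_list_skip_additions in gcc/config/alpha/alpha.c from the old tree/MODIFY_EXPR statement representation to gimple tuples. As a rough illustration of the accessor idiom it switches to, here is a minimal sketch using only API names that appear in the hunk (SSA_NAME_DEF_STMT, is_gimple_assign, gimple_assign_rhs_code, gimple_assign_rhs1, CONVERT_EXPR_CODE_P); skip_trivial_defs itself is a hypothetical helper, not part of the patch, and it assumes GCC's internal tree.h/gimple.h environment of this era:

    /* Minimal sketch (not part of the patch): walk an SSA name back
       through trivial conversion assignments to its defining statement,
       using the gimple-tuples accessors instead of MODIFY_EXPR trees.  */
    static gimple
    skip_trivial_defs (tree name)
    {
      gimple stmt = NULL;
      while (TREE_CODE (name) == SSA_NAME)
        {
          stmt = SSA_NAME_DEF_STMT (name);
          /* PHI, call, or non-conversion assignment: stop here.  */
          if (!is_gimple_assign (stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
            break;
          name = gimple_assign_rhs1 (stmt);
        }
      return stmt;  /* Defining statement after skipping conversions, or NULL.  */
    }

The real helper in the hunk additionally follows constant PLUS_EXPR/POINTER_PLUS_EXPR additions and stops at GIMPLE_PHI nodes; the sketch only follows conversions.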
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index cad90e1adce..a4d3bf1f150 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -5803,38 +5803,34 @@ alpha_build_builtin_va_list (void) /* Helper function for alpha_stdarg_optimize_hook. Skip over casts and constant additions. */ -static tree +static gimple va_list_skip_additions (tree lhs) { - tree rhs, stmt; - - if (TREE_CODE (lhs) != SSA_NAME) - return lhs; + gimple stmt; for (;;) { + enum tree_code code; + stmt = SSA_NAME_DEF_STMT (lhs); - if (TREE_CODE (stmt) == PHI_NODE) + if (gimple_code (stmt) == GIMPLE_PHI) return stmt; - if (TREE_CODE (stmt) != MODIFY_EXPR - || TREE_OPERAND (stmt, 0) != lhs) - return lhs; - - rhs = TREE_OPERAND (stmt, 1); - if (TREE_CODE (rhs) == WITH_SIZE_EXPR) - rhs = TREE_OPERAND (rhs, 0); + if (!is_gimple_assign (stmt) + || gimple_assign_lhs (stmt) != lhs) + return NULL; - if (((!CONVERT_EXPR_P (rhs)) - && ((TREE_CODE (rhs) != PLUS_EXPR - && TREE_CODE (rhs) != POINTER_PLUS_EXPR) - || TREE_CODE (TREE_OPERAND (rhs, 1)) != INTEGER_CST - || !host_integerp (TREE_OPERAND (rhs, 1), 1))) - || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) - return rhs; + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !host_integerp (gimple_assign_rhs2 (stmt), 1))) + return stmt; - lhs = TREE_OPERAND (rhs, 0); + lhs = gimple_assign_rhs1 (stmt); } } @@ -5859,36 +5855,49 @@ va_list_skip_additions (tree lhs) static bool alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) { - tree base, offset, arg1, arg2; + tree base, offset, rhs; int offset_arg = 1; + gimple base_stmt; -#if 1 - /* FIXME tuples. 
*/ - (void) si; - (void) stmt; - return false; -#else + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) + != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); while (handled_component_p (rhs)) rhs = TREE_OPERAND (rhs, 0); if (TREE_CODE (rhs) != INDIRECT_REF || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) return false; - lhs = va_list_skip_additions (TREE_OPERAND (rhs, 0)); - if (lhs == NULL_TREE - || TREE_CODE (lhs) != POINTER_PLUS_EXPR) + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL + || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) return false; - base = TREE_OPERAND (lhs, 0); + base = gimple_assign_rhs1 (stmt); if (TREE_CODE (base) == SSA_NAME) - base = va_list_skip_additions (base); + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } if (TREE_CODE (base) != COMPONENT_REF || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) { - base = TREE_OPERAND (lhs, 0); + base = gimple_assign_rhs2 (stmt); if (TREE_CODE (base) == SSA_NAME) - base = va_list_skip_additions (base); + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } if (TREE_CODE (base) != COMPONENT_REF || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) @@ -5902,55 +5911,88 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) || !bitmap_bit_p (si->va_list_vars, DECL_UID (base))) return false; - offset = TREE_OPERAND (lhs, offset_arg); + offset = gimple_op (stmt, 1 + offset_arg); if (TREE_CODE (offset) == SSA_NAME) - offset = va_list_skip_additions (offset); - - if (TREE_CODE (offset) == PHI_NODE) { - HOST_WIDE_INT sub; - - if (PHI_NUM_ARGS (offset) != 2) - goto escapes; + gimple offset_stmt = va_list_skip_additions (offset); - arg1 = va_list_skip_additions (PHI_ARG_DEF (offset, 0)); - arg2 = va_list_skip_additions (PHI_ARG_DEF (offset, 1)); - if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR) + if (offset_stmt + && gimple_code (offset_stmt) == GIMPLE_PHI) { - tree tem = arg1; - arg1 = arg2; - arg2 = tem; + HOST_WIDE_INT sub; + gimple arg1_stmt, arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; - if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR) + if (gimple_phi_num_args (offset_stmt) != 2) goto escapes; - } - if (!host_integerp (TREE_OPERAND (arg2, 1), 0)) - goto escapes; - sub = tree_low_cst (TREE_OPERAND (arg2, 1), 0); - if (TREE_CODE (arg2) == MINUS_EXPR) - sub = -sub; - if (sub < -48 || sub > -32) - goto escapes; + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL + || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt)) + goto escapes; - arg2 = va_list_skip_additions (TREE_OPERAND (arg2, 0)); - if (arg1 != arg2) - goto escapes; + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + gimple tem = arg1_stmt; + code2 = code1; + arg1_stmt = arg2_stmt; + arg2_stmt = tem; + } + else + goto escapes; - if (TREE_CODE (arg1) == SSA_NAME) - arg1 = va_list_skip_additions (arg1); + if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0)) + goto escapes; - if (TREE_CODE (arg1) != COMPONENT_REF - || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field - || get_base_address (arg1) != base) - goto escapes; + sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; - /* Need floating point regs. */ - cfun->va_list_fpr_size |= 2; + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt + && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); } - else if (TREE_CODE (offset) != COMPONENT_REF - || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field - || get_base_address (offset) != base) + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) goto escapes; else /* Need general regs. */ @@ -5960,7 +6002,6 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) escapes: si->va_list_escapes = true; return false; -#endif } #endif @@ -6126,10 +6167,11 @@ alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) } static tree -alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset, +alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, gimple_seq *pre_p) { - tree type_size, ptr_type, addend, t, addr, internal_post; + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; /* If the type could not be passed in registers, skip the block reserved for the registers. */ @@ -6177,7 +6219,7 @@ alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset, fold_convert (sizetype, addend)); internal_post = NULL; gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); - append_to_statement_list (internal_post, pre_p); + gimple_seq_add_seq (pre_p, internal_post); /* Update the offset field. */ type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); @@ -6230,7 +6272,7 @@ alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); /* Stuff the offset temporary back into its field. */ - gimplify_assign (offset_field, + gimplify_assign (unshare_expr (offset_field), fold_convert (TREE_TYPE (offset_field), offset), pre_p); if (indirect) diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 8e022d6a5f8..c462f71fb1d 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -1079,7 +1079,7 @@ do { \ Without byte/word accesses, we want no more than four instructions; with, several single byte accesses are better. 
*/ -#define MOVE_RATIO (TARGET_BWX ? 7 : 2) +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) /* Largest number of bytes of an object that can be placed in a register. On the Alpha we have plenty of registers, so use TImode. */ diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index c7e425b0c7f..fe2f2b53792 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -116,6 +116,7 @@ ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) +ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index d0e408ccf1d..ab08ef446f6 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -123,6 +123,7 @@ extern const char *fp_immediate_constant (rtx); extern void arm_emit_call_insn (rtx, rtx); extern const char *output_call (rtx *); extern const char *output_call_mem (rtx *); +void arm_emit_movpair (rtx, rtx); extern const char *output_mov_long_double_fpa_from_arm (rtx *); extern const char *output_mov_long_double_arm_from_fpa (rtx *); extern const char *output_mov_long_double_arm_from_arm (rtx *); diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index ee5606b04cb..beb8f9f4173 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexr4f,cortexm3,cortexm1" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1" (const (symbol_ref "arm_tune"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6a4b3ef11cd..ec28f79446f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -544,6 +544,9 @@ int arm_tune_xscale = 0; This typically means an ARM6 or ARM7 with MMU or MPU. */ int arm_tune_wbuf = 0; +/* Nonzero if tuning for Cortex-A9. */ +int arm_tune_cortex_a9 = 0; + /* Nonzero if generating Thumb instructions. 
*/ int thumb_code = 0; @@ -1186,12 +1189,30 @@ arm_override_options (void) tune_flags = all_cores[(int)arm_tune].flags; + if (target_abi_name) + { + for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) + { + if (streq (arm_all_abis[i].name, target_abi_name)) + { + arm_abi = arm_all_abis[i].abi_type; + break; + } + } + if (i == ARRAY_SIZE (arm_all_abis)) + error ("invalid ABI option: -mabi=%s", target_abi_name); + } + else + arm_abi = ARM_DEFAULT_ABI; + /* Make sure that the processor choice does not conflict with any of the other command line choices. */ if (TARGET_ARM && !(insn_flags & FL_NOTM)) error ("target CPU does not support ARM mode"); - if (TARGET_INTERWORK && !(insn_flags & FL_THUMB)) + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ + if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) { warning (0, "target CPU does not support interworking" ); target_flags &= ~MASK_INTERWORK; @@ -1271,6 +1292,7 @@ arm_override_options (void) arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. */ @@ -1304,22 +1326,6 @@ arm_override_options (void) if (arm_arch5) target_flags &= ~MASK_INTERWORK; - if (target_abi_name) - { - for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) - { - if (streq (arm_all_abis[i].name, target_abi_name)) - { - arm_abi = arm_all_abis[i].abi_type; - break; - } - } - if (i == ARRAY_SIZE (arm_all_abis)) - error ("invalid ABI option: -mabi=%s", target_abi_name); - } - else - arm_abi = ARM_DEFAULT_ABI; - if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); @@ -1924,14 +1930,22 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, { /* Currently SET is the only monadic value for CODE, all the rest are diadic. */ - emit_set_insn (target, GEN_INT (val)); + if (TARGET_USE_MOVT) + arm_emit_movpair (target, GEN_INT (val)); + else + emit_set_insn (target, GEN_INT (val)); + return 1; } else { rtx temp = subtargets ? gen_reg_rtx (mode) : target; - emit_set_insn (temp, GEN_INT (val)); + if (TARGET_USE_MOVT) + arm_emit_movpair (temp, GEN_INT (val)); + else + emit_set_insn (temp, GEN_INT (val)); + /* For MINUS, the value is subtracted from, since we never have subtraction of a constant. */ if (code == MINUS) @@ -4903,7 +4917,15 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) || (GET_CODE (XEXP (x, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) ? 0 : 8)); - return (1 + ((GET_CODE (XEXP (x, 0)) == REG + + extra_cost = 1; + /* Increase the cost of complex shifts because they aren't any faster, + and reduce dual issue opportunities. */ + if (arm_tune_cortex_a9 + && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT) + extra_cost++; + + return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG || (GET_CODE (XEXP (x, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) ? 0 : 4) @@ -5018,7 +5040,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) && ((INTVAL (XEXP (XEXP (x, 0), 1)) & (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0))) - && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))) + && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)) + && !arm_tune_cortex_a9) || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)) ? 
0 : 4)); @@ -5115,6 +5138,10 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) case SYMBOL_REF: return 6; + case HIGH: + case LO_SUM: + return (outer == SET) ? 1 : -1; + case CONST_DOUBLE: if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x)) return outer == SET ? 2 : -1; @@ -5341,6 +5368,13 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) *total = COSTS_N_INSNS (4); return true; + case HIGH: + case LO_SUM: + /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the + cost of these slightly. */ + *total = COSTS_N_INSNS (1) + 1; + return true; + default: if (mode != VOIDmode) *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); @@ -9889,6 +9923,14 @@ output_mov_long_double_arm_from_arm (rtx *operands) } +/* Emit a MOVW/MOVT pair. */ +void arm_emit_movpair (rtx dest, rtx src) +{ + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); +} + + /* Output a move from arm registers to an fpa registers. OPERANDS[0] is an fpa register. OPERANDS[1] is the first registers of an arm register pair. */ @@ -12904,10 +12946,21 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; - /* An integer without a preceding # sign. */ + /* An integer or symbol address without a preceding # sign. */ case 'c': - gcc_assert (GET_CODE (x) == CONST_INT); - fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + default: + gcc_unreachable (); + } return; case 'B': @@ -18235,8 +18288,15 @@ arm_no_early_mul_dep (rtx producer, rtx consumer) op = XVECEXP (op, 0, 0); op = XEXP (op, 1); - return (GET_CODE (op) == PLUS - && !reg_overlap_mentioned_p (value, XEXP (op, 0))); + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; } /* We can't rely on the caller doing the proper promotion when @@ -19017,7 +19077,9 @@ arm_issue_rate (void) switch (arm_tune) { case cortexr4: + case cortexr4f: case cortexa8: + case cortexa9: return 2; default: diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4132b06b024..f83aabac038 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -241,6 +241,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_INT_SIMD \ (TARGET_32BIT && arm_arch6 && arm_arch_notm) +/* Should MOVW/MOVT be used in preference to a constant pool. */ +#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) + /* We could use unified syntax for arm mode, but for now we just use it for Thumb-2. */ #define TARGET_UNIFIED_ASM TARGET_THUMB2 @@ -404,6 +407,9 @@ extern int arm_tune_xscale; /* Nonzero if tuning for stores via the write buffer. */ extern int arm_tune_wbuf; +/* Nonzero if tuning for Cortex-A9. */ +extern int arm_tune_cortex_a9; + /* Nonzero if we should define __THUMB_INTERWORK__ in the preprocessor. XXX This is a bit of a hack, it's intended to help work around @@ -1962,6 +1968,11 @@ typedef struct SYMBOL's section. */ #define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0 +/* Nonzero if all target requires all absolute relocations be R_ARM_ABS32. */ +#ifndef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 0 +#endif + /* Nonzero if the constant value X is a legitimate general operand. 
It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. @@ -2244,7 +2255,7 @@ do { \ #define MOVE_MAX 4 #undef MOVE_RATIO -#define MOVE_RATIO (arm_tune_xscale ? 4 : 2) +#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2) /* Define if operations between registers always perform the operation on the full register even if a narrower mode is specified. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 0ef91c6a003..1c279095707 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -157,7 +157,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). -(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -239,7 +239,7 @@ ; (define_attr "type" - "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" + "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys" (if_then_else (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") (const_string "mult") @@ -331,18 +331,26 @@ ;; Processor type. This is created automatically from arm-cores.def. (include "arm-tune.md") +(define_attr "tune_cortexr4" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexr4,cortexr4f") + (const_string "yes") + (const_string "no")))) + ;; True if the generic scheduling description should be used. (define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4") + (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9") + (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa8")) + (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9") + (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -352,7 +360,9 @@ (include "arm1026ejs.md") (include "arm1136jfs.md") (include "cortex-a8.md") +(include "cortex-a9.md") (include "cortex-r4.md") +(include "cortex-r4f.md") (include "vfp11.md") @@ -4814,6 +4824,14 @@ optimize && can_create_pseudo_p ()); DONE; } + + if (TARGET_USE_MOVT && !target_word_relocations + && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !arm_tls_referenced_p (operands[1])) + { + arm_emit_movpair (operands[0], operands[1]); + DONE; + } } else /* TARGET_THUMB1... */ { @@ -4874,6 +4892,28 @@ " ) +;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. 
+(define_insn "*arm_movt" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "i")))] + "TARGET_32BIT" + "movt%?\t%0, #:upper16:%c2" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + +(define_insn "*arm_movw" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (high:SI (match_operand:SI 1 "general_operand" "i")))] + "TARGET_32BIT" + "movw%?\t%0, #:lower16:%c1" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + (define_insn "*arm_movsi_insn" [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))] diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 2b005e42fa4..c8bdcf80f48 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -156,3 +156,7 @@ Assume big endian bytes, little endian words mvectorize-with-neon-quad Target Report Mask(NEON_VECTORIZE_QUAD) Use Neon quad-word (rather than double-word) registers for vectorization + +mword-relocations +Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) ++Only generate absolute relocations on word sized values. diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index ed170c4b170..397ddd5f97c 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -269,12 +269,12 @@ ;; first execute state. We model this by using 1020a_e in the first cycle. (define_insn_reservation "v10_ffarith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) "1020a_e+v10_fmac") (define_insn_reservation "v10_farith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "farith")) + (eq_attr "type" "faddd,fadds")) "1020a_e+v10_fmac") (define_insn_reservation "v10_cvt" 5 diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index e28d9ead45e..38be1da261a 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -51,9 +51,11 @@ /* The BPABI integer comparison routines return { -1, 0, 1 }. */ #define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" + /* Tell the assembler to build BPABI binaries. */ #undef SUBTARGET_EXTRA_ASM_SPEC -#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}" +#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}" TARGET_FIX_V4BX_SPEC #ifndef SUBTARGET_EXTRA_LINK_SPEC #define SUBTARGET_EXTRA_LINK_SPEC "" @@ -63,7 +65,7 @@ #define BPABI_LINK_SPEC \ "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ - "-X" SUBTARGET_EXTRA_LINK_SPEC + "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC #undef LINK_SPEC #define LINK_SPEC BPABI_LINK_SPEC diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md index dd7ac25ccaa..93453b618db 100644 --- a/gcc/config/arm/cortex-a8-neon.md +++ b/gcc/config/arm/cortex-a8-neon.md @@ -134,7 +134,7 @@ (define_insn_reservation "cortex_a8_vfp_add_sub" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "farith")) + (eq_attr "type" "fconsts,fconstd,fadds,faddd")) "cortex_a8_vfp,cortex_a8_vfplite*9") (define_insn_reservation "cortex_a8_vfp_muls" 12 @@ -172,7 +172,7 @@ ;; take four cycles, we pick that latency. 
(define_insn_reservation "cortex_a8_vfp_farith" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) "cortex_a8_vfp,cortex_a8_vfplite*3") (define_insn_reservation "cortex_a8_vfp_cvt" 7 diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md new file mode 100644 index 00000000000..121fd2da747 --- /dev/null +++ b/gcc/config/arm/cortex-a9.md @@ -0,0 +1,65 @@ +;; ARM Cortex-A9 VFP pipeline description +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "cortex_a9") + +;; FIXME: We model a single pipeline for all instructions. +;; Is dual-issue possible, and do we have other pipelines? +(define_cpu_unit "cortex_a9_vfp" "cortex_a9") + +(define_insn_reservation "cortex_a9_ffarith" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fadd" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds,faddd,f_cvt")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmuls" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmuld" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "cortex_a9_vfp*2") + +(define_insn_reservation "cortex_a9_fmacs" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmacd" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd")) + "cortex_a9_vfp*2") + +(define_insn_reservation "cortex_a9_fdivs" 15 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs")) + "cortex_a9_vfp*10") + +(define_insn_reservation "cortex_a9_fdivd" 25 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd")) + "cortex_a9_vfp*20") diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 34467345acb..e26c3d45d5e 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -77,24 +77,24 @@ ;; Data processing instructions. Moves without shifts are kept separate ;; for the purposes of the dual-issue constraints above. 
(define_insn_reservation "cortex_r4_alu" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (and (eq_attr "type" "alu") (not (eq_attr "insn" "mov")))) "cortex_r4_alu") (define_insn_reservation "cortex_r4_mov" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (and (eq_attr "type" "alu") (eq_attr "insn" "mov"))) "cortex_r4_mov") (define_insn_reservation "cortex_r4_alu_shift" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "alu_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "alu_shift_reg")) "cortex_r4_alu_shift_reg") @@ -127,32 +127,32 @@ ;; Multiplication instructions. (define_insn_reservation "cortex_r4_mul_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "mul,smmul")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mul_3" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mla_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "mla,smmla")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mla_3" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_smlald" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smlald,smlsld")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mull" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smull,umull,umlal,umaal")) "cortex_r4_mul_2") @@ -195,19 +195,19 @@ ;; is performed with B having ten more leading zeros than A. ;; This gives a latency of nine for udiv and ten for sdiv. (define_insn_reservation "cortex_r4_udiv" 9 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "udiv")) "cortex_r4_div_9") (define_insn_reservation "cortex_r4_sdiv" 10 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "sdiv")) "cortex_r4_div_10") ;; Branches. We assume correct prediction. (define_insn_reservation "cortex_r4_branch" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "branch")) "cortex_r4_branch") @@ -215,7 +215,7 @@ ;; number is used as "positive infinity" so that everything should be ;; finished by the time of return. (define_insn_reservation "cortex_r4_call" 32 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "call")) "nothing") @@ -226,12 +226,12 @@ ;; accesses following are correctly aligned. (define_insn_reservation "cortex_r4_load_1_2" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "load1,load2")) "cortex_r4_load_store") (define_insn_reservation "cortex_r4_load_3_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "load3,load4")) "cortex_r4_load_store_2") @@ -281,12 +281,12 @@ ;; Store instructions. 
(define_insn_reservation "cortex_r4_store_1_2" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "store1,store2")) "cortex_r4_load_store") (define_insn_reservation "cortex_r4_store_3_4" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "store3,store4")) "cortex_r4_load_store_2") diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md new file mode 100644 index 00000000000..8982bc068eb --- /dev/null +++ b/gcc/config/arm/cortex-r4f.md @@ -0,0 +1,161 @@ +;; ARM Cortex-R4F VFP pipeline description +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; With the exception of simple VMOV <freg>, <freg> instructions and +;; the accululate operand of a multiply-accumulate instruction, all +;; registers are early registers. Thus base latencies are 1 more than +;; those listed in the TRM. + +;; We use the A, B abd C units from the integer core, plus two additional +;; units to enforce VFP dual issue constraints. + +;; A B C V1 VMLA +;; fcpy 1 2 +;; farith 1 2 1 +;; fmrc 1 2 +;; fconst 1 2 * * +;; ffarith 1 2 * * +;; fmac 1 2 1 2 +;; fdiv 1 2 * +;; f_loads * * * +;; f_stores * * * + +(define_cpu_unit "cortex_r4_v1" "cortex_r4") + +(define_cpu_unit "cortex_r4_vmla" "cortex_r4") + +(define_reservation "cortex_r4_issue_ab" + "(cortex_r4_issue_a|cortex_r4_issue_b)") +(define_reservation "cortex_r4_single_issue" + "cortex_r4_issue_a+cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fcpys" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcpys")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_ffariths" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffariths,fconsts,fcmps")) + "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fariths" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fadds,fmuls")) + "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fmacs" 6 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacs")) + "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)") + +(define_insn_reservation "cortex_r4_fdivs" 17 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivs")) + "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_floads" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loads")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fstores" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla") + +(define_insn_reservation "cortex_r4_mcr" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "r_2_f")) + "cortex_r4_issue_ab") + 
+(define_insn_reservation "cortex_r4_mrc" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_2_r")) + "cortex_r4_issue_ab") + +;; Bypasses for normal (not early) regs. +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fcpys") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fcpys") +(define_bypass 5 "cortex_r4_fmacs" + "cortex_r4_fcpys") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fcpys") + +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +;; mac->mac has an extra forwarding path. +(define_bypass 3 "cortex_r4_fmacs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") + +;; Double precision operations. These can not dual issue. + +(define_insn_reservation "cortex_r4_fmacd" 20 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacd")) + "cortex_r4_single_issue*13") + +(define_insn_reservation "cortex_r4_farith" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "faddd,fmuld")) + "cortex_r4_single_issue*3") + +;; FIXME: The short cycle count suggests these instructions complete +;; out of order. Chances are this is not a pipelined operation. +(define_insn_reservation "cortex_r4_fdivd" 97 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivd")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_ffarithd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffarithd,fconstd")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_fcmpd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcmpd")) + "cortex_r4_single_issue*2") + +(define_insn_reservation "cortex_r4_f_cvt" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_cvt")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_f_memd" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loadd,f_stored")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_f_flag" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_single_issue") + diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h index 3e583b3a912..af92c72b7ba 100644 --- a/gcc/config/arm/symbian.h +++ b/gcc/config/arm/symbian.h @@ -101,3 +101,5 @@ /* SymbianOS cannot merge entities with vague linkage at runtime. 
*/ #define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index f33d8206a1f..2417650adbb 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -755,15 +755,12 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (GET_CODE (operands[3]) == LT && operands[3] == const0_rtx) + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) return \"asr\\t%0, %1, #31\"; if (GET_CODE (operands[3]) == NE) return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\"; - if (GET_CODE (operands[3]) == GT) - return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, %0, asr #31\"; - output_asm_insn (\"cmp\\t%1, %2\", operands); output_asm_insn (\"ite\\t%D3\", operands); output_asm_insn (\"mov%D3\\t%0, #0\", operands); diff --git a/gcc/config/arm/uclinux-elf.h b/gcc/config/arm/uclinux-elf.h index 98a78505631..89b96f257ba 100644 --- a/gcc/config/arm/uclinux-elf.h +++ b/gcc/config/arm/uclinux-elf.h @@ -83,3 +83,5 @@ "%{pthread:-lpthread} \ %{shared:-lc} \ %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 64bb9564d9c..737f81ccb27 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -24,8 +24,15 @@ ) ;; The VFP "type" attributes differ from those used in the FPA model. -;; ffarith Fast floating point insns, e.g. abs, neg, cpy, cmp. -;; farith Most arithmetic insns. +;; fcpys Single precision cpy. +;; ffariths Single precision abs, neg. +;; ffarithd Double precision abs, neg, cpy. +;; fadds Single precision add/sub. +;; faddd Double precision add/sub. +;; fconsts Single precision load immediate. +;; fconstd Double precision load immediate. +;; fcmps Single precision comparison. +;; fcmpd Double precision comparison. ;; fmuls Single precision multiply. ;; fmuld Double precision multiply. ;; fmacs Single precision multiply-accumulate. 
@@ -74,7 +81,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_loads,f_stores") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -111,7 +118,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_load,f_store") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")] ) @@ -145,7 +152,7 @@ gcc_unreachable (); } " - [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_loadd,f_stored") + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") (set_attr "length" "8,8,8,4,4,4,4,4") (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] @@ -172,7 +179,7 @@ abort (); } " - [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_load,f_store") + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store") (set_attr "length" "8,8,8,4,4,4,4,4") (set_attr "pool_range" "*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] @@ -214,7 +221,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,farith,f_loads,f_stores,load1,store1,ffarith,*") + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -250,7 +257,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,farith,f_load,f_store,load1,store1,ffarith,*") + "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*") (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) @@ -288,7 +295,7 @@ } " [(set_attr "type" - "r_2_f,f_2_r,farith,f_loadd,f_stored,load2,store2,ffarith,*") + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") (set_attr "length" "4,4,4,8,8,4,4,4,8") (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")] @@ -320,7 +327,7 @@ } " [(set_attr "type" - "r_2_f,f_2_r,farith,load2,store2,f_load,f_store,ffarith,*") + "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*") (set_attr "length" "4,4,4,8,8,4,4,4,8") (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] @@ -349,7 +356,7 @@ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -372,7 +379,7 @@ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*movdfcc_vfp" @@ -395,7 +402,7 @@ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*thumb2_movdfcc_vfp" @@ -418,7 +425,7 @@ 
ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) @@ -430,7 +437,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabss%?\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffariths")] ) (define_insn "*absdf2_vfp" @@ -439,7 +446,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffarithd")] ) (define_insn "*negsf2_vfp" @@ -450,7 +457,7 @@ fnegs%?\\t%0, %1 eor%?\\t%0, %1, #-2147483648" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffariths")] ) (define_insn_and_split "*negdf2_vfp" @@ -496,7 +503,7 @@ " [(set_attr "predicable" "yes") (set_attr "length" "4,4,8") - (set_attr "type" "ffarith")] + (set_attr "type" "ffarithd")] ) @@ -509,7 +516,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "fadds")] ) (define_insn "*adddf3_vfp" @@ -519,7 +526,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "faddd")] ) @@ -530,7 +537,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "fadds")] ) (define_insn "*subdf3_vfp" @@ -540,7 +547,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "faddd")] ) @@ -909,7 +916,7 @@ fcmps%?\\t%0, %1 fcmpzs%?\\t%0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmps")] ) (define_insn "*cmpsf_trap_vfp" @@ -921,7 +928,7 @@ fcmpes%?\\t%0, %1 fcmpezs%?\\t%0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) (define_insn "*cmpdf_vfp" @@ -933,7 +940,7 @@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) (define_insn "*cmpdf_trap_vfp" @@ -945,7 +952,7 @@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md index 59699739539..8f863fd70cd 100644 --- a/gcc/config/arm/vfp11.md +++ b/gcc/config/arm/vfp11.md @@ -51,12 +51,12 @@ (define_insn_reservation "vfp_ffarith" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) "fmac") (define_insn_reservation "vfp_farith" 8 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "farith,f_cvt,fmuls,fmacs")) + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs")) "fmac") (define_insn_reservation "vfp_fmul" 9 diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 441655776e9..a7610acca5d 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -113,3 +113,6 @@ along with GCC; see the file COPYING3. If not see cannot allow arbitrary offsets for shared libraries either. 
*/ #undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P #define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 826e60b9e02..4ac369f10ba 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -998,7 +998,7 @@ do { \ /* If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, we will do a movmem or libcall instead. */ -#define MOVE_RATIO 5 +#define MOVE_RATIO(speed) 5 /* STORAGE LAYOUT: target machine storage layout Define this macro as a C expression which is nonzero if accessing diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index b8a66e96a4b..ed1ec3deb06 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -1242,7 +1242,7 @@ struct cum_args {int regs;}; word-length sizes will be emitted. The "9" will translate to (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions (8 instruction sequences) or less. */ -#define MOVE_RATIO 9 +#define MOVE_RATIO(speed) 9 /* Node: Sections */ diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h index 7305fc32f85..4edbb2f6ba2 100644 --- a/gcc/config/h8300/h8300.h +++ b/gcc/config/h8300/h8300.h @@ -1189,10 +1189,8 @@ struct cum_arg #define FINAL_PRESCAN_INSN(insn, operand, nop) \ final_prescan_insn (insn, operand, nop) -#define MOVE_RATIO 3 extern int h8300_move_ratio; -#undef MOVE_RATIO -#define MOVE_RATIO h8300_move_ratio +#define MOVE_RATIO(speed) h8300_move_ratio /* Machine-specific symbol_ref flags. */ #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 025eee6a99c..2c016328e4c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1631,9 +1631,6 @@ rtx ix86_compare_op0 = NULL_RTX; rtx ix86_compare_op1 = NULL_RTX; rtx ix86_compare_emitted = NULL_RTX; -/* Size of the register save area. */ -#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16) - /* Define the structure for the machine field in struct function. */ struct stack_local_entry GTY(()) @@ -6312,14 +6309,24 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) int i; int regparm = ix86_regparm; - if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI) + if (cum->call_abi != DEFAULT_ABI) regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; - if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size) - return; + /* GPR size of varargs save area. */ + if (cfun->va_list_gpr_size) + ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; + else + ix86_varargs_gpr_size = 0; + + /* FPR size of varargs save area. We don't need it if we don't pass + anything in SSE registers. */ + if (cum->sse_nregs && cfun->va_list_fpr_size) + ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; + else + ix86_varargs_fpr_size = 0; - /* Indicate to allocate space on the stack for varargs save area. */ - ix86_save_varrargs_registers = 1; + if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) + return; save_area = frame_pointer_rtx; set = get_varargs_alias_set (); @@ -6337,7 +6344,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) x86_64_int_parameter_registers[i])); } - if (cum->sse_nregs && cfun->va_list_fpr_size) + if (ix86_varargs_fpr_size) { /* Now emit code to save SSE registers. The AX parameter contains number of SSE parameter registers used to call this function. 
We use @@ -6382,7 +6389,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) tmp_reg = gen_reg_rtx (Pmode); emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, plus_constant (save_area, - 8 * X86_64_REGPARM_MAX + 127))); + ix86_varargs_gpr_size + 127))); mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); MEM_NOTRAP_P (mem) = 1; set_mem_alias_set (mem, set); @@ -6438,7 +6445,7 @@ ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, if (stdarg_p (fntype)) function_arg_advance (&next_cum, mode, type, 1); - if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) + if (cum->call_abi == MS_ABI) setup_incoming_varargs_ms_64 (&next_cum); else setup_incoming_varargs_64 (&next_cum); @@ -6501,7 +6508,7 @@ ix86_va_start (tree valist, rtx nextarg) expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } - if (cfun->va_list_fpr_size) + if (TARGET_SSE && cfun->va_list_fpr_size) { type = TREE_TYPE (fpr); t = build2 (MODIFY_EXPR, type, fpr, @@ -6520,12 +6527,15 @@ ix86_va_start (tree valist, rtx nextarg) TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - if (cfun->va_list_gpr_size || cfun->va_list_fpr_size) + if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) { /* Find the register save area. Prologue of the function save it right above stack frame. */ type = TREE_TYPE (sav); t = make_tree (type, frame_pointer_rtx); + if (!ix86_varargs_gpr_size) + t = build2 (POINTER_PLUS_EXPR, type, t, + size_int (-8 * X86_64_REGPARM_MAX)); t = build2 (MODIFY_EXPR, type, sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -7500,13 +7510,8 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += frame->nregs * UNITS_PER_WORD; /* Va-arg area */ - if (ix86_save_varrargs_registers) - { - offset += X86_64_VARARGS_SIZE; - frame->va_arg_size = X86_64_VARARGS_SIZE; - } - else - frame->va_arg_size = 0; + frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; + offset += frame->va_arg_size; /* Align start of frame for local function. */ frame->padding1 = ((offset + stack_alignment_needed - 1) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d933c5e2389..6f6529a252e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1906,12 +1906,12 @@ do { \ If you don't define this, a reasonable default is used. */ -#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio) +#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3) /* If a clear memory operation would take CLEAR_RATIO or more simple move-instruction sequences, we will do a clrmem or libcall instead. */ -#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio)) +#define CLEAR_RATIO(speed) ((speed) ? 
MIN (6, ix86_cost->move_ratio) : 2) /* Define if shifts truncate the shift count which implies one can omit a sign-extension or zero-extension @@ -2390,7 +2390,8 @@ struct machine_function GTY(()) { struct stack_local_entry *stack_locals; const char *some_ld_name; - int save_varrargs_registers; + int varargs_gpr_size; + int varargs_fpr_size; int accesses_prev_frame; int optimize_mode_switching[MAX_386_ENTITIES]; int needs_cld; @@ -2416,7 +2417,8 @@ struct machine_function GTY(()) }; #define ix86_stack_locals (cfun->machine->stack_locals) -#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) +#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size) +#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size) #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching) #define ix86_current_function_needs_cld (cfun->machine->needs_cld) #define ix86_tls_descriptor_calls_expanded_in_cfun \ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index c16ecc7e3c3..28abf27d1bf 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -382,8 +382,8 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCHED_NEEDS_BLOCK_P #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p -#undef TARGET_SCHED_GEN_CHECK -#define TARGET_SCHED_GEN_CHECK ia64_gen_check +#undef TARGET_SCHED_GEN_SPEC_CHECK +#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_check #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\ @@ -408,7 +408,7 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS ia64_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_UNSPEC_MAY_TRAP_P #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p @@ -6278,10 +6278,6 @@ static rtx dfa_stop_insn; static rtx last_scheduled_insn; -/* The following variable value is size of the DFA state. */ - -static size_t dfa_state_size; - /* The following variable value is pointer to a DFA state used as temporary variable. */ @@ -6857,6 +6853,8 @@ ia64_set_sched_flags (spec_info_t spec_info) mask |= BE_IN_CONTROL; } + spec_info->mask = mask; + if (mask) { *flags |= USE_DEPS_LIST | DO_SPECULATION; @@ -6864,7 +6862,6 @@ ia64_set_sched_flags (spec_info_t spec_info) if (mask & BE_IN_SPEC) *flags |= NEW_BBS; - spec_info->mask = mask; spec_info->flags = 0; if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns) diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h index cb789e1a54f..302c414bb5c 100644 --- a/gcc/config/m68hc11/m68hc11.h +++ b/gcc/config/m68hc11/m68hc11.h @@ -1505,7 +1505,7 @@ do { \ /* MOVE_RATIO is the number of move instructions that is better than a block move. Make this small on 6811, since the code size grows very large with each move. */ -#define MOVE_RATIO 3 +#define MOVE_RATIO(speed) 3 /* Define if shifts truncate the shift count which implies one can omit a sign-extension or zero-extension of a shift count. */ diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e008e804781..86b886a373f 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2940,7 +2940,7 @@ while (0) we'll have to generate a load/store pair for each, halve the value of MIPS_CALL_RATIO to take that into account. */ -#define MOVE_RATIO \ +#define MOVE_RATIO(speed) \ (HAVE_movmemsi \ ? 
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
index 07035fbb06b..c1c80579427 100644
--- a/gcc/config/mn10300/mn10300.h
+++ b/gcc/config/mn10300/mn10300.h
@@ -814,7 +814,7 @@ while (0)
 /* According expr.c, a value of around 6 should minimize code size, and
    for the MN10300 series, that's our primary concern.  */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
 #define TEXT_SECTION_ASM_OP "\t.section .text"
 #define DATA_SECTION_ASM_OP "\t.section .data"
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
index ae9e4d31f01..8d4a807cbe3 100644
--- a/gcc/config/pa/pa-hpux11.h
+++ b/gcc/config/pa/pa-hpux11.h
@@ -122,8 +122,9 @@ along with GCC; see the file COPYING3.  If not see
 #undef LIB_SPEC
 #define LIB_SPEC \
   "%{!shared:\
-     %{mt|pthread:-lpthread} -lc \
-     %{static:%{!nolibdld:-a shared -ldld -a archive -lpthread -lc}}}\
+     %{static|mt|pthread:%{fopenmp:%{static:-a archive_shared} -lrt\
+       %{static:-a archive}} -lpthread} -lc\
+     %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}}}\
    %{shared:%{mt|pthread:-lpthread}}"
 #undef STARTFILE_SPEC
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 5e272a2f346..2966a42ddb2 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -1506,7 +1506,7 @@ do { \
    arguments passed in registers to avoid infinite recursion during argument
    setup for a function call.  Why?  Consider how we copy the stack slots
    reserved for parameters when they may be trashed by a call.  */
-#define MOVE_RATIO (TARGET_64BIT ? 8 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4)
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index c3d686db5c4..b50ab4cb6e3 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -3487,7 +3487,7 @@
     FAIL;
   /* This does happen, but not often enough to worry much
      about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   /* Fall through means we're going to use our block move pattern.  */
@@ -3675,7 +3675,7 @@
     FAIL;
   /* This does happen, but not often enough to worry much
      about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   /* Fall through means we're going to use our block move pattern.  */
@@ -3842,7 +3842,7 @@
     FAIL;
   /* This does happen, but not often enough to worry much
      about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   /* Fall through means we're going to use our block clear pattern.  */
@@ -3956,7 +3956,7 @@
     FAIL;
   /* This does happen, but not often enough to worry much
      about.  */
-  if (size / align < MOVE_RATIO)
+  if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ()))
     FAIL;
   /* Fall through means we're going to use our block clear pattern.  */
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index ef1122d6b3b..bad5b41c2e3 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -57,25 +57,35 @@ along with GCC; see the file COPYING3.  If not see
 #if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD)
 #define LIB_SPEC \
   "%{!shared:\
-     %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\
+     %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+       %{static:-a archive}} -lpthread} -lc\
        %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
     %{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
-      -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+      -lprof %{static:-a archive}\
+      %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+        %{static:-a archive}} -lpthread} -lc\
       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
     %{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\
-      -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+      -lgprof %{static:-a archive}\
+      %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+        %{static:-a archive}} -lpthread} -lc\
       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
    %{shared:%{mt|pthread:-lpthread}}"
 #else
 #define LIB_SPEC \
   "%{!shared:\
-     %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\
+     %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+       %{static:-a archive}} -lpthread} -lc\
      %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
     %{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
-      -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+      -lprof %{static:-a archive}\
+      %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+        %{static:-a archive}} -lpthread} -lc\
      %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
     %{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\
-      -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\
+      -lgprof %{static:-a archive}\
+      %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\
+        %{static:-a archive}} -lpthread} -lc\
      %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
    %{shared:%{mt|pthread:-lpthread}}"
 #endif
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d3821a8cfa9..6ab34969d02 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -857,6 +857,10 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
 static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
 static int rs6000_use_sched_lookahead (void);
 static int rs6000_use_sched_lookahead_guard (rtx);
+static void * rs6000_alloc_sched_context (void);
+static void rs6000_init_sched_context (void *, bool);
+static void rs6000_set_sched_context (void *);
+static void rs6000_free_sched_context (void *);
 static tree rs6000_builtin_reciprocal (unsigned int, bool, bool);
 static tree rs6000_builtin_mask_for_load (void);
 static tree rs6000_builtin_mul_widen_even (tree);
@@ -1131,6 +1135,15 @@ static const char alt_reg_names[][8] =
 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
+#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
+#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
+#undef TARGET_SCHED_INIT_SCHED_CONTEXT
+#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
+#undef TARGET_SCHED_SET_SCHED_CONTEXT
+#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
+#undef TARGET_SCHED_FREE_SCHED_CONTEXT
+#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
+
 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
@@ -19476,7 +19489,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
               for (i=pos; i<*pn_ready-1; i++)
                 ready[i] = ready[i + 1];
               ready[*pn_ready-1] = tmp;
-              if INSN_PRIORITY_KNOWN (tmp)
+
+              if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                 INSN_PRIORITY (tmp)++;
               break;
             }
@@ -19493,7 +19507,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
           while (pos >= 0)
             {
               if (is_load_insn (ready[pos])
-                  && INSN_PRIORITY_KNOWN (ready[pos]))
+                  && !sel_sched_p ()
+                  && INSN_PRIORITY_KNOWN (ready[pos]))
                 {
                   INSN_PRIORITY (ready[pos])++;
@@ -19535,8 +19550,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
                   for (i=pos; i<*pn_ready-1; i++)
                     ready[i] = ready[i + 1];
                   ready[*pn_ready-1] = tmp;
-                  if INSN_PRIORITY_KNOWN (tmp)
+
+                  if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                     INSN_PRIORITY (tmp)++;
+                  first_store_pos = -1;
                   break;
@@ -19555,7 +19572,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
               for (i=first_store_pos; i<*pn_ready-1; i++)
                 ready[i] = ready[i + 1];
               ready[*pn_ready-1] = tmp;
-              if INSN_PRIORITY_KNOWN (tmp)
+              if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
                 INSN_PRIORITY (tmp)++;
             }
         }
@@ -19569,7 +19586,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
       while (pos >= 0)
         {
           if (is_store_insn (ready[pos])
-              && INSN_PRIORITY_KNOWN (ready[pos]))
+              && !sel_sched_p ()
+              && INSN_PRIORITY_KNOWN (ready[pos]))
             {
               INSN_PRIORITY (ready[pos])++;
@@ -20071,7 +20089,7 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
       if (group_end)
         {
           /* If the scheduler had marked group termination at this location
-             (between insn and next_indn), and neither insn nor next_insn will
+             (between insn and next_insn), and neither insn nor next_insn will
              force group termination, pad the group with nops to force group
              termination.  */
           if (can_issue_more
@@ -20125,6 +20143,10 @@ rs6000_sched_finish (FILE *dump, int sched_verbose)
   if (reload_completed && rs6000_sched_groups)
     {
+      /* Do not run sched_finish hook when selective scheduling enabled.  */
+      if (sel_sched_p ())
+        return;
+
       if (rs6000_sched_insert_nops == sched_finish_none)
         return;
@@ -20145,6 +20167,67 @@ rs6000_sched_finish (FILE *dump, int sched_verbose)
             }
         }
     }
+
+struct _rs6000_sched_context
+{
+  short cached_can_issue_more;
+  rtx last_scheduled_insn;
+  int load_store_pendulum;
+};
+
+typedef struct _rs6000_sched_context rs6000_sched_context_def;
+typedef rs6000_sched_context_def *rs6000_sched_context_t;
+
+/* Allocate store for new scheduling context.  */
+static void *
+rs6000_alloc_sched_context (void)
+{
+  return xmalloc (sizeof (rs6000_sched_context_def));
+}
+
+/* If CLEAN_P is true then initializes _SC with clean data,
+   and from the global context otherwise.  */
+static void
+rs6000_init_sched_context (void *_sc, bool clean_p)
+{
+  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+  if (clean_p)
+    {
+      sc->cached_can_issue_more = 0;
+      sc->last_scheduled_insn = NULL_RTX;
+      sc->load_store_pendulum = 0;
+    }
+  else
+    {
+      sc->cached_can_issue_more = cached_can_issue_more;
+      sc->last_scheduled_insn = last_scheduled_insn;
+      sc->load_store_pendulum = load_store_pendulum;
+    }
+}
+
+/* Sets the global scheduling context to the one pointed to by _SC.  */
+static void
+rs6000_set_sched_context (void *_sc)
+{
+  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
+
+  gcc_assert (sc != NULL);
+
+  cached_can_issue_more = sc->cached_can_issue_more;
+  last_scheduled_insn = sc->last_scheduled_insn;
+  load_store_pendulum = sc->load_store_pendulum;
+}
+
+/* Free _SC.  */
+static void
+rs6000_free_sched_context (void *_sc)
+{
+  gcc_assert (_sc != NULL);
+
+  free (_sc);
+}
+
 /* Length in units of the trampoline for entering a nested function.  */
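The four new rs6000 hooks let the selective scheduler snapshot and later restore what are otherwise file-scope globals in the backend (cached_can_issue_more, last_scheduled_insn, load_store_pendulum).  A minimal self-contained model of that alloc/init/set/free pattern, reduced to a single field; this is illustrative only, not the sel-sched driver itself:

#include <assert.h>
#include <stdlib.h>

static int load_store_pendulum;   /* stand-in for the backend global */

typedef struct { int load_store_pendulum; } sched_ctx;

static void *
ctx_alloc (void)
{
  return malloc (sizeof (sched_ctx));
}

/* Mirrors rs6000_init_sched_context: start clean, or copy the globals.  */
static void
ctx_init (void *p, int clean_p)
{
  ((sched_ctx *) p)->load_store_pendulum = clean_p ? 0 : load_store_pendulum;
}

/* Mirrors rs6000_set_sched_context: copy the snapshot back into globals.  */
static void
ctx_set (void *p)
{
  load_store_pendulum = ((sched_ctx *) p)->load_store_pendulum;
}

static void
ctx_free (void *p)
{
  assert (p != NULL);
  free (p);
}

int
main (void)
{
  void *c = ctx_alloc ();

  load_store_pendulum = 3;
  ctx_init (c, 0);            /* snapshot the current global state */
  load_store_pendulum = -2;   /* scheduler works on another region */
  ctx_set (c);                /* restore the snapshot */
  ctx_free (c);
  return load_store_pendulum == 3 ? 0 : 1;
}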
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index a31efd24a23..32ec03624bb 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -872,7 +872,7 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte
    in tree-sra with UNITS_PER_WORD to make a decision so we adjust it here
    to compensate for that factor since mvc costs exactly the same on 31 and
    64 bit.  */
-#define MOVE_RATIO (TARGET_64BIT? 2 : 4)
+#define MOVE_RATIO(speed) (TARGET_64BIT? 2 : 4)
 /* Sections.  */
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index c4910a20a31..2e39082e1f6 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -2099,7 +2099,7 @@ do { \
 /* If a memory-to-memory move would take MOVE_RATIO or more simple
    move-instruction pairs, we will do a movmem or libcall instead.  */
-#define MOVE_RATIO (optimize_size ? 3 : 8)
+#define MOVE_RATIO(speed) ((speed) ? 8 : 3)
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index eaece4c1863..0b74a9c18e4 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -663,7 +663,7 @@ spu_expand_block_move (rtx ops[])
   int i;
   if (GET_CODE (ops[2]) != CONST_INT
       || GET_CODE (ops[3]) != CONST_INT
-      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
+      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
     return 0;
   bytes = INTVAL (ops[2]);
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index f78eb73c429..9839822885e 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -438,7 +438,7 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
 #define SLOW_BYTE_ACCESS 0
-#define MOVE_RATIO 32
+#define MOVE_RATIO(speed) 32
 #define NO_FUNCTION_CSE
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 89f2109ceb3..7b4b743fc8d 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -1864,58 +1864,6 @@
   DONE;
 })
-;; Taken from STI's gcc
-;; Does not correctly handle INF or NAN.
-(define_expand "divdf3"
-  [(set (match_operand:DF 0 "register_operand" "=r")
-        (div:DF (match_operand:DF 1 "register_operand" "r")
-                (match_operand:DF 2 "register_operand" "r")))]
-  "flag_finite_math_only"
-  "{
-    /*
-      double
-      divdf3 (double x, double y)
-      {
-        float x0;
-        float y_f = (float) y;
-        double x1, x2;
-
-        x0 = spu_extract(spu_re(spu_promote(y_f, 0)), 0);
-        x1 = (double)(x0 * (2.0f - y_f * x0));
-        x2 = x1 * (2.0 - y * x1);
-        return (x * x2 * (2.0 - y * x2));
-      }
-    */
-
-    rtx dst = operands[0];
-    rtx x = operands[1];
-    rtx y = operands[2];
-    rtx y_f = gen_reg_rtx(SFmode);
-    rtx x0_f = gen_reg_rtx(SFmode);
-    rtx x1_f = gen_reg_rtx(SFmode);
-    rtx x1 = gen_reg_rtx(DFmode);
-    rtx x2 = gen_reg_rtx(DFmode);
-    rtx t1_f = gen_reg_rtx(SFmode);
-    rtx t1 = gen_reg_rtx(DFmode);
-    rtx two = gen_reg_rtx(DFmode);
-    rtx two_f = gen_reg_rtx(SFmode);
-
-    emit_insn (gen_truncdfsf2 (y_f, y));
-    emit_insn (gen_frest_sf (x0_f, y_f));
-    emit_insn (gen_fi_sf (x0_f, y_f, x0_f));
-    emit_insn (gen_movsf (two_f, spu_float_const(\"2.0\",SFmode)));
-    emit_insn (gen_fnms_sf (t1_f, y_f, x0_f, two_f));
-    emit_insn (gen_mulsf3 (x1_f, t1_f, x0_f));
-    emit_insn (gen_extendsfdf2 (x1, x1_f));
-    emit_insn (gen_extendsfdf2 (two, two_f));
-    emit_insn (gen_movdf (t1, two));
-    emit_insn (gen_fnms_df (t1, y, x1, t1));
-    emit_insn (gen_muldf3 (x2, x1, t1));
-    emit_insn (gen_fnms_df (two, y, x2, two));
-    emit_insn (gen_muldf3 (dst, x2, two));
-    emit_insn (gen_muldf3 (dst, dst, x));
-    DONE;
-}")
 ;; sqrt
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
index c897b121380..65e731fef79 100644
--- a/gcc/config/v850/v850.h
+++ b/gcc/config/v850/v850.h
@@ -865,7 +865,7 @@ do { \
 /* According expr.c, a value of around 6 should minimize code size, and
    for the V850 series, that's our primary concern.  */
-#define MOVE_RATIO 6
+#define MOVE_RATIO(speed) 6
 /* Indirect calls are expensive, never turn a direct call into an
    indirect call.  */
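For reference, the spu.md divdf3 expander deleted above open-coded the Newton-Raphson refinement spelled out in its own comment.  Restated in plain C, with an ordinary reciprocal estimate standing in for the spu_re/spu_extract intrinsics that the expander used; like the original, this is only valid under finite-math assumptions:

/* Plain-C restatement of the algorithm in the removed expander's comment;
   not valid for INF or NaN inputs.  */
double
div_df (double x, double y)
{
  float y_f = (float) y;
  float x0 = 1.0f / y_f;        /* reciprocal estimate (spu_re in the md) */
  double x1 = (double) (x0 * (2.0f - y_f * x0));
  double x2 = x1 * (2.0 - y * x1);

  return x * x2 * (2.0 - y * x2);
}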