diff options
Diffstat (limited to 'gcc/config')
41 files changed, 731 insertions, 287 deletions
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index cad90e1adce..a4d3bf1f150 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -5803,38 +5803,34 @@ alpha_build_builtin_va_list (void) /* Helper function for alpha_stdarg_optimize_hook. Skip over casts and constant additions. */ -static tree +static gimple va_list_skip_additions (tree lhs) { - tree rhs, stmt; - - if (TREE_CODE (lhs) != SSA_NAME) - return lhs; + gimple stmt; for (;;) { + enum tree_code code; + stmt = SSA_NAME_DEF_STMT (lhs); - if (TREE_CODE (stmt) == PHI_NODE) + if (gimple_code (stmt) == GIMPLE_PHI) return stmt; - if (TREE_CODE (stmt) != MODIFY_EXPR - || TREE_OPERAND (stmt, 0) != lhs) - return lhs; - - rhs = TREE_OPERAND (stmt, 1); - if (TREE_CODE (rhs) == WITH_SIZE_EXPR) - rhs = TREE_OPERAND (rhs, 0); + if (!is_gimple_assign (stmt) + || gimple_assign_lhs (stmt) != lhs) + return NULL; - if (((!CONVERT_EXPR_P (rhs)) - && ((TREE_CODE (rhs) != PLUS_EXPR - && TREE_CODE (rhs) != POINTER_PLUS_EXPR) - || TREE_CODE (TREE_OPERAND (rhs, 1)) != INTEGER_CST - || !host_integerp (TREE_OPERAND (rhs, 1), 1))) - || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) - return rhs; + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !host_integerp (gimple_assign_rhs2 (stmt), 1))) + return stmt; - lhs = TREE_OPERAND (rhs, 0); + lhs = gimple_assign_rhs1 (stmt); } } @@ -5859,36 +5855,49 @@ va_list_skip_additions (tree lhs) static bool alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) { - tree base, offset, arg1, arg2; + tree base, offset, rhs; int offset_arg = 1; + gimple base_stmt; -#if 1 - /* FIXME tuples. 
*/ - (void) si; - (void) stmt; - return false; -#else + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) + != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); while (handled_component_p (rhs)) rhs = TREE_OPERAND (rhs, 0); if (TREE_CODE (rhs) != INDIRECT_REF || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) return false; - lhs = va_list_skip_additions (TREE_OPERAND (rhs, 0)); - if (lhs == NULL_TREE - || TREE_CODE (lhs) != POINTER_PLUS_EXPR) + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL + || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) return false; - base = TREE_OPERAND (lhs, 0); + base = gimple_assign_rhs1 (stmt); if (TREE_CODE (base) == SSA_NAME) - base = va_list_skip_additions (base); + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } if (TREE_CODE (base) != COMPONENT_REF || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) { - base = TREE_OPERAND (lhs, 0); + base = gimple_assign_rhs2 (stmt); if (TREE_CODE (base) == SSA_NAME) - base = va_list_skip_additions (base); + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } if (TREE_CODE (base) != COMPONENT_REF || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) @@ -5902,55 +5911,88 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) || !bitmap_bit_p (si->va_list_vars, DECL_UID (base))) return false; - offset = TREE_OPERAND (lhs, offset_arg); + offset = gimple_op (stmt, 1 + offset_arg); if (TREE_CODE (offset) == SSA_NAME) - offset = va_list_skip_additions (offset); - - if (TREE_CODE (offset) == PHI_NODE) { - HOST_WIDE_INT sub; - - if (PHI_NUM_ARGS (offset) != 2) - goto escapes; + gimple 
offset_stmt = va_list_skip_additions (offset); - arg1 = va_list_skip_additions (PHI_ARG_DEF (offset, 0)); - arg2 = va_list_skip_additions (PHI_ARG_DEF (offset, 1)); - if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR) + if (offset_stmt + && gimple_code (offset_stmt) == GIMPLE_PHI) { - tree tem = arg1; - arg1 = arg2; - arg2 = tem; + HOST_WIDE_INT sub; + gimple arg1_stmt, arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; - if (TREE_CODE (arg2) != MINUS_EXPR && TREE_CODE (arg2) != PLUS_EXPR) + if (gimple_phi_num_args (offset_stmt) != 2) goto escapes; - } - if (!host_integerp (TREE_OPERAND (arg2, 1), 0)) - goto escapes; - sub = tree_low_cst (TREE_OPERAND (arg2, 1), 0); - if (TREE_CODE (arg2) == MINUS_EXPR) - sub = -sub; - if (sub < -48 || sub > -32) - goto escapes; + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL + || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt)) + goto escapes; - arg2 = va_list_skip_additions (TREE_OPERAND (arg2, 0)); - if (arg1 != arg2) - goto escapes; + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + gimple tem = arg1_stmt; + code2 = code1; + arg1_stmt = arg2_stmt; + arg2_stmt = tem; + } + else + goto escapes; - if (TREE_CODE (arg1) == SSA_NAME) - arg1 = va_list_skip_additions (arg1); + if (!host_integerp (gimple_assign_rhs2 (arg2_stmt), 0)) + goto escapes; - if (TREE_CODE (arg1) != COMPONENT_REF - || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field - || get_base_address (arg1) != base) - goto escapes; + sub = tree_low_cst (gimple_assign_rhs2 (arg2_stmt), 0); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; - /* Need floating point regs. */ - cfun->va_list_fpr_size |= 2; + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt + && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); } - else if (TREE_CODE (offset) != COMPONENT_REF - || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field - || get_base_address (offset) != base) + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) goto escapes; else /* Need general regs. 
*/ @@ -5960,7 +6002,6 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) escapes: si->va_list_escapes = true; return false; -#endif } #endif @@ -6126,10 +6167,11 @@ alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) } static tree -alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset, +alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, gimple_seq *pre_p) { - tree type_size, ptr_type, addend, t, addr, internal_post; + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; /* If the type could not be passed in registers, skip the block reserved for the registers. */ @@ -6177,7 +6219,7 @@ alpha_gimplify_va_arg_1 (tree type, tree base, gimple_seq offset, fold_convert (sizetype, addend)); internal_post = NULL; gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); - append_to_statement_list (internal_post, pre_p); + gimple_seq_add_seq (pre_p, internal_post); /* Update the offset field. */ type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); @@ -6230,7 +6272,7 @@ alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); /* Stuff the offset temporary back into its field. */ - gimplify_assign (offset_field, + gimplify_assign (unshare_expr (offset_field), fold_convert (TREE_TYPE (offset_field), offset), pre_p); if (indirect) diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 8e022d6a5f8..c462f71fb1d 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -1079,7 +1079,7 @@ do { \ Without byte/word accesses, we want no more than four instructions; with, several single byte accesses are better. */ -#define MOVE_RATIO (TARGET_BWX ? 7 : 2) +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) /* Largest number of bytes of an object that can be placed in a register. On the Alpha we have plenty of registers, so use TImode. 
*/ diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def index c7e425b0c7f..fe2f2b53792 100644 --- a/gcc/config/arm/arm-cores.def +++ b/gcc/config/arm/arm-cores.def @@ -116,6 +116,7 @@ ARM_CORE("mpcorenovfp", mpcorenovfp, 6K, FL_LDSCHED, 9e) ARM_CORE("mpcore", mpcore, 6K, FL_LDSCHED | FL_VFPV2, 9e) ARM_CORE("arm1156t2-s", arm1156t2s, 6T2, FL_LDSCHED, 9e) ARM_CORE("cortex-a8", cortexa8, 7A, FL_LDSCHED, 9e) +ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, 9e) ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e) ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index d0e408ccf1d..ab08ef446f6 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -123,6 +123,7 @@ extern const char *fp_immediate_constant (rtx); extern void arm_emit_call_insn (rtx, rtx); extern const char *output_call (rtx *); extern const char *output_call_mem (rtx *); +void arm_emit_movpair (rtx, rtx); extern const char *output_mov_long_double_fpa_from_arm (rtx *); extern const char *output_mov_long_double_arm_from_fpa (rtx *); extern const char *output_mov_long_double_arm_from_arm (rtx *); diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index ee5606b04cb..beb8f9f4173 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from arm-cores.def (define_attr "tune" - 
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexr4,cortexr4f,cortexm3,cortexm1" + "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,cortexa8,cortexa9,cortexr4,cortexr4f,cortexm3,cortexm1" (const (symbol_ref "arm_tune"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6a4b3ef11cd..ec28f79446f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -544,6 +544,9 @@ int arm_tune_xscale = 0; This typically means an ARM6 or ARM7 with MMU or MPU. */ int arm_tune_wbuf = 0; +/* Nonzero if tuning for Cortex-A9. */ +int arm_tune_cortex_a9 = 0; + /* Nonzero if generating Thumb instructions. 
*/ int thumb_code = 0; @@ -1186,12 +1189,30 @@ arm_override_options (void) tune_flags = all_cores[(int)arm_tune].flags; + if (target_abi_name) + { + for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) + { + if (streq (arm_all_abis[i].name, target_abi_name)) + { + arm_abi = arm_all_abis[i].abi_type; + break; + } + } + if (i == ARRAY_SIZE (arm_all_abis)) + error ("invalid ABI option: -mabi=%s", target_abi_name); + } + else + arm_abi = ARM_DEFAULT_ABI; + /* Make sure that the processor choice does not conflict with any of the other command line choices. */ if (TARGET_ARM && !(insn_flags & FL_NOTM)) error ("target CPU does not support ARM mode"); - if (TARGET_INTERWORK && !(insn_flags & FL_THUMB)) + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ + if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) { warning (0, "target CPU does not support interworking" ); target_flags &= ~MASK_INTERWORK; @@ -1271,6 +1292,7 @@ arm_override_options (void) arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; /* If we are not using the default (ARM mode) section anchor offset ranges, then set the correct ranges now. 
*/ @@ -1304,22 +1326,6 @@ arm_override_options (void) if (arm_arch5) target_flags &= ~MASK_INTERWORK; - if (target_abi_name) - { - for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) - { - if (streq (arm_all_abis[i].name, target_abi_name)) - { - arm_abi = arm_all_abis[i].abi_type; - break; - } - } - if (i == ARRAY_SIZE (arm_all_abis)) - error ("invalid ABI option: -mabi=%s", target_abi_name); - } - else - arm_abi = ARM_DEFAULT_ABI; - if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); @@ -1924,14 +1930,22 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, { /* Currently SET is the only monadic value for CODE, all the rest are diadic. */ - emit_set_insn (target, GEN_INT (val)); + if (TARGET_USE_MOVT) + arm_emit_movpair (target, GEN_INT (val)); + else + emit_set_insn (target, GEN_INT (val)); + return 1; } else { rtx temp = subtargets ? gen_reg_rtx (mode) : target; - emit_set_insn (temp, GEN_INT (val)); + if (TARGET_USE_MOVT) + arm_emit_movpair (temp, GEN_INT (val)); + else + emit_set_insn (temp, GEN_INT (val)); + /* For MINUS, the value is subtracted from, since we never have subtraction of a constant. */ if (code == MINUS) @@ -4903,7 +4917,15 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) || (GET_CODE (XEXP (x, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) ? 0 : 8)); - return (1 + ((GET_CODE (XEXP (x, 0)) == REG + + extra_cost = 1; + /* Increase the cost of complex shifts because they aren't any faster, + and reduce dual issue opportunities. */ + if (arm_tune_cortex_a9 + && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT) + extra_cost++; + + return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG || (GET_CODE (XEXP (x, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG)) ? 
0 : 4) @@ -5018,7 +5040,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) && ((INTVAL (XEXP (XEXP (x, 0), 1)) & (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0))) - && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))) + && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)) + && !arm_tune_cortex_a9) || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)) ? 0 : 4)); @@ -5115,6 +5138,10 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer) case SYMBOL_REF: return 6; + case HIGH: + case LO_SUM: + return (outer == SET) ? 1 : -1; + case CONST_DOUBLE: if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x)) return outer == SET ? 2 : -1; @@ -5341,6 +5368,13 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total) *total = COSTS_N_INSNS (4); return true; + case HIGH: + case LO_SUM: + /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the + cost of these slightly. */ + *total = COSTS_N_INSNS (1) + 1; + return true; + default: if (mode != VOIDmode) *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); @@ -9889,6 +9923,14 @@ output_mov_long_double_arm_from_arm (rtx *operands) } +/* Emit a MOVW/MOVT pair. */ +void arm_emit_movpair (rtx dest, rtx src) +{ + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); +} + + /* Output a move from arm registers to an fpa registers. OPERANDS[0] is an fpa register. OPERANDS[1] is the first registers of an arm register pair. */ @@ -12904,10 +12946,21 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; - /* An integer without a preceding # sign. */ + /* An integer or symbol address without a preceding # sign. 
*/ case 'c': - gcc_assert (GET_CODE (x) == CONST_INT); - fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + default: + gcc_unreachable (); + } return; case 'B': @@ -18235,8 +18288,15 @@ arm_no_early_mul_dep (rtx producer, rtx consumer) op = XVECEXP (op, 0, 0); op = XEXP (op, 1); - return (GET_CODE (op) == PLUS - && !reg_overlap_mentioned_p (value, XEXP (op, 0))); + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; } /* We can't rely on the caller doing the proper promotion when @@ -19017,7 +19077,9 @@ arm_issue_rate (void) switch (arm_tune) { case cortexr4: + case cortexr4f: case cortexa8: + case cortexa9: return 2; default: diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4132b06b024..f83aabac038 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -241,6 +241,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_INT_SIMD \ (TARGET_32BIT && arm_arch6 && arm_arch_notm) +/* Should MOVW/MOVT be used in preference to a constant pool. */ +#define TARGET_USE_MOVT (arm_arch_thumb2 && !optimize_size) + /* We could use unified syntax for arm mode, but for now we just use it for Thumb-2. */ #define TARGET_UNIFIED_ASM TARGET_THUMB2 @@ -404,6 +407,9 @@ extern int arm_tune_xscale; /* Nonzero if tuning for stores via the write buffer. */ extern int arm_tune_wbuf; +/* Nonzero if tuning for Cortex-A9. */ +extern int arm_tune_cortex_a9; + /* Nonzero if we should define __THUMB_INTERWORK__ in the preprocessor. XXX This is a bit of a hack, it's intended to help work around @@ -1962,6 +1968,11 @@ typedef struct SYMBOL's section. 
*/ #define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0 +/* Nonzero if the target requires all absolute relocations to be R_ARM_ABS32. */ +#ifndef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 0 +#endif + /* Nonzero if the constant value X is a legitimate general operand. It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. @@ -2244,7 +2255,7 @@ do { \ #define MOVE_MAX 4 #undef MOVE_RATIO -#define MOVE_RATIO (arm_tune_xscale ? 4 : 2) +#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2) /* Define if operations between registers always perform the operation on the full register even if a narrower mode is specified. */ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 0ef91c6a003..1c279095707 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -157,7 +157,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). 
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -239,7 +239,7 @@ ; (define_attr "type" - "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" + "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys" (if_then_else (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals") (const_string "mult") @@ -331,18 +331,26 @@ ;; Processor type. This is created automatically from arm-cores.def. (include "arm-tune.md") +(define_attr "tune_cortexr4" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexr4,cortexr4f") + (const_string "yes") + (const_string "no")))) + ;; True if the generic scheduling description should be used. 
(define_attr "generic_sched" "yes,no" (const (if_then_else - (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4") + (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexa9") + (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa8")) + (eq_attr "tune" "!arm1020e,arm1022e,cortexa8,cortexa9") + (eq_attr "tune_cortexr4" "no")) (const_string "yes") (const_string "no")))) @@ -352,7 +360,9 @@ (include "arm1026ejs.md") (include "arm1136jfs.md") (include "cortex-a8.md") +(include "cortex-a9.md") (include "cortex-r4.md") +(include "cortex-r4f.md") (include "vfp11.md") @@ -4814,6 +4824,14 @@ optimize && can_create_pseudo_p ()); DONE; } + + if (TARGET_USE_MOVT && !target_word_relocations + && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !arm_tls_referenced_p (operands[1])) + { + arm_emit_movpair (operands[0], operands[1]); + DONE; + } } else /* TARGET_THUMB1... */ { @@ -4874,6 +4892,28 @@ " ) +;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. 
+(define_insn "*arm_movt" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "i")))] + "TARGET_32BIT" + "movt%?\t%0, #:upper16:%c2" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + +(define_insn "*arm_movw" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (high:SI (match_operand:SI 1 "general_operand" "i")))] + "TARGET_32BIT" + "movw%?\t%0, #:lower16:%c1" + [(set_attr "predicable" "yes") + (set_attr "length" "4")] +) + (define_insn "*arm_movsi_insn" [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") (match_operand:SI 1 "general_operand" "rk, I,K,N,mi,rk"))] diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 2b005e42fa4..c8bdcf80f48 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -156,3 +156,7 @@ Assume big endian bytes, little endian words mvectorize-with-neon-quad Target Report Mask(NEON_VECTORIZE_QUAD) Use Neon quad-word (rather than double-word) registers for vectorization + +mword-relocations +Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) ++Only generate absolute relocations on word sized values. diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index ed170c4b170..397ddd5f97c 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -269,12 +269,12 @@ ;; first execute state. We model this by using 1020a_e in the first cycle. 
(define_insn_reservation "v10_ffarith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) "1020a_e+v10_fmac") (define_insn_reservation "v10_farith" 5 (and (eq_attr "vfp10" "yes") - (eq_attr "type" "farith")) + (eq_attr "type" "faddd,fadds")) "1020a_e+v10_fmac") (define_insn_reservation "v10_cvt" 5 diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h index e28d9ead45e..38be1da261a 100644 --- a/gcc/config/arm/bpabi.h +++ b/gcc/config/arm/bpabi.h @@ -51,9 +51,11 @@ /* The BPABI integer comparison routines return { -1, 0, 1 }. */ #define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" + /* Tell the assembler to build BPABI binaries. */ #undef SUBTARGET_EXTRA_ASM_SPEC -#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}" +#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=4}" TARGET_FIX_V4BX_SPEC #ifndef SUBTARGET_EXTRA_LINK_SPEC #define SUBTARGET_EXTRA_LINK_SPEC "" @@ -63,7 +65,7 @@ #define BPABI_LINK_SPEC \ "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ - "-X" SUBTARGET_EXTRA_LINK_SPEC + "-X" SUBTARGET_EXTRA_LINK_SPEC TARGET_FIX_V4BX_SPEC #undef LINK_SPEC #define LINK_SPEC BPABI_LINK_SPEC diff --git a/gcc/config/arm/cortex-a8-neon.md b/gcc/config/arm/cortex-a8-neon.md index dd7ac25ccaa..93453b618db 100644 --- a/gcc/config/arm/cortex-a8-neon.md +++ b/gcc/config/arm/cortex-a8-neon.md @@ -134,7 +134,7 @@ (define_insn_reservation "cortex_a8_vfp_add_sub" 10 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "farith")) + (eq_attr "type" "fconsts,fconstd,fadds,faddd")) "cortex_a8_vfp,cortex_a8_vfplite*9") (define_insn_reservation "cortex_a8_vfp_muls" 12 @@ -172,7 +172,7 @@ ;; take four cycles, we pick that latency. 
(define_insn_reservation "cortex_a8_vfp_farith" 4 (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) "cortex_a8_vfp,cortex_a8_vfplite*3") (define_insn_reservation "cortex_a8_vfp_cvt" 7 diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md new file mode 100644 index 00000000000..121fd2da747 --- /dev/null +++ b/gcc/config/arm/cortex-a9.md @@ -0,0 +1,65 @@ +;; ARM Cortex-A9 VFP pipeline description +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "cortex_a9") + +;; FIXME: We model a single pipeline for all instructions. +;; Is dual-issue possible, and do we have other pipelines? 
+(define_cpu_unit "cortex_a9_vfp" "cortex_a9") + +(define_insn_reservation "cortex_a9_ffarith" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fadd" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds,faddd,f_cvt")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmuls" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmuld" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "cortex_a9_vfp*2") + +(define_insn_reservation "cortex_a9_fmacs" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs")) + "cortex_a9_vfp") + +(define_insn_reservation "cortex_a9_fmacd" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd")) + "cortex_a9_vfp*2") + +(define_insn_reservation "cortex_a9_fdivs" 15 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs")) + "cortex_a9_vfp*10") + +(define_insn_reservation "cortex_a9_fdivd" 25 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd")) + "cortex_a9_vfp*20") diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 34467345acb..e26c3d45d5e 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -77,24 +77,24 @@ ;; Data processing instructions. Moves without shifts are kept separate ;; for the purposes of the dual-issue constraints above. 
(define_insn_reservation "cortex_r4_alu" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (and (eq_attr "type" "alu") (not (eq_attr "insn" "mov")))) "cortex_r4_alu") (define_insn_reservation "cortex_r4_mov" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (and (eq_attr "type" "alu") (eq_attr "insn" "mov"))) "cortex_r4_mov") (define_insn_reservation "cortex_r4_alu_shift" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "alu_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "alu_shift_reg")) "cortex_r4_alu_shift_reg") @@ -127,32 +127,32 @@ ;; Multiplication instructions. (define_insn_reservation "cortex_r4_mul_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "mul,smmul")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mul_3" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smulxy,smulwy,smuad,smusd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mla_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "mla,smmla")) "cortex_r4_mul_2") (define_insn_reservation "cortex_r4_mla_3" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_smlald" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smlald,smlsld")) "cortex_r4_mul") (define_insn_reservation "cortex_r4_mull" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "smull,umull,umlal,umaal")) "cortex_r4_mul_2") @@ -195,19 +195,19 @@ ;; is performed with B having ten more leading zeros than A. ;; This gives a latency of nine for udiv and ten for sdiv. 
(define_insn_reservation "cortex_r4_udiv" 9 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "udiv")) "cortex_r4_div_9") (define_insn_reservation "cortex_r4_sdiv" 10 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "insn" "sdiv")) "cortex_r4_div_10") ;; Branches. We assume correct prediction. (define_insn_reservation "cortex_r4_branch" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "branch")) "cortex_r4_branch") @@ -215,7 +215,7 @@ ;; number is used as "positive infinity" so that everything should be ;; finished by the time of return. (define_insn_reservation "cortex_r4_call" 32 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "call")) "nothing") @@ -226,12 +226,12 @@ ;; accesses following are correctly aligned. (define_insn_reservation "cortex_r4_load_1_2" 3 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "load1,load2")) "cortex_r4_load_store") (define_insn_reservation "cortex_r4_load_3_4" 4 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "load3,load4")) "cortex_r4_load_store_2") @@ -281,12 +281,12 @@ ;; Store instructions. (define_insn_reservation "cortex_r4_store_1_2" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "store1,store2")) "cortex_r4_load_store") (define_insn_reservation "cortex_r4_store_3_4" 0 - (and (eq_attr "tune" "cortexr4") + (and (eq_attr "tune_cortexr4" "yes") (eq_attr "type" "store3,store4")) "cortex_r4_load_store_2") diff --git a/gcc/config/arm/cortex-r4f.md b/gcc/config/arm/cortex-r4f.md new file mode 100644 index 00000000000..8982bc068eb --- /dev/null +++ b/gcc/config/arm/cortex-r4f.md @@ -0,0 +1,161 @@ +;; ARM Cortex-R4F VFP pipeline description +;; Copyright (C) 2007, 2008 Free Software Foundation, Inc. +;; Written by CodeSourcery. 
+;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; With the exception of simple VMOV <freg>, <freg> instructions and +;; the accumulate operand of a multiply-accumulate instruction, all +;; registers are early registers. Thus base latencies are 1 more than +;; those listed in the TRM. + +;; We use the A, B and C units from the integer core, plus two additional +;; units to enforce VFP dual issue constraints. 
+ +;; A B C V1 VMLA +;; fcpy 1 2 +;; farith 1 2 1 +;; fmrc 1 2 +;; fconst 1 2 * * +;; ffarith 1 2 * * +;; fmac 1 2 1 2 +;; fdiv 1 2 * +;; f_loads * * * +;; f_stores * * * + +(define_cpu_unit "cortex_r4_v1" "cortex_r4") + +(define_cpu_unit "cortex_r4_vmla" "cortex_r4") + +(define_reservation "cortex_r4_issue_ab" + "(cortex_r4_issue_a|cortex_r4_issue_b)") +(define_reservation "cortex_r4_single_issue" + "cortex_r4_issue_a+cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fcpys" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcpys")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_ffariths" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffariths,fconsts,fcmps")) + "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fariths" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fadds,fmuls")) + "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fmacs" 6 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacs")) + "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)") + +(define_insn_reservation "cortex_r4_fdivs" 17 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivs")) + "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_floads" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loads")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fstores" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla") + +(define_insn_reservation "cortex_r4_mcr" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "r_2_f")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_mrc" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_2_r")) + "cortex_r4_issue_ab") + +;; Bypasses for normal (not early) regs. 
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fcpys") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fcpys") +(define_bypass 5 "cortex_r4_fmacs" + "cortex_r4_fcpys") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fcpys") + +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +;; mac->mac has an extra forwarding path. +(define_bypass 3 "cortex_r4_fmacs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") + +;; Double precision operations. These can not dual issue. + +(define_insn_reservation "cortex_r4_fmacd" 20 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacd")) + "cortex_r4_single_issue*13") + +(define_insn_reservation "cortex_r4_farith" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "faddd,fmuld")) + "cortex_r4_single_issue*3") + +;; FIXME: The short cycle count suggests these instructions complete +;; out of order. Chances are this is not a pipelined operation. 
+(define_insn_reservation "cortex_r4_fdivd" 97 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivd")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_ffarithd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffarithd,fconstd")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_fcmpd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcmpd")) + "cortex_r4_single_issue*2") + +(define_insn_reservation "cortex_r4_f_cvt" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_cvt")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_f_memd" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loadd,f_stored")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_f_flag" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_single_issue") + diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h index 3e583b3a912..af92c72b7ba 100644 --- a/gcc/config/arm/symbian.h +++ b/gcc/config/arm/symbian.h @@ -101,3 +101,5 @@ /* SymbianOS cannot merge entities with vague linkage at runtime. 
*/ #define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index f33d8206a1f..2417650adbb 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -755,15 +755,12 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (GET_CODE (operands[3]) == LT && operands[3] == const0_rtx) + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) return \"asr\\t%0, %1, #31\"; if (GET_CODE (operands[3]) == NE) return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0\"; - if (GET_CODE (operands[3]) == GT) - return \"subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, %0, asr #31\"; - output_asm_insn (\"cmp\\t%1, %2\", operands); output_asm_insn (\"ite\\t%D3\", operands); output_asm_insn (\"mov%D3\\t%0, #0\", operands); diff --git a/gcc/config/arm/uclinux-elf.h b/gcc/config/arm/uclinux-elf.h index 98a78505631..89b96f257ba 100644 --- a/gcc/config/arm/uclinux-elf.h +++ b/gcc/config/arm/uclinux-elf.h @@ -83,3 +83,5 @@ "%{pthread:-lpthread} \ %{shared:-lc} \ %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 64bb9564d9c..737f81ccb27 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -24,8 +24,15 @@ ) ;; The VFP "type" attributes differ from those used in the FPA model. -;; ffarith Fast floating point insns, e.g. abs, neg, cpy, cmp. -;; farith Most arithmetic insns. +;; fcpys Single precision cpy. +;; ffariths Single precision abs, neg. +;; ffarithd Double precision abs, neg, cpy. +;; fadds Single precision add/sub. +;; faddd Double precision add/sub. +;; fconsts Single precision load immediate. +;; fconstd Double precision load immediate. +;; fcmps Single precision comparison. +;; fcmpd Double precision comparison. ;; fmuls Single precision multiply. ;; fmuld Double precision multiply. ;; fmacs Single precision multiply-accumulate. 
@@ -74,7 +81,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_loads,f_stores") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] ) @@ -111,7 +118,7 @@ } " [(set_attr "predicable" "yes") - (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_load,f_store") + (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store") (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,*,*,*, 0,*,*,*,*,1008,*")] ) @@ -145,7 +152,7 @@ gcc_unreachable (); } " - [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_loadd,f_stored") + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") (set_attr "length" "8,8,8,4,4,4,4,4") (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] @@ -172,7 +179,7 @@ abort (); } " - [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_load,f_store") + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store") (set_attr "length" "8,8,8,4,4,4,4,4") (set_attr "pool_range" "*,4096,*,*,*,*,1020,*") (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] @@ -214,7 +221,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,farith,f_loads,f_stores,load1,store1,ffarith,*") + "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*") (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] ) @@ -250,7 +257,7 @@ " [(set_attr "predicable" "yes") (set_attr "type" - "r_2_f,f_2_r,farith,f_load,f_store,load1,store1,ffarith,*") + "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*") (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] ) @@ -288,7 +295,7 @@ } " [(set_attr "type" - 
"r_2_f,f_2_r,farith,f_loadd,f_stored,load2,store2,ffarith,*") + "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*") (set_attr "length" "4,4,4,8,8,4,4,4,8") (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")] @@ -320,7 +327,7 @@ } " [(set_attr "type" - "r_2_f,f_2_r,farith,load2,store2,f_load,f_store,ffarith,*") + "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*") (set_attr "length" "4,4,4,8,8,4,4,4,8") (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*") (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")] @@ -349,7 +356,7 @@ fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*thumb2_movsfcc_vfp" @@ -372,7 +379,7 @@ ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*movdfcc_vfp" @@ -395,7 +402,7 @@ fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "4,4,8,4,4,8,4,4,8") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) (define_insn "*thumb2_movdfcc_vfp" @@ -418,7 +425,7 @@ ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" [(set_attr "conds" "use") (set_attr "length" "6,6,10,6,6,10,6,6,10") - (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] + (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")] ) @@ -430,7 +437,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabss%?\\t%0, %1" [(set_attr 
"predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffariths")] ) (define_insn "*absdf2_vfp" @@ -439,7 +446,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fabsd%?\\t%P0, %P1" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffarithd")] ) (define_insn "*negsf2_vfp" @@ -450,7 +457,7 @@ fnegs%?\\t%0, %1 eor%?\\t%0, %1, #-2147483648" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "ffariths")] ) (define_insn_and_split "*negdf2_vfp" @@ -496,7 +503,7 @@ " [(set_attr "predicable" "yes") (set_attr "length" "4,4,8") - (set_attr "type" "ffarith")] + (set_attr "type" "ffarithd")] ) @@ -509,7 +516,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fadds%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "fadds")] ) (define_insn "*adddf3_vfp" @@ -519,7 +526,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "faddd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "faddd")] ) @@ -530,7 +537,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubs%?\\t%0, %1, %2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "fadds")] ) (define_insn "*subdf3_vfp" @@ -540,7 +547,7 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "fsubd%?\\t%P0, %P1, %P2" [(set_attr "predicable" "yes") - (set_attr "type" "farith")] + (set_attr "type" "faddd")] ) @@ -909,7 +916,7 @@ fcmps%?\\t%0, %1 fcmpzs%?\\t%0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmps")] ) (define_insn "*cmpsf_trap_vfp" @@ -921,7 +928,7 @@ fcmpes%?\\t%0, %1 fcmpezs%?\\t%0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) (define_insn "*cmpdf_vfp" @@ -933,7 +940,7 @@ fcmpd%?\\t%P0, %P1 fcmpzd%?\\t%P0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) (define_insn "*cmpdf_trap_vfp" 
@@ -945,7 +952,7 @@ fcmped%?\\t%P0, %P1 fcmpezd%?\\t%P0" [(set_attr "predicable" "yes") - (set_attr "type" "ffarith")] + (set_attr "type" "fcmpd")] ) diff --git a/gcc/config/arm/vfp11.md b/gcc/config/arm/vfp11.md index 59699739539..8f863fd70cd 100644 --- a/gcc/config/arm/vfp11.md +++ b/gcc/config/arm/vfp11.md @@ -51,12 +51,12 @@ (define_insn_reservation "vfp_ffarith" 4 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "ffarith")) + (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) "fmac") (define_insn_reservation "vfp_farith" 8 (and (eq_attr "generic_vfp" "yes") - (eq_attr "type" "farith,f_cvt,fmuls,fmacs")) + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs")) "fmac") (define_insn_reservation "vfp_fmul" 9 diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 441655776e9..a7610acca5d 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -113,3 +113,6 @@ along with GCC; see the file COPYING3. If not see cannot allow arbitrary offsets for shared libraries either. */ #undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P #define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 826e60b9e02..4ac369f10ba 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -998,7 +998,7 @@ do { \ /* If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, we will do a movmem or libcall instead. */ -#define MOVE_RATIO 5 +#define MOVE_RATIO(speed) 5 /* STORAGE LAYOUT: target machine storage layout Define this macro as a C expression which is nonzero if accessing diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index b8a66e96a4b..ed1ec3deb06 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -1242,7 +1242,7 @@ struct cum_args {int regs;}; word-length sizes will be emitted. 
The "9" will translate to (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions (8 instruction sequences) or less. */ -#define MOVE_RATIO 9 +#define MOVE_RATIO(speed) 9 /* Node: Sections */ diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h index 7305fc32f85..4edbb2f6ba2 100644 --- a/gcc/config/h8300/h8300.h +++ b/gcc/config/h8300/h8300.h @@ -1189,10 +1189,8 @@ struct cum_arg #define FINAL_PRESCAN_INSN(insn, operand, nop) \ final_prescan_insn (insn, operand, nop) -#define MOVE_RATIO 3 extern int h8300_move_ratio; -#undef MOVE_RATIO -#define MOVE_RATIO h8300_move_ratio +#define MOVE_RATIO(speed) h8300_move_ratio /* Machine-specific symbol_ref flags. */ #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 025eee6a99c..2c016328e4c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1631,9 +1631,6 @@ rtx ix86_compare_op0 = NULL_RTX; rtx ix86_compare_op1 = NULL_RTX; rtx ix86_compare_emitted = NULL_RTX; -/* Size of the register save area. */ -#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16) - /* Define the structure for the machine field in struct function. */ struct stack_local_entry GTY(()) @@ -6312,14 +6309,24 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) int i; int regparm = ix86_regparm; - if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI) + if (cum->call_abi != DEFAULT_ABI) regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; - if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size) - return; + /* GPR size of varargs save area. */ + if (cfun->va_list_gpr_size) + ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; + else + ix86_varargs_gpr_size = 0; + + /* FPR size of varargs save area. We don't need it if we don't pass + anything in SSE registers. 
*/ + if (cum->sse_nregs && cfun->va_list_fpr_size) + ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; + else + ix86_varargs_fpr_size = 0; - /* Indicate to allocate space on the stack for varargs save area. */ - ix86_save_varrargs_registers = 1; + if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) + return; save_area = frame_pointer_rtx; set = get_varargs_alias_set (); @@ -6337,7 +6344,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) x86_64_int_parameter_registers[i])); } - if (cum->sse_nregs && cfun->va_list_fpr_size) + if (ix86_varargs_fpr_size) { /* Now emit code to save SSE registers. The AX parameter contains number of SSE parameter registers used to call this function. We use @@ -6382,7 +6389,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) tmp_reg = gen_reg_rtx (Pmode); emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, plus_constant (save_area, - 8 * X86_64_REGPARM_MAX + 127))); + ix86_varargs_gpr_size + 127))); mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); MEM_NOTRAP_P (mem) = 1; set_mem_alias_set (mem, set); @@ -6438,7 +6445,7 @@ ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, if (stdarg_p (fntype)) function_arg_advance (&next_cum, mode, type, 1); - if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) + if (cum->call_abi == MS_ABI) setup_incoming_varargs_ms_64 (&next_cum); else setup_incoming_varargs_64 (&next_cum); @@ -6501,7 +6508,7 @@ ix86_va_start (tree valist, rtx nextarg) expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } - if (cfun->va_list_fpr_size) + if (TARGET_SSE && cfun->va_list_fpr_size) { type = TREE_TYPE (fpr); t = build2 (MODIFY_EXPR, type, fpr, @@ -6520,12 +6527,15 @@ ix86_va_start (tree valist, rtx nextarg) TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - if (cfun->va_list_gpr_size || cfun->va_list_fpr_size) + if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) { /* Find the register save area. Prologue of the function save it right above stack frame. 
*/ type = TREE_TYPE (sav); t = make_tree (type, frame_pointer_rtx); + if (!ix86_varargs_gpr_size) + t = build2 (POINTER_PLUS_EXPR, type, t, + size_int (-8 * X86_64_REGPARM_MAX)); t = build2 (MODIFY_EXPR, type, sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -7500,13 +7510,8 @@ ix86_compute_frame_layout (struct ix86_frame *frame) offset += frame->nregs * UNITS_PER_WORD; /* Va-arg area */ - if (ix86_save_varrargs_registers) - { - offset += X86_64_VARARGS_SIZE; - frame->va_arg_size = X86_64_VARARGS_SIZE; - } - else - frame->va_arg_size = 0; + frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; + offset += frame->va_arg_size; /* Align start of frame for local function. */ frame->padding1 = ((offset + stack_alignment_needed - 1) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d933c5e2389..6f6529a252e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1906,12 +1906,12 @@ do { \ If you don't define this, a reasonable default is used. */ -#define MOVE_RATIO (optimize_size ? 3 : ix86_cost->move_ratio) +#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3) /* If a clear memory operation would take CLEAR_RATIO or more simple move-instruction sequences, we will do a clrmem or libcall instead. */ -#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio)) +#define CLEAR_RATIO(speed) ((speed) ? 
MIN (6, ix86_cost->move_ratio) : 2) /* Define if shifts truncate the shift count which implies one can omit a sign-extension or zero-extension @@ -2390,7 +2390,8 @@ struct machine_function GTY(()) { struct stack_local_entry *stack_locals; const char *some_ld_name; - int save_varrargs_registers; + int varargs_gpr_size; + int varargs_fpr_size; int accesses_prev_frame; int optimize_mode_switching[MAX_386_ENTITIES]; int needs_cld; @@ -2416,7 +2417,8 @@ struct machine_function GTY(()) }; #define ix86_stack_locals (cfun->machine->stack_locals) -#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) +#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size) +#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size) #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching) #define ix86_current_function_needs_cld (cfun->machine->needs_cld) #define ix86_tls_descriptor_calls_expanded_in_cfun \ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index c16ecc7e3c3..28abf27d1bf 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -382,8 +382,8 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCHED_NEEDS_BLOCK_P #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p -#undef TARGET_SCHED_GEN_CHECK -#define TARGET_SCHED_GEN_CHECK ia64_gen_check +#undef TARGET_SCHED_GEN_SPEC_CHECK +#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_check #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\ @@ -408,7 +408,7 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS ia64_rtx_costs #undef TARGET_ADDRESS_COST -#define TARGET_ADDRESS_COST hook_int_rtx_0 +#define TARGET_ADDRESS_COST hook_int_rtx_bool_0 #undef TARGET_UNSPEC_MAY_TRAP_P #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p @@ -6278,10 +6278,6 @@ static rtx dfa_stop_insn; static rtx 
last_scheduled_insn; -/* The following variable value is size of the DFA state. */ - -static size_t dfa_state_size; - /* The following variable value is pointer to a DFA state used as temporary variable. */ @@ -6857,6 +6853,8 @@ ia64_set_sched_flags (spec_info_t spec_info) mask |= BE_IN_CONTROL; } + spec_info->mask = mask; + if (mask) { *flags |= USE_DEPS_LIST | DO_SPECULATION; @@ -6864,7 +6862,6 @@ ia64_set_sched_flags (spec_info_t spec_info) if (mask & BE_IN_SPEC) *flags |= NEW_BBS; - spec_info->mask = mask; spec_info->flags = 0; if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns) diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h index cb789e1a54f..302c414bb5c 100644 --- a/gcc/config/m68hc11/m68hc11.h +++ b/gcc/config/m68hc11/m68hc11.h @@ -1505,7 +1505,7 @@ do { \ /* MOVE_RATIO is the number of move instructions that is better than a block move. Make this small on 6811, since the code size grows very large with each move. */ -#define MOVE_RATIO 3 +#define MOVE_RATIO(speed) 3 /* Define if shifts truncate the shift count which implies one can omit a sign-extension or zero-extension of a shift count. */ diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e008e804781..86b886a373f 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2940,7 +2940,7 @@ while (0) we'll have to generate a load/store pair for each, halve the value of MIPS_CALL_RATIO to take that into account. */ -#define MOVE_RATIO \ +#define MOVE_RATIO(speed) \ (HAVE_movmemsi \ ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ : MIPS_CALL_RATIO / 2) @@ -2961,20 +2961,20 @@ while (0) ? (SIZE) < UNITS_PER_WORD \ : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \ : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ - < (unsigned int) MOVE_RATIO)) + < (unsigned int) MOVE_RATIO (false))) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. 
*/ -#define CLEAR_RATIO \ - (optimize_size ? MIPS_CALL_RATIO : 15) +#define CLEAR_RATIO(speed)\ + ((speed) ? 15 : MIPS_CALL_RATIO) /* This is similar to CLEAR_RATIO, but for a non-zero constant, so when optimizing for size adjust the ratio to account for the overhead of loading the constant and replicating it across the word. */ -#define SET_RATIO \ - (optimize_size ? MIPS_CALL_RATIO - 2 : 15) +#define SET_RATIO(speed) \ + ((speed) ? 15 : MIPS_CALL_RATIO - 2) /* STORE_BY_PIECES_P can be used when copying a constant string, but in that case each word takes 3 insns (lui, ori, sw), or more in diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h index 07035fbb06b..c1c80579427 100644 --- a/gcc/config/mn10300/mn10300.h +++ b/gcc/config/mn10300/mn10300.h @@ -814,7 +814,7 @@ while (0) /* According expr.c, a value of around 6 should minimize code size, and for the MN10300 series, that's our primary concern. */ -#define MOVE_RATIO 6 +#define MOVE_RATIO(speed) 6 #define TEXT_SECTION_ASM_OP "\t.section .text" #define DATA_SECTION_ASM_OP "\t.section .data" diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h index ae9e4d31f01..8d4a807cbe3 100644 --- a/gcc/config/pa/pa-hpux11.h +++ b/gcc/config/pa/pa-hpux11.h @@ -122,8 +122,9 @@ along with GCC; see the file COPYING3. If not see #undef LIB_SPEC #define LIB_SPEC \ "%{!shared:\ - %{mt|pthread:-lpthread} -lc \ - %{static:%{!nolibdld:-a shared -ldld -a archive -lpthread -lc}}}\ + %{static|mt|pthread:%{fopenmp:%{static:-a archive_shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ + %{static:%{!nolibdld:-a archive_shared -ldld -a archive -lc}}}\ %{shared:%{mt|pthread:-lpthread}}" #undef STARTFILE_SPEC diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 5e272a2f346..2966a42ddb2 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -1506,7 +1506,7 @@ do { \ arguments passed in registers to avoid infinite recursion during argument setup for a function call. Why? 
Consider how we copy the stack slots reserved for parameters when they may be trashed by a call. */ -#define MOVE_RATIO (TARGET_64BIT ? 8 : 4) +#define MOVE_RATIO(speed) (TARGET_64BIT ? 8 : 4) /* Define if operations between registers always perform the operation on the full register even if a narrower mode is specified. */ diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index c3d686db5c4..b50ab4cb6e3 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -3487,7 +3487,7 @@ FAIL; /* This does happen, but not often enough to worry much about. */ - if (size / align < MOVE_RATIO) + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) FAIL; /* Fall through means we're going to use our block move pattern. */ @@ -3675,7 +3675,7 @@ FAIL; /* This does happen, but not often enough to worry much about. */ - if (size / align < MOVE_RATIO) + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) FAIL; /* Fall through means we're going to use our block move pattern. */ @@ -3842,7 +3842,7 @@ FAIL; /* This does happen, but not often enough to worry much about. */ - if (size / align < MOVE_RATIO) + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) FAIL; /* Fall through means we're going to use our block clear pattern. */ @@ -3956,7 +3956,7 @@ FAIL; /* This does happen, but not often enough to worry much about. */ - if (size / align < MOVE_RATIO) + if (size / align < MOVE_RATIO (optimize_insn_for_speed_p ())) FAIL; /* Fall through means we're going to use our block clear pattern. */ diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h index ef1122d6b3b..bad5b41c2e3 100644 --- a/gcc/config/pa/pa64-hpux.h +++ b/gcc/config/pa/pa64-hpux.h @@ -57,25 +57,35 @@ along with GCC; see the file COPYING3. 
If not see #if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & MASK_GNU_LD) #define LIB_SPEC \ "%{!shared:\ - %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\ + %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{p:%{!pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ - -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\ + -lprof %{static:-a archive}\ + %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{pg:%{static:%{!mhp-ld:-a shared}%{mhp-ld:-a archive_shared}}\ - -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\ + -lgprof %{static:-a archive}\ + %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{shared:%{mt|pthread:-lpthread}}" #else #define LIB_SPEC \ "%{!shared:\ - %{!p:%{!pg: %{static|mt|pthread:-lpthread} -lc\ + %{!p:%{!pg:%{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{p:%{!pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ - -lprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\ + -lprof %{static:-a archive}\ + %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{pg:%{static:%{mgnu-ld:-a shared}%{!mgnu-ld:-a archive_shared}}\ - -lgprof %{static:-a archive} %{static|mt|pthread:-lpthread} -lc\ + -lgprof %{static:-a archive}\ + %{static|mt|pthread:%{fopenmp:%{static:-a shared} -lrt\ + %{static:-a archive}} -lpthread} -lc\ %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\ %{shared:%{mt|pthread:-lpthread}}" #endif diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c 
index d3821a8cfa9..6ab34969d02 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -857,6 +857,10 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int); static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int); static int rs6000_use_sched_lookahead (void); static int rs6000_use_sched_lookahead_guard (rtx); +static void * rs6000_alloc_sched_context (void); +static void rs6000_init_sched_context (void *, bool); +static void rs6000_set_sched_context (void *); +static void rs6000_free_sched_context (void *); static tree rs6000_builtin_reciprocal (unsigned int, bool, bool); static tree rs6000_builtin_mask_for_load (void); static tree rs6000_builtin_mul_widen_even (tree); @@ -1131,6 +1135,15 @@ static const char alt_reg_names[][8] = #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard +#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT +#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context +#undef TARGET_SCHED_INIT_SCHED_CONTEXT +#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context +#undef TARGET_SCHED_SET_SCHED_CONTEXT +#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context +#undef TARGET_SCHED_FREE_SCHED_CONTEXT +#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context + #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN @@ -19476,7 +19489,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, for (i=pos; i<*pn_ready-1; i++) ready[i] = ready[i + 1]; ready[*pn_ready-1] = tmp; - if INSN_PRIORITY_KNOWN (tmp) + + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) INSN_PRIORITY (tmp)++; break; } @@ -19493,7 +19507,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, while (pos >= 0) { if (is_load_insn (ready[pos]) - && INSN_PRIORITY_KNOWN 
(ready[pos])) + && !sel_sched_p () + && INSN_PRIORITY_KNOWN (ready[pos])) { INSN_PRIORITY (ready[pos])++; @@ -19535,8 +19550,10 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, for (i=pos; i<*pn_ready-1; i++) ready[i] = ready[i + 1]; ready[*pn_ready-1] = tmp; - if INSN_PRIORITY_KNOWN (tmp) + + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) INSN_PRIORITY (tmp)++; + first_store_pos = -1; break; @@ -19555,7 +19572,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, for (i=first_store_pos; i<*pn_ready-1; i++) ready[i] = ready[i + 1]; ready[*pn_ready-1] = tmp; - if INSN_PRIORITY_KNOWN (tmp) + if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp)) INSN_PRIORITY (tmp)++; } } @@ -19569,7 +19586,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, while (pos >= 0) { if (is_store_insn (ready[pos]) - && INSN_PRIORITY_KNOWN (ready[pos])) + && !sel_sched_p () + && INSN_PRIORITY_KNOWN (ready[pos])) { INSN_PRIORITY (ready[pos])++; @@ -20071,7 +20089,7 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail) if (group_end) { /* If the scheduler had marked group termination at this location - (between insn and next_indn), and neither insn nor next_insn will + (between insn and next_insn), and neither insn nor next_insn will force group termination, pad the group with nops to force group termination. */ if (can_issue_more @@ -20125,6 +20143,10 @@ rs6000_sched_finish (FILE *dump, int sched_verbose) if (reload_completed && rs6000_sched_groups) { + /* Do not run sched_finish hook when selective scheduling enabled. 
*/ + if (sel_sched_p ()) + return; + if (rs6000_sched_insert_nops == sched_finish_none) return; @@ -20145,6 +20167,67 @@ rs6000_sched_finish (FILE *dump, int sched_verbose) } } } + +struct _rs6000_sched_context +{ + short cached_can_issue_more; + rtx last_scheduled_insn; + int load_store_pendulum; +}; + +typedef struct _rs6000_sched_context rs6000_sched_context_def; +typedef rs6000_sched_context_def *rs6000_sched_context_t; + +/* Allocate store for new scheduling context. */ +static void * +rs6000_alloc_sched_context (void) +{ + return xmalloc (sizeof (rs6000_sched_context_def)); +} + +/* If CLEAN_P is true then initializes _SC with clean data, + and from the global context otherwise. */ +static void +rs6000_init_sched_context (void *_sc, bool clean_p) +{ + rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; + + if (clean_p) + { + sc->cached_can_issue_more = 0; + sc->last_scheduled_insn = NULL_RTX; + sc->load_store_pendulum = 0; + } + else + { + sc->cached_can_issue_more = cached_can_issue_more; + sc->last_scheduled_insn = last_scheduled_insn; + sc->load_store_pendulum = load_store_pendulum; + } +} + +/* Sets the global scheduling context to the one pointed to by _SC. */ +static void +rs6000_set_sched_context (void *_sc) +{ + rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; + + gcc_assert (sc != NULL); + + cached_can_issue_more = sc->cached_can_issue_more; + last_scheduled_insn = sc->last_scheduled_insn; + load_store_pendulum = sc->load_store_pendulum; +} + +/* Free _SC. */ +static void +rs6000_free_sched_context (void *_sc) +{ + gcc_assert (_sc != NULL); + + free (_sc); +} + /* Length in units of the trampoline for entering a nested function. 
*/ diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index a31efd24a23..32ec03624bb 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -872,7 +872,7 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte in tree-sra with UNITS_PER_WORD to make a decision so we adjust it here to compensate for that factor since mvc costs exactly the same on 31 and 64 bit. */ -#define MOVE_RATIO (TARGET_64BIT? 2 : 4) +#define MOVE_RATIO(speed) (TARGET_64BIT? 2 : 4) /* Sections. */ diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index c4910a20a31..2e39082e1f6 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -2099,7 +2099,7 @@ do { \ /* If a memory-to-memory move would take MOVE_RATIO or more simple move-instruction pairs, we will do a movmem or libcall instead. */ -#define MOVE_RATIO (optimize_size ? 3 : 8) +#define MOVE_RATIO(speed) ((speed) ? 8 : 3) /* Define if operations between registers always perform the operation on the full register even if a narrower mode is specified. 
*/ diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index eaece4c1863..0b74a9c18e4 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -663,7 +663,7 @@ spu_expand_block_move (rtx ops[]) int i; if (GET_CODE (ops[2]) != CONST_INT || GET_CODE (ops[3]) != CONST_INT - || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8)) + || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8)) return 0; bytes = INTVAL (ops[2]); diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index f78eb73c429..9839822885e 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -438,7 +438,7 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ #define SLOW_BYTE_ACCESS 0 -#define MOVE_RATIO 32 +#define MOVE_RATIO(speed) 32 #define NO_FUNCTION_CSE diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md index 89f2109ceb3..7b4b743fc8d 100644 --- a/gcc/config/spu/spu.md +++ b/gcc/config/spu/spu.md @@ -1864,58 +1864,6 @@ DONE; }) -;; Taken from STI's gcc -;; Does not correctly handle INF or NAN. 
-(define_expand "divdf3" - [(set (match_operand:DF 0 "register_operand" "=r") - (div:DF (match_operand:DF 1 "register_operand" "r") - (match_operand:DF 2 "register_operand" "r")))] - "flag_finite_math_only" - "{ - /* - double - divdf3 (double x, double y) - { - float x0; - float y_f = (float) y; - double x1, x2; - - x0 = spu_extract(spu_re(spu_promote(y_f, 0)), 0); - x1 = (double)(x0 * (2.0f - y_f * x0)); - x2 = x1 * (2.0 - y * x1); - return (x * x2 * (2.0 - y * x2)); - } - */ - - rtx dst = operands[0]; - rtx x = operands[1]; - rtx y = operands[2]; - rtx y_f = gen_reg_rtx(SFmode); - rtx x0_f = gen_reg_rtx(SFmode); - rtx x1_f = gen_reg_rtx(SFmode); - rtx x1 = gen_reg_rtx(DFmode); - rtx x2 = gen_reg_rtx(DFmode); - rtx t1_f = gen_reg_rtx(SFmode); - rtx t1 = gen_reg_rtx(DFmode); - rtx two = gen_reg_rtx(DFmode); - rtx two_f = gen_reg_rtx(SFmode); - - emit_insn (gen_truncdfsf2 (y_f, y)); - emit_insn (gen_frest_sf (x0_f, y_f)); - emit_insn (gen_fi_sf (x0_f, y_f, x0_f)); - emit_insn (gen_movsf (two_f, spu_float_const(\"2.0\",SFmode))); - emit_insn (gen_fnms_sf (t1_f, y_f, x0_f, two_f)); - emit_insn (gen_mulsf3 (x1_f, t1_f, x0_f)); - emit_insn (gen_extendsfdf2 (x1, x1_f)); - emit_insn (gen_extendsfdf2 (two, two_f)); - emit_insn (gen_movdf (t1, two)); - emit_insn (gen_fnms_df (t1, y, x1, t1)); - emit_insn (gen_muldf3 (x2, x1, t1)); - emit_insn (gen_fnms_df (two, y, x2, two)); - emit_insn (gen_muldf3 (dst, x2, two)); - emit_insn (gen_muldf3 (dst, dst, x)); - DONE; -}") ;; sqrt diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h index c897b121380..65e731fef79 100644 --- a/gcc/config/v850/v850.h +++ b/gcc/config/v850/v850.h @@ -865,7 +865,7 @@ do { \ /* According expr.c, a value of around 6 should minimize code size, and for the V850 series, that's our primary concern. */ -#define MOVE_RATIO 6 +#define MOVE_RATIO(speed) 6 /* Indirect calls are expensive, never turn a direct call into an indirect call. */ |