Diffstat (limited to 'gcc/config/arm')
 gcc/config/arm/arm-protos.h     |   5 +-
 gcc/config/arm/arm.c            |  91 +++++++++++++++++++++++++++++++++-
 gcc/config/arm/arm.h            |  11 ----
 gcc/config/arm/arm.md           |  20 +++---
 gcc/config/arm/cortex-m4-fpu.md | 111 ++++++++++++++++++++++++++++++++++
 gcc/config/arm/cortex-m4.md     | 111 ++++++++++++++++++++++++++++++++++
 gcc/config/arm/iterators.md     |   4 +
 7 files changed, 330 insertions(+), 23 deletions(-)
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 59e1c5024ec..c861bb6a361 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -86,6 +86,8 @@ extern int arm_coproc_mem_operand (rtx, bool);
 extern int neon_vector_mem_operand (rtx, int);
 extern int neon_struct_mem_operand (rtx);
 extern int arm_no_early_store_addr_dep (rtx, rtx);
+extern int arm_early_store_addr_dep (rtx, rtx);
+extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_no_early_alu_shift_dep (rtx, rtx);
 extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
 extern int arm_no_early_mul_dep (rtx, rtx);
@@ -127,6 +129,7 @@ extern const char *output_move_quad (rtx *);
 extern const char *output_move_vfp (rtx *operands);
 extern const char *output_move_neon (rtx *operands);
 extern int arm_attr_length_move_neon (rtx);
+extern int arm_address_offset_is_imm (rtx);
 extern const char *output_add_immediate (rtx *);
 extern const char *arithmetic_instr (rtx, int);
 extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
@@ -148,8 +151,6 @@ extern const char *arm_output_memory_barrier (rtx *);
 extern const char *arm_output_sync_insn (rtx, rtx *);
 extern unsigned int arm_sync_loop_insns (rtx , rtx *);
 
-extern bool arm_output_addr_const_extra (FILE *, rtx);
-
 #if defined TREE_CODE
 extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
 extern bool arm_pad_arg_upward (enum machine_mode, const_tree);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 206e06cac52..44cbc8e1353 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -224,6 +224,7 @@ static bool arm_cannot_copy_insn_p (rtx);
 static bool arm_tls_symbol_p (rtx x);
 static int arm_issue_rate (void);
 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static bool arm_output_addr_const_extra (FILE *, rtx);
 static bool arm_allocate_stack_slots_for_args (void);
 static const char *arm_invalid_parameter_type (const_tree t);
 static const char *arm_invalid_return_type (const_tree t);
@@ -239,6 +240,7 @@ static rtx arm_pic_static_addr (rtx orig, rtx reg);
 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
 static unsigned int arm_units_per_simd_word (enum machine_mode);
+static bool arm_class_likely_spilled_p (reg_class_t);
 
 /* Table of machine attributes.  */
 
@@ -308,6 +310,9 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
 
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
+
 #undef TARGET_ASM_FUNCTION_PROLOGUE
 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
 
@@ -545,6 +550,9 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_CAN_ELIMINATE
 #define TARGET_CAN_ELIMINATE arm_can_eliminate
 
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Obstack for minipool constant handling.  */
@@ -5847,7 +5855,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
 	  && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
 	      || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
 	      || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
-		  && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
+		  && REGNO (XEXP (x, 0))
+		     <= LAST_VIRTUAL_POINTER_REGISTER))
 	  && GET_MODE_SIZE (mode) >= 4
 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
 	  && (INTVAL (XEXP (x, 1)) & 3) == 0)
@@ -13345,6 +13354,34 @@ arm_attr_length_move_neon (rtx insn)
   return 4;
 }
 
+/* Return nonzero if the offset in the address is an immediate.  Otherwise,
+   return zero.  */
+
+int
+arm_address_offset_is_imm (rtx insn)
+{
+  rtx mem, addr;
+
+  extract_insn_cached (insn);
+
+  if (REG_P (recog_data.operand[0]))
+    return 0;
+
+  mem = recog_data.operand[0];
+
+  gcc_assert (MEM_P (mem));
+
+  addr = XEXP (mem, 0);
+
+  if (GET_CODE (addr) == REG
+      || (GET_CODE (addr) == PLUS
+	  && GET_CODE (XEXP (addr, 0)) == REG
+	  && GET_CODE (XEXP (addr, 1)) == CONST_INT))
+    return 1;
+  else
+    return 0;
+}
+
 /* Output an ADD r, s, #n where n may be too big for one instruction.
    If adding zero to one register, output nothing.  */
 const char *
@@ -21503,6 +21540,38 @@ arm_no_early_store_addr_dep (rtx producer, rtx consumer)
   return !reg_overlap_mentioned_p (value, addr);
 }
 
+/* Return nonzero if the CONSUMER instruction (a store) does need
+   PRODUCER's value to calculate the address.  */
+
+int
+arm_early_store_addr_dep (rtx producer, rtx consumer)
+{
+  return !arm_no_early_store_addr_dep (producer, consumer);
+}
+
+/* Return nonzero if the CONSUMER instruction (a load) does need
+   PRODUCER's value to calculate the address.  */
+
+int
+arm_early_load_addr_dep (rtx producer, rtx consumer)
+{
+  rtx value = PATTERN (producer);
+  rtx addr = PATTERN (consumer);
+
+  if (GET_CODE (value) == COND_EXEC)
+    value = COND_EXEC_CODE (value);
+  if (GET_CODE (value) == PARALLEL)
+    value = XVECEXP (value, 0, 0);
+  value = XEXP (value, 0);
+  if (GET_CODE (addr) == COND_EXEC)
+    addr = COND_EXEC_CODE (addr);
+  if (GET_CODE (addr) == PARALLEL)
+    addr = XVECEXP (addr, 0, 0);
+  addr = XEXP (addr, 1);
+
+  return reg_overlap_mentioned_p (value, addr);
+}
+
 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
    have an early register shift value or amount dependency on the
    result of PRODUCER.  */
@@ -21892,6 +21961,22 @@ arm_units_per_simd_word (enum machine_mode mode ATTRIBUTE_UNUSED)
 	  ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD);
 }
 
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
+
+   We need to define this for LO_REGS on thumb.  Otherwise we can end up
+   using r0-r4 for function arguments, r7 for the stack frame and don't
+   have enough left over to do doubleword arithmetic.  */
+
+static bool
+arm_class_likely_spilled_p (reg_class_t rclass)
+{
+  if ((TARGET_THUMB && rclass == LO_REGS)
+      || rclass == CC_REG)
+    return true;
+
+  return false;
+}
+
 /* Implements target hook small_register_classes_for_mode_p.  */
 bool
 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
@@ -22371,7 +22456,9 @@ arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
   fputs ("(tlsldo)", file);
 }
 
-bool
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
+
+static bool
 arm_output_addr_const_extra (FILE *fp, rtx x)
 {
   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8727305a6e2..9bd1c69e849 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1284,13 +1284,6 @@ enum reg_class
    || reg_classes_intersect_p (VFP_REGS, (CLASS))	\
    : 0)
 
-/* We need to define this for LO_REGS on thumb.  Otherwise we can end up
-   using r0-r4 for function arguments, r7 for the stack frame and don't
-   have enough left over to do doubleword arithmetic.  */
-#define CLASS_LIKELY_SPILLED_P(CLASS)	\
-    ((TARGET_THUMB && (CLASS) == LO_REGS)	\
-     || (CLASS) == CC_REG)
-
 /* The class value for index registers, and the one for base regs.  */
 #define INDEX_REG_CLASS  (TARGET_THUMB1 ? LO_REGS : GENERAL_REGS)
 #define BASE_REG_CLASS   (TARGET_THUMB1 ? LO_REGS : CORE_REGS)
@@ -2426,10 +2419,6 @@ extern int making_const_table;
 	   & ~ (unsigned HOST_WIDE_INT) 0xffffffff)	\
 	: 0))))
 
-#define OUTPUT_ADDR_CONST_EXTRA(file, x, fail)		\
-  if (arm_output_addr_const_extra (file, x) == FALSE)	\
-    goto fail
-
 
 /* A C expression whose value is RTL representing the value of the return
    address for the frame COUNT steps up from the current frame.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 61b95e3ee00..c54bb2a1f37 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -497,16 +497,16 @@
 ;; True if the generic scheduling description should be used.
 
 (define_attr "generic_sched" "yes,no"
-  (const (if_then_else
-          (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9")
-               (eq_attr "tune_cortexr4" "yes"))
+  (const (if_then_else
+          (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
+               (eq_attr "tune_cortexr4" "yes"))
           (const_string "no")
           (const_string "yes"))))
 
 (define_attr "generic_vfp" "yes,no"
   (const (if_then_else
 	  (and (eq_attr "fpu" "vfp")
-	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9")
+	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
 	       (eq_attr "tune_cortexr4" "no"))
 	  (const_string "yes")
 	  (const_string "no")))))
@@ -521,6 +521,8 @@
 (include "cortex-a9.md")
 (include "cortex-r4.md")
 (include "cortex-r4f.md")
+(include "cortex-m4.md")
+(include "cortex-m4-fpu.md")
 (include "vfp11.md")
 
 
@@ -4040,7 +4042,8 @@
 
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
-        (zero_extend:DI (match_operand:QHSI 1 "nonimmediate_operand" "rm")))]
+        (zero_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+					    "<qhs_extenddi_cstr>")))]
   "TARGET_32BIT <qhs_zextenddi_cond>"
   "#"
   [(set_attr "length" "8")
@@ -4050,7 +4053,8 @@
 
 (define_insn "extend<mode>di2"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
-        (sign_extend:DI (match_operand:QHSI 1 "nonimmediate_operand" "rm")))]
+        (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+					    "<qhs_extenddi_cstr>")))]
   "TARGET_32BIT <qhs_sextenddi_cond>"
  "#"
  [(set_attr "length" "8")
@@ -5117,7 +5121,7 @@
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
 	(lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0")
 		   (match_operand:SI 2 "general_operand"      "i")))]
-  "TARGET_32BIT"
+  "arm_arch_thumb2"
   "movt%?\t%0, #:upper16:%c2"
   [(set_attr "predicable" "yes")
   (set_attr "length" "4")]
@@ -10577,7 +10581,7 @@
 		(const_int 16)
 		(const_int 16))
 	(match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_32BIT"
+  "arm_arch_thumb2"
   "movt%?\t%0, %c1"
  [(set_attr "predicable" "yes")
   (set_attr "length" "4")]
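The store reservations in cortex-m4.md below key off the new arm_address_offset_is_imm helper, which accepts only [reg] and [reg, #imm] addresses. As a rough illustration of that classification, here is a self-contained toy in C; the types and names are invented for this sketch and are not GCC's RTL API.

/* Illustrative toy of the [reg] / [reg, #imm] test performed by
   arm_address_offset_is_imm; everything here is a stand-in.  */
#include <stdio.h>

enum toy_code { TOY_REG, TOY_CONST_INT, TOY_PLUS };

struct toy_rtx
{
  enum toy_code code;
  const struct toy_rtx *op0, *op1;	/* only used by TOY_PLUS */
};

/* Mirrors the acceptance test in the patch: a bare base register or a
   base register plus a constant offset counts as an immediate-offset
   address; anything else (e.g. [ry, rz]) does not.  */
static int
toy_address_offset_is_imm (const struct toy_rtx *addr)
{
  if (addr->code == TOY_REG)
    return 1;
  if (addr->code == TOY_PLUS
      && addr->op0->code == TOY_REG
      && addr->op1->code == TOY_CONST_INT)
    return 1;
  return 0;
}

int
main (void)
{
  struct toy_rtx reg = { TOY_REG, 0, 0 };
  struct toy_rtx imm = { TOY_CONST_INT, 0, 0 };
  struct toy_rtx reg_imm = { TOY_PLUS, &reg, &imm };	/* [ry, #imm] */
  struct toy_rtx reg_reg = { TOY_PLUS, &reg, &reg };	/* [ry, rz] */

  printf ("[ry]       -> %d\n", toy_address_offset_is_imm (&reg));	/* 1 */
  printf ("[ry, #imm] -> %d\n", toy_address_offset_is_imm (&reg_imm));	/* 1 */
  printf ("[ry, rz]   -> %d\n", toy_address_offset_is_imm (&reg_reg));	/* 0 */
  return 0;
}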
diff --git a/gcc/config/arm/cortex-m4-fpu.md b/gcc/config/arm/cortex-m4-fpu.md
new file mode 100644
index 00000000000..7de115c5209
--- /dev/null
+++ b/gcc/config/arm/cortex-m4-fpu.md
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4 FPU pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Use an artificial unit to model the FPU.
+(define_cpu_unit "cortex_m4_v" "cortex_m4")
+
+(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v")
+
+;; Integer instructions following VDIV or VSQRT complete out-of-order.
+(define_insn_reservation "cortex_m4_fdivs" 15
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fdivs"))
+  "cortex_m4_ex_v,cortex_m4_v*13")
+
+(define_insn_reservation "cortex_m4_vmov_1" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fcpys,fconsts"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_vmov_2" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_2_r,r_2_f"))
+  "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_fmuls" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fmuls"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fmacs" 4
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fmacs"))
+  "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_ffariths" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "ffariths"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fadds" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fadds"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fcmps" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "fcmps"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_flag" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_flag"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_cvt" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_cvt"))
+  "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_load" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_load"))
+  "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_store" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_store"))
+  "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_loadd" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_loadd"))
+  "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_f_stored" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "f_stored"))
+  "cortex_m4_ex_v*3")
+
+;; MAC instructions consume their addend one cycle later.  If the result
+;; of an arithmetic instruction is consumed as the addend of the following
+;; MAC instruction, the latency can be decreased by one.
+
+(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt"
+	       "cortex_m4_fmacs"
+	       "arm_no_early_mul_dep")
+
+(define_bypass 3 "cortex_m4_fmacs"
+	       "cortex_m4_fmacs"
+	       "arm_no_early_mul_dep")
+
+(define_bypass 14 "cortex_m4_fdivs"
+	       "cortex_m4_fmacs"
+	       "arm_no_early_mul_dep")
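The define_bypass entries above encode that a MAC consumes its addend one cycle after its multiply operands, so a producer feeding only the accumulator sees one cycle less latency (guarded by arm_no_early_mul_dep). A hedged toy of the resulting latencies in C, using the patch's numbers; the function is invented for this sketch and is not GCC code.

/* Illustrative toy of the MAC addend bypass.  A vmla reads its multiply
   operands first and its accumulator one cycle later, so a value feeding
   only the accumulator may arrive one cycle late without stalling.  */
#include <stdio.h>

/* PRODUCER_LATENCY is the producer's define_insn_reservation latency
   (2 for cortex_m4_fadds/fmuls/f_cvt); the bypass subtracts one when
   arm_no_early_mul_dep holds, i.e. the value does not feed the multiply
   operands of the consuming cortex_m4_fmacs.  */
static int
fmacs_input_latency (int producer_latency, int feeds_multiply)
{
  return feeds_multiply ? producer_latency : producer_latency - 1;
}

int
main (void)
{
  /* vadd.f32 s0, s1, s2 ; vmla.f32 s0, s4, s5 -- s0 is the accumulator.  */
  printf ("accumulator only: %d cycle(s)\n", fmacs_input_latency (2, 0));
  /* vadd.f32 s0, s1, s2 ; vmla.f32 s3, s0, s5 -- s0 feeds the multiply.  */
  printf ("multiply operand: %d cycle(s)\n", fmacs_input_latency (2, 1));
  return 0;
}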
diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md
new file mode 100644
index 00000000000..b71037585d0
--- /dev/null
+++ b/gcc/config/arm/cortex-m4.md
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4 pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_m4")
+
+;; We model the pipelining of LDR instructions by using two artificial units.
+
+(define_cpu_unit "cortex_m4_a" "cortex_m4")
+
+(define_cpu_unit "cortex_m4_b" "cortex_m4")
+
+(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b")
+
+;; ALU and multiply is one cycle.
+(define_insn_reservation "cortex_m4_alu" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "alu,alu_shift,alu_shift_reg,mult"))
+  "cortex_m4_ex")
+
+;; Byte, half-word and word load is two cycles.
+(define_insn_reservation "cortex_m4_load1" 2
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "load_byte,load1"))
+  "cortex_m4_a, cortex_m4_b")
+
+;; str rx, [ry, #imm] is always one cycle.
+(define_insn_reservation "cortex_m4_store1_1" 1
+  (and (and (eq_attr "tune" "cortexm4")
+	    (eq_attr "type" "store1"))
+       (ne (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+  "cortex_m4_a")
+
+;; Other byte, half-word and word store is two cycles.
+(define_insn_reservation "cortex_m4_store1_2" 2
+  (and (and (eq_attr "tune" "cortexm4")
+	    (eq_attr "type" "store1"))
+       (eq (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+  "cortex_m4_a*2")
+
+(define_insn_reservation "cortex_m4_load2" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "load2"))
+  "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_store2" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "store2"))
+  "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_load3" 4
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "load3"))
+  "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_store3" 4
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "store3"))
+  "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_load4" 5
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "load4"))
+  "cortex_m4_ex*5")
+
+(define_insn_reservation "cortex_m4_store4" 5
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "store4"))
+  "cortex_m4_ex*5")
+
+;; If the address of load or store depends on the result of the preceding
+;; instruction, the latency is increased by one.
+
+(define_bypass 2 "cortex_m4_alu"
+	       "cortex_m4_load1"
+	       "arm_early_load_addr_dep")
+
+(define_bypass 2 "cortex_m4_alu"
+	       "cortex_m4_store1_1,cortex_m4_store1_2"
+	       "arm_early_store_addr_dep")
+
+(define_insn_reservation "cortex_m4_branch" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "branch"))
+  "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_call" 3
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "call"))
+  "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_block" 1
+  (and (eq_attr "tune" "cortexm4")
+       (eq_attr "type" "block"))
+  "cortex_m4_ex")
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 8e9f1001aba..887c962baeb 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -381,6 +381,10 @@
 
 (define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
 (define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "&& arm_arch6")])
+(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
+				   (HI "nonimmediate_operand")
+				   (QI "nonimmediate_operand")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
 
 ;;----------------------------------------------------------------------------
 ;; Code attributes
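Unlike the FPU bypasses, the two integer define_bypass entries in cortex-m4.md raise latency: an ALU result used to form a load or store address is observed one cycle later (latency 2 instead of 1), which is what arm_early_load_addr_dep and arm_early_store_addr_dep detect. A toy version of that check in C, with invented types standing in for GCC's RTL:

/* Illustrative toy of arm_early_load_addr_dep: the dependence is "early"
   when the producer's destination register appears in the consumer's
   address expression.  Invented types; not GCC code.  */
#include <stdio.h>

static int
toy_early_load_addr_dep (int producer_dest, const int *addr_regs, int n)
{
  int i;

  for (i = 0; i < n; i++)
    if (addr_regs[i] == producer_dest)
      return 1;	/* the address needs the just-computed value */
  return 0;
}

int
main (void)
{
  int addr_r0[] = { 0 };	/* ldr r3, [r0, #4] after adds r0, r1, r2 */
  int addr_r4[] = { 4 };	/* ldr r3, [r4]     after adds r0, r1, r2 */

  printf ("ldr r3, [r0, #4]: dep=%d (latency 2)\n",
	  toy_early_load_addr_dep (0, addr_r0, 1));
  printf ("ldr r3, [r4]:     dep=%d (latency 1)\n",
	  toy_early_load_addr_dep (0, addr_r4, 1));
  return 0;
}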