Diffstat (limited to 'gcc/config/arm/arm.c')

 -rw-r--r--  gcc/config/arm/arm.c | 5461 ±

 1 file changed, 1940 insertions, 3521 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0240cc70e81..fddd7708972 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1,5 +1,5 @@
 /* Output routines for GCC for ARM.
-   Copyright (C) 1991-2014 Free Software Foundation, Inc.
+   Copyright (C) 1991-2015 Free Software Foundation, Inc.
    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
    and Martin Simmons (@harleqn.co.uk).
    More major hacks by Richard Earnshaw (rearnsha@arm.com).
@@ -26,7 +26,17 @@
 #include "hash-table.h"
 #include "tm.h"
 #include "rtl.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
 #include "tree.h"
+#include "fold-const.h"
 #include "stringpool.h"
 #include "stor-layout.h"
 #include "calls.h"
@@ -41,15 +51,39 @@
 #include "flags.h"
 #include "reload.h"
 #include "function.h"
+#include "hashtab.h"
+#include "statistics.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
 #include "expr.h"
+#include "insn-codes.h"
 #include "optabs.h"
 #include "diagnostic-core.h"
 #include "recog.h"
+#include "predict.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
+#include "basic-block.h"
+#include "hash-map.h"
+#include "is-a.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
 #include "cgraph.h"
 #include "ggc.h"
 #include "except.h"
 #include "tm_p.h"
 #include "target.h"
+#include "sched-int.h"
 #include "target-def.h"
 #include "debug.h"
 #include "langhooks.h"
@@ -59,6 +93,11 @@
 #include "params.h"
 #include "opts.h"
 #include "dumpfile.h"
+#include "gimple-expr.h"
+#include "builtins.h"
+#include "tm-constrs.h"
+#include "rtl-iter.h"
+#include "sched-int.h"

 /* Forward definitions of types.  */
 typedef struct minipool_node    Mnode;
@@ -72,23 +111,23 @@ struct four_ints
 };

 /* Forward function declarations.  */
+static bool arm_const_not_ok_for_debug_p (rtx);
 static bool arm_lra_p (void);
-static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
+static bool arm_needs_doubleword_align (machine_mode, const_tree);
 static int arm_compute_static_chain_stack_bytes (void);
 static arm_stack_offsets *arm_get_frame_offsets (void);
 static void arm_add_gc_roots (void);
-static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
+static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
 static unsigned bit_count (unsigned long);
 static int arm_address_register_rtx_p (rtx, int);
-static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
-static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
-static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
-static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
+static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
+static int thumb2_legitimate_index_p (machine_mode, rtx, int);
+static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
+static rtx arm_legitimize_address (rtx, rtx, machine_mode);
 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
-static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
 inline static int thumb1_index_register_rtx_p (rtx, int);
-static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
 static int thumb_far_jump_used_p (void);
 static bool thumb_force_lr_save (void);
 static unsigned arm_size_return_regs (void);
@@ -104,21 +143,21 @@ static const char *output_multi_immediate (rtx *, const char *, const char *,
 static const char *shift_op (rtx, HOST_WIDE_INT *);
 static struct machine_function *arm_init_machine_status (void);
 static void thumb_exit (FILE *, int);
-static HOST_WIDE_INT get_jump_table_size (rtx);
+static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
 static Mnode *add_minipool_forward_ref (Mfix *);
 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
 static Mnode *add_minipool_backward_ref (Mfix *);
 static void assign_minipool_offsets (Mfix *);
 static void arm_print_value (FILE *, rtx);
-static void dump_minipool (rtx);
+static void dump_minipool (rtx_insn *);
 static int arm_barrier_cost (rtx);
 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
-static void push_minipool_barrier (rtx, HOST_WIDE_INT);
-static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
-			       rtx);
+static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
+static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
+			       machine_mode, rtx);
 static void arm_reorg (void);
-static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
+static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
 static unsigned long arm_compute_save_reg0_reg12_mask (void);
 static unsigned long arm_compute_save_reg_mask (void);
 static unsigned long arm_isr_value (tree);
@@ -133,8 +172,8 @@ static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
 static int arm_comp_type_attributes (const_tree, const_tree);
 static void arm_set_default_type_attributes (tree);
-static int arm_adjust_cost (rtx, rtx, rtx, int);
-static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
+static int arm_adjust_cost (rtx_insn *, rtx,
+			    rtx_insn *, int);
+static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
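
Most of the churn in these declarations is the GCC 5 move from bare rtx to the rtx_insn * subclass wherever a value is known to be an instruction rather than an arbitrary expression. A minimal sketch of the resulting idiom (the walker is a hypothetical helper; PATTERN and NEXT_INSN are the real accessors):

    /* Widening from rtx_insn * to rtx stays implicit, so expression code
       keeps working; only insn-specific interfaces get the new type.  */
    static void
    walk_insns (rtx_insn *insn)          /* hypothetical helper */
    {
      for (; insn != NULL; insn = NEXT_INSN (insn))
        {
          rtx pat = PATTERN (insn);      /* an insn used as a plain rtx */
          (void) pat;
        }
    }
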
 static int optimal_immediate_sequence (enum rtx_code code,
					unsigned HOST_WIDE_INT val,
					struct four_ints *return_sequence);
@@ -144,20 +183,20 @@ static int optimal_immediate_sequence_1 (enum rtx_code code,
					  int i);
 static int arm_get_strip_length (int);
 static bool arm_function_ok_for_sibcall (tree, tree);
-static enum machine_mode arm_promote_function_mode (const_tree,
-						    enum machine_mode, int *,
+static machine_mode arm_promote_function_mode (const_tree,
+					       machine_mode, int *,
						    const_tree, int);
 static bool arm_return_in_memory (const_tree, const_tree);
 static rtx arm_function_value (const_tree, const_tree, bool);
-static rtx arm_libcall_value_1 (enum machine_mode);
-static rtx arm_libcall_value (enum machine_mode, const_rtx);
+static rtx arm_libcall_value_1 (machine_mode);
+static rtx arm_libcall_value (machine_mode, const_rtx);
 static bool arm_function_value_regno_p (const unsigned int);
 static void arm_internal_label (FILE *, const char *, unsigned long);
 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
 static bool arm_have_conditional_execution (void);
-static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
-static bool arm_legitimate_constant_p (enum machine_mode, rtx);
+static bool arm_cannot_force_const_mem (machine_mode, rtx);
+static bool arm_legitimate_constant_p (machine_mode, rtx);
 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
@@ -165,29 +204,22 @@ static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, boo
 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
-static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
-static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
-static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
-static void arm_init_builtins (void);
-static void arm_init_iwmmxt_builtins (void);
-static rtx safe_vector_operand (rtx, enum machine_mode);
-static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
-static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
-static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-static tree arm_builtin_decl (unsigned, bool);
+static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
+static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
+static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
 static void emit_constant_insn (rtx cond, rtx pattern);
-static rtx emit_set_insn (rtx, rtx);
+static rtx_insn *emit_set_insn (rtx, rtx);
 static rtx emit_multi_reg_push (unsigned long, unsigned long);
-static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
+static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
-static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
+static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
-static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
+static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
-static unsigned int
-  arm_function_arg_boundary (enum machine_mode, const_tree);
-static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
+static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
+static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
-static rtx aapcs_libcall_value (enum machine_mode);
+static rtx aapcs_libcall_value (machine_mode);
 static int aapcs_select_return_coproc (const_tree, const_tree);

 #ifdef OBJECT_FORMAT_ELF
@@ -201,18 +233,18 @@ static void arm_encode_section_info (tree, rtx, int);
 static void arm_file_end (void);
 static void arm_file_start (void);
-static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
+static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
 static bool arm_pass_by_reference (cumulative_args_t,
-				   enum machine_mode, const_tree, bool);
+				   machine_mode, const_tree, bool);
 static bool arm_promote_prototypes (const_tree);
 static bool arm_default_short_enums (void);
 static bool arm_align_anon_bitfield (void);
 static bool arm_return_in_msb (const_tree);
-static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
+static bool arm_must_pass_in_stack (machine_mode, const_tree);
 static bool arm_return_in_memory (const_tree, const_tree);
 #if ARM_UNWIND_INFO
-static void arm_unwind_emit (FILE *, rtx);
+static void arm_unwind_emit (FILE *, rtx_insn *);
 static bool arm_output_ttype (rtx);
 static void arm_asm_emit_except_personality (rtx);
 static void arm_asm_init_sections (void);
@@ -233,9 +265,12 @@ static tree arm_build_builtin_va_list (void);
 static void arm_expand_builtin_va_start (tree, rtx);
 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
 static void arm_option_override (void);
-static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
-static bool arm_cannot_copy_insn_p (rtx);
+static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
+static bool arm_macro_fusion_p (void);
+static bool arm_cannot_copy_insn_p (rtx_insn *);
 static int arm_issue_rate (void);
+static int arm_first_cycle_multipass_dfa_lookahead (void);
+static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
 static bool arm_output_addr_const_extra (FILE *, rtx);
 static bool arm_allocate_stack_slots_for_args (void);
@@ -244,23 +279,23 @@ static const char *arm_invalid_parameter_type (const_tree t);
 static const char *arm_invalid_return_type (const_tree t);
 static tree arm_promoted_type (const_tree t);
 static tree arm_convert_to_type (tree type, tree expr);
-static bool arm_scalar_mode_supported_p (enum machine_mode);
+static bool arm_scalar_mode_supported_p (machine_mode);
 static bool arm_frame_pointer_required (void);
 static bool arm_can_eliminate (const int, const int);
 static void arm_asm_trampoline_template (FILE *);
 static void arm_trampoline_init (rtx, tree, rtx);
 static rtx arm_trampoline_adjust_address (rtx);
 static rtx arm_pic_static_addr (rtx orig, rtx reg);
-static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
-static bool arm_array_mode_supported_p (enum machine_mode,
+static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
+static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
-static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
+static machine_mode arm_preferred_simd_mode (machine_mode);
 static bool arm_class_likely_spilled_p (reg_class_t);
 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
-static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
+static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
@@ -270,10 +305,13 @@ static unsigned int arm_autovectorize_vector_sizes (void);
 static int arm_default_branch_cost (bool, bool);
 static int arm_cortex_a5_branch_cost (bool, bool);
 static int arm_cortex_m_branch_cost (bool, bool);
+static int arm_cortex_m7_branch_cost (bool, bool);

-static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

+static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
+
 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
@@ -286,6 +324,8 @@ static unsigned arm_add_stmt_cost (void *data, int count,
 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
+
+static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);

 /* Table of machine attributes.  */
 static const struct attribute_spec arm_attribute_table[] =
@@ -379,6 +419,12 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_COMP_TYPE_ATTRIBUTES
 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
@@ -581,9 +627,20 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate

+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+  arm_first_cycle_multipass_dfa_lookahead
+
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
+  arm_first_cycle_multipass_dfa_lookahead_guard
+
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE arm_mangle_type

+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
+
 #undef TARGET_BUILD_BUILTIN_VA_LIST
 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
 #undef TARGET_EXPAND_BUILTIN_VA_START
@@ -674,6 +731,15 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

+#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
+#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
+
+#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
+#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
+
+#undef TARGET_SCHED_FUSION_PRIORITY
+#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
+
 struct gcc_target targetm = TARGET_INITIALIZER;
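
The two TARGET_SCHED_MACRO_FUSION hooks registered above are how the scheduler learns to keep fuseable instruction pairs adjacent. A sketch of a typical implementation, assuming a fuseable_ops mask field in tune_params (the field name is inferred from the ARM_FUSE_* initializers later in this diff, so treat it as an assumption):

    static bool
    arm_macro_fusion_p (void)
    {
      /* Only engage the scheduler machinery if this tuning fuses anything.  */
      return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
    }

    static bool
    aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
    {
      /* Sketch for MOVW/MOVT: keep a low-half immediate move adjacent to
         the ZERO_EXTRACT write that fills the high half of the same reg.  */
      if (!(current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT))
        return false;
      rtx prev_set = single_set (prev), curr_set = single_set (curr);
      if (!prev_set || !curr_set)
        return false;
      rtx dest = SET_DEST (curr_set);
      return REG_P (SET_DEST (prev_set))
             && CONST_INT_P (SET_SRC (prev_set))
             && GET_CODE (dest) == ZERO_EXTRACT
             && rtx_equal_p (XEXP (dest, 0), SET_DEST (prev_set));
    }
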

 /* Obstack for minipool constant handling.  */
@@ -705,77 +771,13 @@ const struct arm_fpu_desc *arm_fpu_desc;
 rtx thumb_call_via_label[14];
 static int thumb_call_reg_needed;

-/* Bit values used to identify processor capabilities.  */
-#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
-#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
-#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
-#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
-#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
-#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
-#define FL_THUMB      (1 << 6)        /* Thumb aware */
-#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
-#define FL_STRONG     (1 << 8)        /* StrongARM */
-#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
-#define FL_XSCALE     (1 << 10)       /* XScale */
-/* spare            (1 << 11) */
-#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
-					 media instructions.  */
-#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
-#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
-					 Note: ARM6 & 7 derivatives only.  */
-#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
-#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
-#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
-					 profile.  */
-#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
-#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
-#define FL_NEON       (1 << 20)       /* Neon instructions.  */
-#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
-					 architecture.  */
-#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
-#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */
-#define FL_ARCH8      (1 << 24)       /* Architecture 8.  */
-#define FL_CRC32      (1 << 25)       /* ARMv8 CRC32 instructions.  */
-
-#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
-#define FL_IWMMXT2    (1 << 30)       /* "Intel Wireless MMX2 technology".  */
-
-/* Flags that only effect tuning, not available instructions.  */
-#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
-			 | FL_CO_PROC)
-
-#define FL_FOR_ARCH2	FL_NOTM
-#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
-#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
-#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
-#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
-#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
-#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
-#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
-#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
-#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
-#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
-#define FL_FOR_ARCH6J	FL_FOR_ARCH6
-#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
-#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
-#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
-#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
-#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
-#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
-#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
-#define FL_FOR_ARCH7VE	(FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
-#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
-#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
-#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
-#define FL_FOR_ARCH8A	(FL_FOR_ARCH7VE | FL_ARCH8)
-
 /* The bits in this mask specify which
    instructions we are allowed to generate.  */
-static unsigned long insn_flags = 0;
+unsigned long insn_flags = 0;
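
The deleted FL_* block is the capability mask the rest of the file keys off; only the definitions move (note that insn_flags and tune_flags lose their static here, so code outside arm.c can now see them). Each FL_FOR_ARCHn mask is the previous architecture's mask plus the new feature bits, e.g. FL_FOR_ARCH4T = FL_FOR_ARCH4 | FL_THUMB, and option handling reduces the mask to simple booleans, as the arm_option_override hunks further down show:

    arm_arch4 = (insn_flags & FL_ARCH4) != 0;
    arm_arch_crc = (insn_flags & FL_CRC32) != 0;
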

 /* The bits in this mask specify which instruction scheduling options should
    be used.  */
-static unsigned long tune_flags = 0;
+unsigned long tune_flags = 0;

 /* The highest ARM architecture version supported by the
    target.  */
@@ -875,15 +877,11 @@ bool arm_disable_literal_pool = false;
 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
    must report the mode of the memory reference from TARGET_PRINT_OPERAND
    to TARGET_PRINT_OPERAND_ADDRESS.  */
-enum machine_mode output_memory_reference_mode;
+machine_mode output_memory_reference_mode;

 /* The register number to be used for the PIC offset register.  */
 unsigned arm_pic_register = INVALID_REGNUM;

-/* Set to 1 after arm_reorg has started.  Reset to start at the start of
-   the next function.  */
-static int after_arm_reorg = 0;
-
 enum arm_pcs arm_pcs_default;

 /* For an explanation of these variables, see final_prescan_insn below.  */
@@ -904,6 +902,9 @@ int arm_condexec_masklen = 0;
 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
 int arm_arch_crc = 0;

+/* Nonzero if the core has a very small, high-latency, multiply unit.  */
+int arm_m_profile_small_mul = 0;
+
 /* The condition codes of the ARM, and the inverse function.  */
 static const char * const arm_condition_codes[] =
 {
@@ -982,6 +983,7 @@ const struct cpu_cost_table cortexa9_extra_costs =
     COSTS_N_INSNS (1),  /* bfi.  */
     COSTS_N_INSNS (1),  /* bfx.  */
     0,                  /* clz.  */
+    0,                  /* rev.  */
     0,                  /* non_exec.  */
     true                /* non_exec_costs_exec.  */
   },
@@ -1065,6 +1067,209 @@ const struct cpu_cost_table cortexa9_extra_costs =
   }
 };

+const struct cpu_cost_table cortexa8_extra_costs =
+{
+  /* ALU */
+  {
+    0,                  /* arith.  */
+    0,                  /* logical.  */
+    COSTS_N_INSNS (1),  /* shift.  */
+    0,                  /* shift_reg.  */
+    COSTS_N_INSNS (1),  /* arith_shift.  */
+    0,                  /* arith_shift_reg.  */
+    COSTS_N_INSNS (1),  /* log_shift.  */
+    0,                  /* log_shift_reg.  */
+    0,                  /* extend.  */
+    0,                  /* extend_arith.  */
+    0,                  /* bfi.  */
+    0,                  /* bfx.  */
+    0,                  /* clz.  */
+    0,                  /* rev.  */
+    0,                  /* non_exec.  */
+    true                /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (1),        /* simple.  */
+      COSTS_N_INSNS (1),        /* flag_setting.  */
+      COSTS_N_INSNS (1),        /* extend.  */
+      COSTS_N_INSNS (1),        /* add.  */
+      COSTS_N_INSNS (1),        /* extend_add.  */
+      COSTS_N_INSNS (30)        /* idiv.  No HW div on Cortex A8.  */
+    },
+    /* MULT DImode */
+    {
+      0,                        /* simple (N/A).  */
+      0,                        /* flag_setting (N/A).  */
+      COSTS_N_INSNS (2),        /* extend.  */
+      0,                        /* add (N/A).  */
+      COSTS_N_INSNS (2),        /* extend_add.  */
+      0                         /* idiv (N/A).  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (1),  /* load.  */
+    COSTS_N_INSNS (1),  /* load_sign_extend.  */
+    COSTS_N_INSNS (1),  /* ldrd.  */
+    COSTS_N_INSNS (1),  /* ldm_1st.  */
+    1,                  /* ldm_regs_per_insn_1st.  */
+    2,                  /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (1),  /* loadf.  */
+    COSTS_N_INSNS (1),  /* loadd.  */
+    COSTS_N_INSNS (1),  /* load_unaligned.  */
+    COSTS_N_INSNS (1),  /* store.  */
+    COSTS_N_INSNS (1),  /* strd.  */
+    COSTS_N_INSNS (1),  /* stm_1st.  */
+    1,                  /* stm_regs_per_insn_1st.  */
+    2,                  /* stm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (1),  /* storef.  */
+    COSTS_N_INSNS (1),  /* stored.  */
+    COSTS_N_INSNS (1)   /* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (36),       /* div.  */
+      COSTS_N_INSNS (11),       /* mult.  */
+      COSTS_N_INSNS (20),       /* mult_addsub.  */
+      COSTS_N_INSNS (30),       /* fma.  */
+      COSTS_N_INSNS (9),        /* addsub.  */
+      COSTS_N_INSNS (3),        /* fpconst.  */
+      COSTS_N_INSNS (3),        /* neg.  */
+      COSTS_N_INSNS (6),        /* compare.  */
+      COSTS_N_INSNS (4),        /* widen.  */
+      COSTS_N_INSNS (4),        /* narrow.  */
+      COSTS_N_INSNS (8),        /* toint.  */
+      COSTS_N_INSNS (8),        /* fromint.  */
+      COSTS_N_INSNS (8)         /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (64),       /* div.  */
+      COSTS_N_INSNS (16),       /* mult.  */
+      COSTS_N_INSNS (25),       /* mult_addsub.  */
+      COSTS_N_INSNS (30),       /* fma.  */
+      COSTS_N_INSNS (9),        /* addsub.  */
+      COSTS_N_INSNS (3),        /* fpconst.  */
+      COSTS_N_INSNS (3),        /* neg.  */
+      COSTS_N_INSNS (6),        /* compare.  */
+      COSTS_N_INSNS (6),        /* widen.  */
+      COSTS_N_INSNS (6),        /* narrow.  */
+      COSTS_N_INSNS (8),        /* toint.  */
+      COSTS_N_INSNS (8),        /* fromint.  */
+      COSTS_N_INSNS (8)         /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)   /* alu.  */
+  }
+};
+
+const struct cpu_cost_table cortexa5_extra_costs =
+{
+  /* ALU */
+  {
+    0,                  /* arith.  */
+    0,                  /* logical.  */
+    COSTS_N_INSNS (1),  /* shift.  */
+    COSTS_N_INSNS (1),  /* shift_reg.  */
+    COSTS_N_INSNS (1),  /* arith_shift.  */
+    COSTS_N_INSNS (1),  /* arith_shift_reg.  */
+    COSTS_N_INSNS (1),  /* log_shift.  */
+    COSTS_N_INSNS (1),  /* log_shift_reg.  */
+    COSTS_N_INSNS (1),  /* extend.  */
+    COSTS_N_INSNS (1),  /* extend_arith.  */
+    COSTS_N_INSNS (1),  /* bfi.  */
+    COSTS_N_INSNS (1),  /* bfx.  */
+    COSTS_N_INSNS (1),  /* clz.  */
+    COSTS_N_INSNS (1),  /* rev.  */
+    0,                  /* non_exec.  */
+    true                /* non_exec_costs_exec.  */
+  },
+
+  {
+    /* MULT SImode */
+    {
+      0,                        /* simple.  */
+      COSTS_N_INSNS (1),        /* flag_setting.  */
+      COSTS_N_INSNS (1),        /* extend.  */
+      COSTS_N_INSNS (1),        /* add.  */
+      COSTS_N_INSNS (1),        /* extend_add.  */
+      COSTS_N_INSNS (7)         /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      0,                        /* simple (N/A).  */
+      0,                        /* flag_setting (N/A).  */
+      COSTS_N_INSNS (1),        /* extend.  */
+      0,                        /* add.  */
+      COSTS_N_INSNS (2),        /* extend_add.  */
+      0                         /* idiv (N/A).  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (1),  /* load.  */
+    COSTS_N_INSNS (1),  /* load_sign_extend.  */
+    COSTS_N_INSNS (6),  /* ldrd.  */
+    COSTS_N_INSNS (1),  /* ldm_1st.  */
+    1,                  /* ldm_regs_per_insn_1st.  */
+    2,                  /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),  /* loadf.  */
+    COSTS_N_INSNS (4),  /* loadd.  */
+    COSTS_N_INSNS (1),  /* load_unaligned.  */
+    COSTS_N_INSNS (1),  /* store.  */
+    COSTS_N_INSNS (3),  /* strd.  */
+    COSTS_N_INSNS (1),  /* stm_1st.  */
+    1,                  /* stm_regs_per_insn_1st.  */
+    2,                  /* stm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (2),  /* storef.  */
+    COSTS_N_INSNS (2),  /* stored.  */
+    COSTS_N_INSNS (1)   /* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (15),       /* div.  */
+      COSTS_N_INSNS (3),        /* mult.  */
+      COSTS_N_INSNS (7),        /* mult_addsub.  */
+      COSTS_N_INSNS (7),        /* fma.  */
+      COSTS_N_INSNS (3),        /* addsub.  */
+      COSTS_N_INSNS (3),        /* fpconst.  */
+      COSTS_N_INSNS (3),        /* neg.  */
+      COSTS_N_INSNS (3),        /* compare.  */
+      COSTS_N_INSNS (3),        /* widen.  */
+      COSTS_N_INSNS (3),        /* narrow.  */
+      COSTS_N_INSNS (3),        /* toint.  */
+      COSTS_N_INSNS (3),        /* fromint.  */
+      COSTS_N_INSNS (3)         /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (30),       /* div.  */
+      COSTS_N_INSNS (6),        /* mult.  */
+      COSTS_N_INSNS (10),       /* mult_addsub.  */
+      COSTS_N_INSNS (7),        /* fma.  */
+      COSTS_N_INSNS (3),        /* addsub.  */
+      COSTS_N_INSNS (3),        /* fpconst.  */
+      COSTS_N_INSNS (3),        /* neg.  */
+      COSTS_N_INSNS (3),        /* compare.  */
+      COSTS_N_INSNS (3),        /* widen.  */
+      COSTS_N_INSNS (3),        /* narrow.  */
+      COSTS_N_INSNS (3),        /* toint.  */
+      COSTS_N_INSNS (3),        /* fromint.  */
+      COSTS_N_INSNS (3)         /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)   /* alu.  */
+  }
+};
+
 const struct cpu_cost_table cortexa7_extra_costs =
 {
@@ -1083,6 +1288,7 @@ const struct cpu_cost_table cortexa7_extra_costs =
     COSTS_N_INSNS (1),  /* bfi.  */
     COSTS_N_INSNS (1),  /* bfx.  */
     COSTS_N_INSNS (1),  /* clz.  */
+    COSTS_N_INSNS (1),  /* rev.  */
     0,                  /* non_exec.  */
     true                /* non_exec_costs_exec.  */
   },
@@ -1184,6 +1390,7 @@ const struct cpu_cost_table cortexa12_extra_costs =
     0,                  /* bfi.  */
     COSTS_N_INSNS (1),  /* bfx.  */
     COSTS_N_INSNS (1),  /* clz.  */
+    COSTS_N_INSNS (1),  /* rev.  */
     0,                  /* non_exec.  */
     true                /* non_exec_costs_exec.  */
   },
@@ -1284,6 +1491,7 @@ const struct cpu_cost_table cortexa15_extra_costs =
     COSTS_N_INSNS (1),  /* bfi.  */
     0,                  /* bfx.  */
     0,                  /* clz.  */
+    0,                  /* rev.  */
     0,                  /* non_exec.  */
     true                /* non_exec_costs_exec.  */
   },
@@ -1384,6 +1592,7 @@ const struct cpu_cost_table v7m_extra_costs =
     0,                  /* bfi.  */
     0,                  /* bfx.  */
     0,                  /* clz.  */
+    0,                  /* rev.  */
     COSTS_N_INSNS (1),  /* non_exec.  */
     false               /* non_exec_costs_exec.  */
   },
@@ -1467,6 +1676,9 @@ const struct cpu_cost_table v7m_extra_costs =
   }
 };

+#define ARM_FUSE_NOTHING        (0)
+#define ARM_FUSE_MOVW_MOVT      (1 << 0)
+
 const struct tune_params arm_slowmul_tune =
 {
   arm_slowmul_rtx_costs,
@@ -1480,7 +1692,12 @@ const struct tune_params arm_slowmul_tune =
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_fastmul_tune =
@@ -1496,7 +1713,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -1515,7 +1737,12 @@ const struct tune_params arm_strongarm_tune =
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_xscale_tune =
@@ -1531,7 +1758,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_9e_tune =
@@ -1547,7 +1779,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };
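
Each rev entry slotted into the cpu_cost_table initializers above is expressed with GCC's COSTS_N_INSNS macro, and these tables hold extra costs on top of the cost of a minimal instruction. For scale, this is rtl.h's definition, quoted from memory, so treat it as an assumption:

    #define COSTS_N_INSNS(N) ((N) * 4)

So COSTS_N_INSNS (1) for rev means roughly one extra instruction's worth of latency, while the bare 0 entries say a REV is no more expensive than a simple ALU operation on that core.
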

 const struct tune_params arm_v6t2_tune =
@@ -1563,7 +1800,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -1580,7 +1822,33 @@ const struct tune_params arm_cortex_tune =
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
+};
+
+const struct tune_params arm_cortex_a8_tune =
+{
+  arm_9e_rtx_costs,
+  &cortexa8_extra_costs,
+  NULL,                         /* Sched adj cost.  */
+  1,                            /* Constant limit.  */
+  5,                            /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,                        /* Prefer constant pool.  */
+  arm_default_branch_cost,
+  false,                        /* Prefer LDRD/STRD.  */
+  {true, true},                 /* Prefer non short circuit.  */
+  &arm_default_vec_cost,        /* Vectorizer costs.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  true,                         /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a7_tune =
@@ -1596,7 +1864,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  true,                         /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a15_tune =
@@ -1612,7 +1885,12 @@
   true,                         /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                   /* Prefer 32-bit encodings.  */
+  true,                         /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  max_insn_queue_index + 1      /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a53_tune =
@@ -1628,7 +1906,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_MOVW_MOVT,           /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a57_tune =
@@ -1644,7 +1927,33 @@
   true,                         /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                   /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_MOVW_MOVT,           /* Fuseable pairs of instructions.  */
+  max_insn_queue_index + 1      /* Sched L2 autopref depth.  */
+};
+
+const struct tune_params arm_xgene1_tune =
+{
+  arm_9e_rtx_costs,
+  &xgene1_extra_costs,
+  NULL,                         /* Scheduler cost adjustment.  */
+  1,                            /* Constant limit.  */
+  2,                            /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,                        /* Prefer constant pool.  */
+  arm_default_branch_cost,
+  true,                         /* Prefer LDRD/STRD.  */
+  {true, true},                 /* Prefer non short circuit.  */
+  &arm_default_vec_cost,        /* Vectorizer costs.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                   /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  32,                           /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -1653,7 +1962,7 @@ const struct tune_params arm_cortex_a57_tune =
 const struct tune_params arm_cortex_a5_tune =
 {
   arm_9e_rtx_costs,
-  NULL,
+  &cortexa5_extra_costs,
   NULL,                         /* Sched adj cost.  */
   1,                            /* Constant limit.  */
   1,                            /* Max cond insns.  */
@@ -1663,7 +1972,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {false, false},               /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  true,                         /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a9_tune =
@@ -1679,23 +1993,33 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_cortex_a12_tune =
 {
   arm_9e_rtx_costs,
   &cortexa12_extra_costs,
-  NULL,
+  NULL,                         /* Sched adj cost.  */
   1,                            /* Constant limit.  */
-  5,                            /* Max cond insns.  */
-  ARM_PREFETCH_BENEFICIAL(4,32,32),
+  2,                            /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
   false,                        /* Prefer constant pool.  */
   arm_default_branch_cost,
   true,                         /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  true, true,                   /* Prefer 32-bit encodings.  */
+  true,                         /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_MOVW_MOVT,           /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };
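
Every tune_params initializer in this diff gains the same five trailing entries. A sketch of the corresponding struct tail as it might read in the header; the field names are inferred from the initializer comments and from the current_tune->sched_autopref_queue_depth and prefer_ldrd_strd uses elsewhere in the diff, so treat them as assumptions:

    struct tune_params
    {
      /* ... existing members, ending with ...  */
      bool prefer_neon_for_64bits;
      /* New members, one per trailing initializer above (names assumed):  */
      bool disparage_flag_setting_t16_encodings;    /* Prefer 32-bit encodings.  */
      bool disparage_partial_flag_setting_t16_encodings;
      bool string_ops_prefer_neon;                  /* Prefer Neon for stringops.  */
      int max_insns_inline_memset;
      int fuseable_ops;                             /* Mask of ARM_FUSE_* pairs.  */
      int sched_autopref_queue_depth;               /* Sched L2 autopref depth.  */
    };
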

 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
@@ -1718,7 +2042,35 @@ const struct tune_params arm_v7m_tune =
   false,                        /* Prefer LDRD/STRD.  */
   {false, false},               /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
+};
+
+/* Cortex-M7 tuning.  */
+
+const struct tune_params arm_cortex_m7_tune =
+{
+  arm_9e_rtx_costs,
+  &v7m_extra_costs,
+  NULL,                         /* Sched adj cost.  */
+  0,                            /* Constant limit.  */
+  1,                            /* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,                         /* Prefer constant pool.  */
+  arm_cortex_m7_branch_cost,
+  false,                        /* Prefer LDRD/STRD.  */
+  {true, true},                 /* Prefer non short circuit.  */
+  &arm_default_vec_cost,        /* Vectorizer costs.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
@@ -1736,7 +2088,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {false, false},               /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };

 const struct tune_params arm_fa726te_tune =
@@ -1752,7 +2109,12 @@
   false,                        /* Prefer LDRD/STRD.  */
   {true, true},                 /* Prefer non short circuit.  */
   &arm_default_vec_cost,        /* Vectorizer costs.  */
-  false                         /* Prefer Neon for 64-bits bitops.  */
+  false,                        /* Prefer Neon for 64-bits bitops.  */
+  false, false,                 /* Prefer 32-bit encodings.  */
+  false,                        /* Prefer Neon for stringops.  */
+  8,                            /* Maximum insns to inline memset.  */
+  ARM_FUSE_NOTHING,             /* Fuseable pairs of instructions.  */
+  -1                            /* Sched L2 autopref depth.  */
 };


@@ -1824,7 +2186,7 @@ arm_constant_limit (bool size_p)

 /* Emit an insn that's a simple single-set.  Both the operands must be
    known to be valid.  */
-inline static rtx
+inline static rtx_insn *
 emit_set_insn (rtx x, rtx y)
 {
   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
@@ -1847,14 +2209,14 @@ bit_count (unsigned long value)

 typedef struct
 {
-  enum machine_mode mode;
+  machine_mode mode;
   const char *name;
 } arm_fixed_mode_set;

 /* A small helper for setting fixed-point library libfuncs.  */

 static void
-arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
+arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
 {
@@ -1869,8 +2231,8 @@ arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
 }

 static void
-arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
-			    enum machine_mode from, const char *funcname,
+arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
+			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
 {
   char buffer[50];
@@ -1895,7 +2257,7 @@ arm_init_libfuncs (void)
 {
   /* For Linux, we have access to kernel support for atomic operations.  */
   if (arm_abi == ARM_ABI_AAPCS_LINUX)
-    init_sync_libfuncs (2 * UNITS_PER_WORD);
+    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

   /* There are no special library functions unless we are using the
      ARM BPABI.  */
@@ -2468,10 +2830,6 @@ arm_option_override (void)
   if (TARGET_APCS_FLOAT)
     warning (0, "passing floating point arguments in fp regs not yet supported");

-  if (TARGET_LITTLE_WORDS)
-    warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
-	     "will be removed in a future release");
-
   /* Initialize boolean versions of the flags, for use in the arm.md file.  */
   arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
@@ -2500,6 +2858,7 @@ arm_option_override (void)
   arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
   arm_arch_crc = (insn_flags & FL_CRC32) != 0;
+  arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;

   if (arm_restrict_it == 2)
     arm_restrict_it = arm_arch8 && TARGET_THUMB2;
@@ -2742,6 +3101,10 @@ arm_option_override (void)
       /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
       max_insns_skipped = 6;
+
+      /* For THUMB2, we limit the conditional sequence to one IT block.  */
+      if (TARGET_THUMB2)
+	max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
     }
   else
     max_insns_skipped = current_tune->max_insns_skipped;
@@ -2803,7 +3166,14 @@ arm_option_override (void)
     prefer_neon_for_64bits = true;

   /* Use the alternative scheduling-pressure algorithm by default.  */
-  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
+  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
+
+  /* Look through ready list and all of queue for instructions
+     relevant for L2 auto-prefetcher.  */
+  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
+			 current_tune->sched_autopref_queue_depth,
+			 global_options.x_param_values,
+			 global_options_set.x_param_values);

@@ -2825,6 +3195,28 @@ arm_option_override (void)
   if (target_slow_flash_data)
     arm_disable_literal_pool = true;

+  /* Thumb2 inline assembly code should always use unified syntax.
+     This will apply to ARM and Thumb1 eventually.  */
+  if (TARGET_THUMB2)
+    inline_asm_unified = 1;
+
+  /* Disable scheduling fusion by default if it's not armv7 processor
+     or doesn't prefer ldrd/strd.  */
+  if (flag_schedule_fusion == 2
+      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
+    flag_schedule_fusion = 0;
+
+  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
+     - epilogue_insns - does not accurately model the corresponding insns
+     emitted in the asm file.  In particular, see the comment in thumb_exit
+     'Find out how many of the (return) argument registers we can corrupt'.
+     As a consequence, the epilogue may clobber registers without fipa-ra
+     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
+     TODO: Accurately model clobbers for epilogue_insns and reenable
+     fipa-ra.  */
+  if (TARGET_THUMB1)
+    flag_ipa_ra = 0;
+
   /* Register global variables with the garbage collector.  */
   arm_add_gc_roots ();
 }
@@ -3349,7 +3741,7 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)

 /* ??? Tweak this for thumb2.  */
 int
-arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
+arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
 {
   rtx cond;
@@ -3372,7 +3764,7 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
-      if (!after_arm_reorg
+      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
@@ -3649,7 +4041,7 @@ emit_constant_insn (rtx cond, rtx pattern)
    RTL generation.  */

 static int
-arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
+arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
		  int generate)
 {
@@ -4311,7 +4703,7 @@ static void
 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   unsigned HOST_WIDE_INT i, maxval;

   mode = GET_MODE (*op0);
@@ -4326,7 +4718,6 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
      for GTU/LEU in Thumb mode.  */
   if (mode == DImode)
     {
-      rtx tem;

       if (*code == GT || *code == LE
	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
@@ -4366,9 +4757,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
       /* If that did not work, reverse the condition.  */
       if (!op0_preserve_value)
	{
-	  tem = *op0;
-	  *op0 = *op1;
-	  *op1 = tem;
+	  std::swap (*op0, *op1);
	  *code = (int)swap_condition ((enum rtx_code)*code);
	}
     }
@@ -4459,7 +4848,7 @@ static rtx
 arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   int unsignedp ATTRIBUTE_UNUSED;
   rtx r ATTRIBUTE_UNUSED;

@@ -4510,25 +4899,25 @@ libcall_hasher::hash (const value_type *p1)
   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
 }

-typedef hash_table <libcall_hasher> libcall_table_type;
+typedef hash_table<libcall_hasher> libcall_table_type;

 static void
-add_libcall (libcall_table_type htab, rtx libcall)
+add_libcall (libcall_table_type *htab, rtx libcall)
 {
-  *htab.find_slot (libcall, INSERT) = libcall;
+  *htab->find_slot (libcall, INSERT) = libcall;
 }

 static bool
 arm_libcall_uses_aapcs_base (const_rtx libcall)
 {
   static bool init_done = false;
-  static libcall_table_type libcall_htab;
+  static libcall_table_type *libcall_htab = NULL;

   if (!init_done)
     {
       init_done = true;

-      libcall_htab.create (31);
+      libcall_htab = new libcall_table_type (31);
       add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
       add_libcall (libcall_htab,
@@ -4587,11 +4976,11 @@ arm_libcall_uses_aapcs_base (const_rtx libcall)
							DFmode));
     }

-  return libcall && libcall_htab.find (libcall) != NULL;
+  return libcall && libcall_htab->find (libcall) != NULL;
 }
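
The libcall table rewrite above tracks GCC 5's hash_table API change: tables are no longer value types you .create(), but objects allocated with new and reached through a pointer. The resulting usage pattern, restated from the hunk itself (the wrapper function is hypothetical):

    static libcall_table_type *libcall_htab;

    static void
    record_libcall (rtx libcall)    /* hypothetical wrapper */
    {
      if (!libcall_htab)
        libcall_htab = new libcall_table_type (31);   /* size hint, not a limit */
      *libcall_htab->find_slot (libcall, INSERT) = libcall;   /* insert */
      bool known = libcall_htab->find (libcall) != NULL;      /* query  */
      (void) known;
    }
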

 static rtx
-arm_libcall_value_1 (enum machine_mode mode)
+arm_libcall_value_1 (machine_mode mode)
 {
   if (TARGET_AAPCS_BASED)
     return aapcs_libcall_value (mode);
@@ -4606,7 +4995,7 @@
    assuming the value has mode MODE.  */

 static rtx
-arm_libcall_value (enum machine_mode mode, const_rtx libcall)
+arm_libcall_value (machine_mode mode, const_rtx libcall)
 {
   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
       && GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -4884,7 +5273,7 @@ arm_get_pcs_model (const_tree type, const_tree decl)
	 so we are free to use whatever conventions are
	 appropriate.  */
      /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
-      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+      cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
      if (i && i->local)
	return ARM_PCS_AAPCS_LOCAL;
     }
@@ -4914,9 +5303,9 @@ aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
    otherwise return the count in the sub-tree.  */

 static int
-aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
+aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   HOST_WIDE_INT size;

   switch (TREE_CODE (type))
@@ -4980,8 +5369,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
	int count;
	tree index = TYPE_DOMAIN (type);

-	/* Can't handle incomplete types.  */
-	if (!COMPLETE_TYPE_P (type))
+	/* Can't handle incomplete types nor sizes that are not
+	   fixed.  */
+	if (!COMPLETE_TYPE_P (type)
+	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
@@ -4998,9 +5389,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
-	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
-		!= count * GET_MODE_BITSIZE (*modep)))
+	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
@@ -5012,8 +5401,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
	int sub_count;
	tree field;

-	/* Can't handle incomplete types.  */
-	if (!COMPLETE_TYPE_P (type))
+	/* Can't handle incomplete types nor sizes that are not
+	   fixed.  */
+	if (!COMPLETE_TYPE_P (type)
+	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
@@ -5028,9 +5419,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
	  }

	/* There must be no padding.  */
-	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
-		!= count * GET_MODE_BITSIZE (*modep)))
+	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
@@ -5044,8 +5433,10 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
	int sub_count;
	tree field;

-	/* Can't handle incomplete types.  */
-	if (!COMPLETE_TYPE_P (type))
+	/* Can't handle incomplete types nor sizes that are not
+	   fixed.  */
+	if (!COMPLETE_TYPE_P (type)
+	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
@@ -5060,9 +5451,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
	  }

	/* There must be no padding.  */
-	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
-	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
-		!= count * GET_MODE_BITSIZE (*modep)))
+	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
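
All three padding checks above move from host-integer arithmetic to a wide-int predicate. The old form had to bail out when TYPE_SIZE did not fit a HOST_WIDE_INT; wi::ne_p compares the tree's value at full precision directly, so the guard and the cast both disappear:

    /* Before: two conditions, and a silent failure mode for huge sizes.  */
    if (!tree_fits_uhwi_p (TYPE_SIZE (type))
        || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep)))
      return -1;

    /* After: one arbitrary-precision comparison.  */
    if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
      return -1;
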

@@ -5107,10 +5496,10 @@ use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
    *COUNT to hold the number of such elements.  */
 static bool
 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
-				       enum machine_mode mode, const_tree type,
-				       enum machine_mode *base_mode, int *count)
+				       machine_mode mode, const_tree type,
+				       machine_mode *base_mode, int *count)
 {
-  enum machine_mode new_mode = VOIDmode;
+  machine_mode new_mode = VOIDmode;

   /* If we have the type information, prefer that to working things
      out from the mode.  */
@@ -5148,10 +5537,10 @@ aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,

 static bool
 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
-			       enum machine_mode mode, const_tree type)
+			       machine_mode mode, const_tree type)
 {
   int count ATTRIBUTE_UNUSED;
-  enum machine_mode ag_mode ATTRIBUTE_UNUSED;
+  machine_mode ag_mode ATTRIBUTE_UNUSED;

   if (!use_vfp_abi (pcs_variant, false))
     return false;
@@ -5160,7 +5549,7 @@ aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
 }

 static bool
-aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
 {
   if (!use_vfp_abi (pcum->pcs_variant, false))
@@ -5172,7 +5561,7 @@ aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
 }

 static bool
-aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type ATTRIBUTE_UNUSED)
 {
   int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
@@ -5190,7 +5579,7 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
      int i;
      int rcount = pcum->aapcs_vfp_rcount;
      int rshift = shift;
-      enum machine_mode rmode = pcum->aapcs_vfp_rmode;
+      machine_mode rmode = pcum->aapcs_vfp_rmode;
      rtx par;
      if (!TARGET_NEON)
	{
@@ -5226,7 +5615,7 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,

 static rtx
 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
-			       enum machine_mode mode,
+			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
 {
   if (!use_vfp_abi (pcs_variant, false))
@@ -5235,7 +5624,7 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
   if (mode == BLKmode
       || (mode == TImode && !TARGET_NEON))
     {
      int count;
-      enum machine_mode ag_mode;
+      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;
@@ -5271,7 +5660,7 @@ aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,

 static void
 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
-		   enum machine_mode mode  ATTRIBUTE_UNUSED,
+		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
 {
   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
@@ -5304,25 +5693,25 @@ static struct
      BLKmode) is a candidate for this co-processor's registers; this
      function should ignore any position-dependent state in
      CUMULATIVE_ARGS and only use call-type dependent information.  */
-  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

   /* Return true if the argument does get a co-processor register; it
      should set aapcs_reg to an RTX of the register allocated as is
      required for a return from FUNCTION_ARG.  */
-  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

   /* Return true if a result of mode MODE (or type TYPE if MODE is
      BLKmode) is can be returned in this co-processor's registers.  */
-  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
+  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

   /* Allocate and return an RTX element to hold the return type of a call,
      this routine must not fail and will only be called if is_return_candidate
      returned true with the same parameters.  */
-  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
+  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

   /* Finish processing this argument and prepare to start processing
      the next one.  */
-  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
+  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
 {
   AAPCS_CP(vfp)
@@ -5331,7 +5720,7 @@ static struct
 #undef AAPCS_CP

 static int
-aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
 {
   int i;
@@ -5380,7 +5769,7 @@ aapcs_select_return_coproc (const_tree type, const_tree fntype)
 }

 static rtx
-aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
+aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
 {
   /* We aren't passed a decl, so we can't check that a call is local.
@@ -5435,7 +5824,7 @@ aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
 }

 static rtx
-aapcs_libcall_value (enum machine_mode mode)
+aapcs_libcall_value (machine_mode mode)
 {
   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
       && GET_MODE_SIZE (mode) <= 4)
@@ -5447,7 +5836,7 @@
 /* Lay out a function argument using the AAPCS rules.  The rule
    numbers referred to here are those in the AAPCS.  */
 static void
-aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
 {
   int nregs, nregs2;
@@ -5619,7 +6008,7 @@ arm_lra_p (void)

 /* Return true if mode/type need doubleword alignment.  */
 static bool
-arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
+arm_needs_doubleword_align (machine_mode mode, const_tree type)
 {
   return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
	  || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
@@ -5646,7 +6035,7 @@ arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
    indeed make it pass in the stack if necessary).  */

 static rtx
-arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		  const_tree type, bool named)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5699,7 +6088,7 @@ arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
 }

 static unsigned int
-arm_function_arg_boundary (enum machine_mode mode, const_tree type)
+arm_function_arg_boundary (machine_mode mode, const_tree type)
 {
   return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
	   ? DOUBLEWORD_ALIGNMENT
@@ -5707,7 +6096,7 @@

 static int
-arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
		       tree type, bool named)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5735,7 +6124,7 @@
    (TYPE is null for libcalls where that information may not be available.)  */

 static void
-arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
+arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
			  const_tree type, bool named)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
@@ -5774,7 +6163,7 @@ arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,

 static bool
 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
-		       enum machine_mode mode ATTRIBUTE_UNUSED,
+		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
 {
   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
@@ -6012,7 +6401,7 @@ arm_function_in_section_p (tree decl, section *section)
   if (!DECL_SECTION_NAME (decl))
     {
       /* Make sure that we will not create a unique section for DECL.  */
-      if (flag_function_sections || DECL_ONE_ONLY (decl))
+      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
     }
@@ -6076,11 +6465,6 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
   if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
     return false;

-  /* Cannot tail-call to long calls, since these are out of range of
-     a branch instruction.  */
-  if (decl && arm_is_long_call_p (decl))
-    return false;
-
   /* If we are interworking and the function is not declared static
      then we can't tail-call it unless we know that it exists in this
      compilation unit (since it might be a Thumb routine).  */
@@ -6170,7 +6554,7 @@ require_pic_register (void)
     }
   else
     {
-      rtx seq, insn;
+      rtx_insn *seq, *insn;

       if (!cfun->machine->pic_reg)
	cfun->machine->pic_reg = gen_reg_rtx (Pmode);
@@ -6209,7 +6593,7 @@ require_pic_register (void)
 }

 rtx
-legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
 {
   if (GET_CODE (orig) == SYMBOL_REF
       || GET_CODE (orig) == LABEL_REF)
@@ -6534,7 +6918,7 @@ will_be_in_index_register (const_rtx x)

 /* Return nonzero if X is a valid ARM state address operand.  */
 int
-arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
+arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
				int strict_p)
 {
   bool use_ldrd;
@@ -6621,7 +7005,7 @@ arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,

 /* Return nonzero if X is a valid Thumb-2 address operand.  */
 static int
-thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
 {
   bool use_ldrd;
   enum rtx_code code = GET_CODE (x);
@@ -6716,7 +7100,7 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)

 /* Return nonzero if INDEX is valid for an address index operand in
    ARM state.  */
 static int
-arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
+arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
 {
   HOST_WIDE_INT range;
@@ -6837,7 +7221,7 @@ thumb2_index_mul_operand (rtx op)

 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
 static int
-thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
+thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
 {
   enum rtx_code code = GET_CODE (index);
@@ -6930,7 +7314,7 @@ thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)

 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
 static int
-thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
+thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
 {
   int regno;
@@ -6978,7 +7362,7 @@ thumb1_index_register_rtx_p (rtx x, int strict_p)
    reload pass starts.  This is so that eliminating such addresses
    into stack based ones won't produce impossible code.  */
 int
-thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
 {
   /* ??? Not clear if this is right.  Experiment.  */
   if (GET_MODE_SIZE (mode) < 4
@@ -7077,7 +7461,7 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)

 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
    instruction of mode MODE.  */
 int
-thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
+thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
 {
   switch (GET_MODE_SIZE (mode))
     {
@@ -7095,7 +7479,7 @@

 bool
-arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
+arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
 {
   if (TARGET_ARM)
     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
@@ -7159,7 +7543,7 @@ arm_load_tp (rtx target)

       emit_insn (gen_load_tp_soft ());

-      tmp = gen_rtx_REG (SImode, 0);
+      tmp = gen_rtx_REG (SImode, R0_REGNUM);
       emit_move_insn (target, tmp);
     }
   return target;
@@ -7223,13 +7607,13 @@ arm_tls_descseq_addr (rtx x, rtx reg)
			    gen_rtx_CONST (VOIDmode, label),
			    GEN_INT (!TARGET_ARM)),
		UNSPEC_TLS);
-  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
+  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

   emit_insn (gen_tlscall (x, labelno));
   if (!reg)
     reg = gen_reg_rtx (SImode);
   else
-    gcc_assert (REGNO (reg) != 0);
+    gcc_assert (REGNO (reg) != R0_REGNUM);

   emit_move_insn (reg, reg0);
@@ -7333,7 +7717,7 @@ legitimize_tls_address (rtx x, rtx reg)

 /* Try machine-dependent ways of modifying an illegitimate address
    to be legitimate.  If we find one, return the new, valid address.  */
 rtx
-arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
 {
   if (arm_tls_referenced_p (x))
     {
@@ -7479,7 +7863,7 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)

 /* Try machine-dependent ways of modifying an illegitimate Thumb address
    to be legitimate.  If we find one, return the new, valid address.  */
 rtx
-thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
+thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
 {
   if (GET_CODE (x) == PLUS
       && CONST_INT_P (XEXP (x, 1))
@@ -7545,7 +7929,7 @@ thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)

 bool
 arm_legitimize_reload_address (rtx *p,
-			       enum machine_mode mode,
+			       machine_mode mode,
			       int opnum, int type,
			       int ind_levels ATTRIBUTE_UNUSED)
 {
@@ -7732,7 +8116,7 @@ arm_legitimize_reload_address (rtx *p,

 rtx
 thumb_legitimize_reload_address (rtx *x_p,
-				 enum machine_mode mode,
+				 machine_mode mode,
				 int opnum, int type,
				 int ind_levels ATTRIBUTE_UNUSED)
 {
@@ -7773,25 +8157,6 @@ thumb_legitimize_reload_address (rtx *x_p,
   return NULL;
 }

-/* Test for various thread-local symbols.  */
-
-/* Helper for arm_tls_referenced_p.
*/ - -static int -arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) -{ - if (GET_CODE (*x) == SYMBOL_REF) - return SYMBOL_REF_TLS_MODEL (*x) != 0; - - /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are - TLS offsets, not real symbol references. */ - if (GET_CODE (*x) == UNSPEC - && XINT (*x, 1) == UNSPEC_TLS) - return -1; - - return 0; -} - /* Return TRUE if X contains any TLS symbol references. */ bool @@ -7800,7 +8165,19 @@ arm_tls_referenced_p (rtx x) if (! TARGET_HAVE_TLS) return false; - return for_each_rtx (&x, arm_tls_operand_p_1, NULL); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, x, ALL) + { + const_rtx x = *iter; + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) + return true; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + iter.skip_subrtxes (); + } + return false; } /* Implement TARGET_LEGITIMATE_CONSTANT_P. @@ -7812,7 +8189,7 @@ arm_tls_referenced_p (rtx x) When generating pic allow anything. */ static bool -arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) +arm_legitimate_constant_p_1 (machine_mode mode, rtx x) { /* At present, we have no support for Neon structure constants, so forbid them here. It might be possible to handle simple cases like 0 and -1 @@ -7824,7 +8201,7 @@ arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) } static bool -thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { return (CONST_INT_P (x) || CONST_DOUBLE_P (x) @@ -7833,7 +8210,7 @@ thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) } static bool -arm_legitimate_constant_p (enum machine_mode mode, rtx x) +arm_legitimate_constant_p (machine_mode mode, rtx x) { return (!arm_cannot_force_const_mem (mode, x) && (TARGET_32BIT @@ -7844,7 +8221,7 @@ arm_legitimate_constant_p (enum machine_mode mode, rtx x) /* Implement TARGET_CANNOT_FORCE_CONST_MEM. 
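This hunk is part of the tree-wide migration from the callback-based for_each_rtx to the rtl-iter.h worklist iterator, with iter.skip_subrtxes () taking over the old "return -1, do not recurse" convention. The shape of the new idiom, as a sketch that only compiles inside the GCC 5 source tree (UNSPEC_WHATEVER is a placeholder name of mine, not a real unspec):

/* Return true if X contains a SYMBOL_REF, without looking inside
   UNSPEC_WHATEVER.  Mirrors arm_tls_referenced_p above; needs rtl.h
   and rtl-iter.h.  */
static bool
contains_symbol_ref_p (rtx x)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx sub = *iter;
      if (GET_CODE (sub) == SYMBOL_REF)
	return true;
      if (GET_CODE (sub) == UNSPEC && XINT (sub, 1) == UNSPEC_WHATEVER)
	iter.skip_subrtxes ();	/* prune this subtree */
    }
  return false;
}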
*/ static bool -arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { rtx base, offset; @@ -7868,7 +8245,7 @@ arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) static inline int thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int total, words; switch (code) @@ -7996,7 +8373,7 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) static inline bool arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); enum rtx_code subcode; rtx operand; enum rtx_code code = GET_CODE (x); @@ -8458,7 +8835,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) if (GET_MODE_CLASS (mode) == MODE_INT) { rtx op = XEXP (x, 0); - enum machine_mode opmode = GET_MODE (op); + machine_mode opmode = GET_MODE (op); if (mode == DImode) *total += COSTS_N_INSNS (1); @@ -8601,7 +8978,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) static inline int thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int words; switch (code) @@ -8634,7 +9011,13 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) /* Thumb1 mul instruction can't operate on const. We must Load it into a register first. */ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); - return COSTS_N_INSNS (1) + const_size; + /* For the targets which have a very small and high-latency multiply + unit, we prefer to synthesize the mult with up to 5 instructions, + giving a good balance between size and performance. */ + if (arm_arch6m && arm_m_profile_small_mul) + return COSTS_N_INSNS (5); + else + return COSTS_N_INSNS (1) + const_size; } return COSTS_N_INSNS (1); @@ -8642,9 +9025,11 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) /* A SET doesn't have a mode, so let's look at the SET_DEST to get the mode. */ words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); - return (COSTS_N_INSNS (words) - + 4 * ((MEM_P (SET_SRC (x))) - + MEM_P (SET_DEST (x)))); + return COSTS_N_INSNS (words) + + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x)) + || satisfies_constraint_K (SET_SRC (x)) + /* thumb1_movdi_insn. */ + || ((words > 1) && MEM_P (SET_SRC (x)))); case CONST_INT: if (outer == SET) @@ -8697,16 +9082,14 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) case AND: case XOR: case IOR: - /* XXX guess. */ - return 8; + return COSTS_N_INSNS (1); case MEM: - /* XXX another guess. */ - /* Memory costs quite a lot for the first word, but subsequent words - load at the equivalent of a single insn each. */ - return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + return (COSTS_N_INSNS (1) + + COSTS_N_INSNS (1) + * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) - ? 4 : 0)); + ? COSTS_N_INSNS (1) : 0)); case IF_THEN_ELSE: /* XXX a guess. 
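The rewritten thumb1_size_rtx_costs entries replace the old magic numbers (8, 10, 4 and friends) with COSTS_N_INSNS units; rtl.h defines COSTS_N_INSNS (N) as ((N) * 4), so the relative ordering is preserved while the intent becomes explicit. A standalone rendering of the new MEM formula (the macro is re-defined locally so the snippet builds outside GCC; UNITS_PER_WORD assumed 4, as on ARM):

#include <cstdio>

#define COSTS_N_INSNS(N) ((N) * 4)	/* as in GCC's rtl.h */
#define UNITS_PER_WORD 4		/* ARM word size */

/* Size cost of a MEM of MODE_SIZE bytes in the new scheme; IN_POOL
   marks a constant-pool SYMBOL_REF, which costs one extra insn.  */
static int
thumb1_mem_size_cost (int mode_size, bool in_pool)
{
  return COSTS_N_INSNS (1)
	 + COSTS_N_INSNS (1) * ((mode_size - 1) / UNITS_PER_WORD)
	 + (in_pool ? COSTS_N_INSNS (1) : 0);
}

int
main ()
{
  printf ("SImode %d, DImode %d, pool %d\n",
	  thumb1_mem_size_cost (4, false),	/* 4 */
	  thumb1_mem_size_cost (8, false),	/* 8 */
	  thumb1_mem_size_cost (4, true));	/* 8 */
  return 0;
}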
*/ @@ -8743,7 +9126,7 @@ static bool arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB1) { *total = thumb1_size_rtx_costs (x, code, outer_code); @@ -9098,7 +9481,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, const struct cpu_cost_table *extra_cost, int *cost, bool speed_p) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB1) { @@ -9334,6 +9717,47 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost = LIBCALL_COST (2); return false; + case BSWAP: + if (arm_arch6) + { + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.rev; + + return false; + } + } + else + { + /* No rev instruction available. Look at arm_legacy_rev + and thumb_legacy_rev for the form of RTL used then. */ + if (TARGET_THUMB) + { + *cost = COSTS_N_INSNS (10); + + if (speed_p) + { + *cost += 6 * extra_cost->alu.shift; + *cost += 3 * extra_cost->alu.logical; + } + } + else + { + *cost = COSTS_N_INSNS (5); + + if (speed_p) + { + *cost += 2 * extra_cost->alu.shift; + *cost += extra_cost->alu.arith_shift; + *cost += 2 * extra_cost->alu.logical; + } + } + return true; + } + return false; + case MINUS: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) @@ -9433,6 +9857,8 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); return true; } + else if (speed_p) + *cost += extra_cost->alu.arith; return false; } @@ -9668,6 +10094,9 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); return true; } + else if (speed_p) + *cost += extra_cost->alu.arith; + return false; } @@ -9716,8 +10145,17 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, /* Vector mode? */ *cost = LIBCALL_COST (2); return false; + case IOR: + if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.rev; - case AND: case XOR: case IOR: + return true; + } + /* Fall through. */ + case AND: case XOR: if (mode == SImode) { enum rtx_code subcode = GET_CODE (XEXP (x, 0)); @@ -10044,7 +10482,7 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost = 0; else { - enum machine_mode op0mode; + machine_mode op0mode; /* We'll mostly assume that the cost of a compare is the cost of the LHS. However, there are some notable exceptions. */ @@ -10616,6 +11054,36 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, *cost = LIBCALL_COST (1); return false; + case FMA: + if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + rtx op2 = XEXP (x, 2); + + *cost = COSTS_N_INSNS (1); + + /* vfms or vfnma. */ + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + /* vfnms or vfnma. 
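For the new BSWAP case: rev costs a single insn on ARMv6 and later; without it, arm_legacy_rev and thumb_legacy_rev synthesize the swap from shifts and logicals, which is what the 5- and 10-insn figures above are pricing. What those sequences compute is the plain 32-bit byte reverse; a reference version in portable C (the value, not the exact insn sequence GCC emits):

#include <cstdint>
#include <cstdio>

/* Reference 32-bit byte reverse: the result the legacy shift/mask
   sequences must produce when no "rev" instruction is available.  */
static uint32_t
bswap32 (uint32_t x)
{
  return (x >> 24)
	 | ((x >> 8) & 0x0000ff00u)
	 | ((x << 8) & 0x00ff0000u)
	 | (x << 24);
}

int
main ()
{
  printf ("%08x\n", bswap32 (0x11223344u));	/* 44332211 */
  return 0;
}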
*/ + if (GET_CODE (op2) == NEG) + op2 = XEXP (op2, 0); + + *cost += rtx_cost (op0, FMA, 0, speed_p); + *cost += rtx_cost (op1, FMA, 1, speed_p); + *cost += rtx_cost (op2, FMA, 2, speed_p); + + if (speed_p) + *cost += extra_cost->fp[mode ==DFmode].fma; + + return true; + } + + *cost = LIBCALL_COST (3); + return false; + case FIX: case UNSIGNED_FIX: if (TARGET_HARD_FLOAT) @@ -10666,10 +11134,16 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, return true; case ASM_OPERANDS: - /* Just a guess. Cost one insn per input. */ - *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); - return true; + { + /* Just a guess. Guess number of instructions in the asm + plus one insn per input. Always a minimum of COSTS_N_INSNS (1) + though (see PR60663). */ + int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); + int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); + *cost = COSTS_N_INSNS (asm_length + num_operands); + return true; + } default: if (mode != VOIDmode) *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); @@ -10732,7 +11206,7 @@ static bool arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB) { @@ -10786,7 +11260,7 @@ static bool arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB1) { @@ -10870,7 +11344,7 @@ static bool arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB) { @@ -10959,14 +11433,18 @@ static bool arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); if (TARGET_THUMB1) { switch (code) { case MULT: - *total = COSTS_N_INSNS (3); + /* Small multiply: 32 cycles for an integer multiply inst. */ + if (arm_arch6m && arm_m_profile_small_mul) + *total = COSTS_N_INSNS (32); + else + *total = COSTS_N_INSNS (3); return true; default: @@ -11064,7 +11542,7 @@ arm_thumb_address_cost (rtx x) } static int -arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, +arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED, addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) { return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); @@ -11072,7 +11550,7 @@ arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, /* Adjust cost hook for XScale. */ static bool -xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) { /* Some true dependencies can have a higher cost depending on precisely how certain input operands are used. */ @@ -11112,7 +11590,7 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) that overlaps with SHIFTED_OPERAND, then we have increase the cost of this dependency. */ extract_insn (dep); - preprocess_constraints (); + preprocess_constraints (dep); for (opno = 0; opno < recog_data.n_operands; opno++) { /* We can ignore strict inputs. */ @@ -11133,7 +11611,7 @@ xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) /* Adjust cost hook for Cortex A9. 
*/ static bool -cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) { switch (REG_NOTE_KIND (link)) { @@ -11206,7 +11684,7 @@ cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) /* Adjust cost hook for FA726TE. */ static bool -fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) +fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) { /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated) have penalty of 3. */ @@ -11242,7 +11720,7 @@ fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) point to integer conversion does not go through memory. */ int -arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, +arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, reg_class_t from, reg_class_t to) { if (TARGET_32BIT) @@ -11270,7 +11748,7 @@ arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, /* Implement TARGET_MEMORY_MOVE_COST. */ int -arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, +arm_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in ATTRIBUTE_UNUSED) { if (TARGET_32BIT) @@ -11375,15 +11853,16 @@ arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, /* Return true if and only if this insn can dual-issue only as older. */ static bool -cortexa7_older_only (rtx insn) +cortexa7_older_only (rtx_insn *insn) { if (recog_memoized (insn) < 0) return false; switch (get_attr_type (insn)) { - case TYPE_ALU_REG: - case TYPE_ALUS_REG: + case TYPE_ALU_DSP_REG: + case TYPE_ALU_SREG: + case TYPE_ALUS_SREG: case TYPE_LOGIC_REG: case TYPE_LOGICS_REG: case TYPE_ADC_REG: @@ -11426,7 +11905,7 @@ cortexa7_older_only (rtx insn) /* Return true if and only if this insn can dual-issue as younger. */ static bool -cortexa7_younger (FILE *file, int verbose, rtx insn) +cortexa7_younger (FILE *file, int verbose, rtx_insn *insn) { if (recog_memoized (insn) < 0) { @@ -11464,8 +11943,8 @@ cortexa7_younger (FILE *file, int verbose, rtx insn) instructions. This heuristic may affect dual issue opportunities in the current cycle. */ static void -cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, - int clock) +cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready, + int *n_readyp, int clock) { int i; int first_older_only = -1, first_younger = -1; @@ -11482,7 +11961,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, older. */ for (i = *n_readyp - 1; i >= 0; i--) { - rtx insn = ready[i]; + rtx_insn *insn = ready[i]; if (cortexa7_older_only (insn)) { first_older_only = i; @@ -11517,7 +11996,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", INSN_UID(ready [first_older_only]), INSN_UID(ready [first_younger])); - rtx first_older_only_insn = ready [first_older_only]; + rtx_insn *first_older_only_insn = ready [first_older_only]; for (i = first_older_only; i < first_younger; i++) { ready[i] = ready[i+1]; @@ -11529,7 +12008,7 @@ cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, /* Implement TARGET_SCHED_REORDER. 
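cortexa7_sched_reorder works on the scheduler's ready[] array, where the highest-priority candidate sits at the highest index. When the best dual-issue-restricted ("older only") insn is stuck below a "younger" one, the hook rotates it up to the top slot so the pair can issue together. The rotation in isolation (my sketch; plain ints stand in for the rtx_insn pointers GCC moves):

#include <cstdio>

/* Move the element at FROM up to index TO (FROM < TO), shifting the
   elements in between down one slot, as the Cortex-A7 hook does with
   ready[first_older_only .. first_younger].  */
static void
rotate_up (int *ready, int from, int to)
{
  int moved = ready[from];
  for (int i = from; i < to; i++)
    ready[i] = ready[i + 1];
  ready[to] = moved;
}

int
main ()
{
  int ready[] = { 10, 20, 30, 40 };	/* index 3 issues first */
  rotate_up (ready, 1, 3);		/* promote element "20" */
  for (int i = 0; i < 4; i++)
    printf ("%d ", ready[i]);		/* prints: 10 30 40 20 */
  printf ("\n");
  return 0;
}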
*/ static int -arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, +arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp, int clock) { switch (arm_tune) @@ -11553,7 +12032,7 @@ arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, adjust_cost function. Only put bits of code into arm_adjust_cost that are common across all cores. */ static int -arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) +arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost) { rtx i_pat, d_pat; @@ -11640,6 +12119,12 @@ arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) : arm_default_branch_cost (speed_p, predictable_p); } +static int +arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p) +{ + return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); +} + static bool fp_consts_inited = false; static REAL_VALUE_TYPE value_fp0; @@ -11698,8 +12183,8 @@ vfp3_const_double_index (rtx x) int sign, exponent; unsigned HOST_WIDE_INT mantissa, mant_hi; unsigned HOST_WIDE_INT mask; - HOST_WIDE_INT m1, m2; int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + bool fail; if (!TARGET_VFP3 || !CONST_DOUBLE_P (x)) return -1; @@ -11719,9 +12204,9 @@ vfp3_const_double_index (rtx x) WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 bits for the mantissa, this may fail (low bits would be lost). */ real_ldexp (&m, &r, point_pos - exponent); - REAL_VALUE_TO_INT (&m1, &m2, m); - mantissa = m1; - mant_hi = m2; + wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); + mantissa = w.elt (0); + mant_hi = w.elt (1); /* If there are bits set in the low part of the mantissa, we can't represent this value. */ @@ -11828,7 +12313,7 @@ vfp3_const_double_rtx (rtx x) -1 if the given value doesn't match any of the listed patterns. */ static int -neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, +neon_valid_immediate (rtx op, machine_mode mode, int inverse, rtx *modconst, int *elementwidth) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ @@ -12033,7 +12518,7 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, VMOV) in *MODCONST. */ int -neon_immediate_valid_for_move (rtx op, enum machine_mode mode, +neon_immediate_valid_for_move (rtx op, machine_mode mode, rtx *modconst, int *elementwidth) { rtx tmpconst; @@ -12058,7 +12543,7 @@ neon_immediate_valid_for_move (rtx op, enum machine_mode mode, *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */ int -neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse, +neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse, rtx *modconst, int *elementwidth) { rtx tmpconst; @@ -12084,7 +12569,7 @@ neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse, because they have different limitations. */ int -neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, +neon_immediate_valid_for_shift (rtx op, machine_mode mode, rtx *modconst, int *elementwidth, bool isleftshift) { @@ -12141,7 +12626,7 @@ neon_immediate_valid_for_shift (rtx op, enum machine_mode mode, MNEM. 
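The vfp3_const_double_index hunk swaps the removed REAL_VALUE_TO_INT macro for the wide-int real_to_integer overload, but the set of accepted constants is unchanged: as I read the VFP3 VMOV immediate encoding, it covers +/-(m/16) * 2^e with 16 <= m <= 31 and -3 <= e <= 4. A standalone membership check over that definition (illustrative only; the real routine inspects the raw mantissa bits rather than enumerating):

#include <cstdio>
#include <cmath>

/* True if D is one of the 256 VFP3 VMOV-immediate constants, under
   the +/-(m/16) * 2^e reading described above.  */
static bool
vfp3_immediate_p (double d)
{
  for (int e = -3; e <= 4; e++)
    for (int m = 16; m <= 31; m++)
      {
	double v = ldexp (m / 16.0, e);
	if (d == v || d == -v)
	  return true;
      }
  return false;
}

int
main ()
{
  printf ("%d %d %d\n",
	  vfp3_immediate_p (1.0),	/* 1: m=16, e=0 */
	  vfp3_immediate_p (31.0),	/* 1: m=31, e=4 */
	  vfp3_immediate_p (0.1));	/* 0: not encodable */
  return 0;
}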
*/ char * -neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, +neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode, int inverse, int quad) { int width, is_valid; @@ -12164,7 +12649,7 @@ neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode, char * neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, - enum machine_mode mode, int quad, + machine_mode mode, int quad, bool isleftshift) { int width, is_valid; @@ -12192,10 +12677,10 @@ neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, for no particular gain. */ void -neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, +neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode, rtx (*reduc) (rtx, rtx, rtx)) { - enum machine_mode inner = GET_MODE_INNER (mode); + machine_mode inner = GET_MODE_INNER (mode); unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); rtx tmpsum = op1; @@ -12214,8 +12699,8 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, static rtx neon_vdup_constant (rtx vals) { - enum machine_mode mode = GET_MODE (vals); - enum machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode mode = GET_MODE (vals); + machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); bool all_same = true; rtx x; @@ -12254,7 +12739,7 @@ neon_vdup_constant (rtx vals) rtx neon_make_constant (rtx vals) { - enum machine_mode mode = GET_MODE (vals); + machine_mode mode = GET_MODE (vals); rtx target; rtx const_vec = NULL_RTX; int n_elts = GET_MODE_NUNITS (mode); @@ -12306,8 +12791,8 @@ neon_make_constant (rtx vals) void neon_expand_vector_init (rtx target, rtx vals) { - enum machine_mode mode = GET_MODE (target); - enum machine_mode inner_mode = GET_MODE_INNER (mode); + machine_mode mode = GET_MODE (target); + machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); int n_var = 0, one_var = -1; bool all_same = true; @@ -12437,7 +12922,7 @@ neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) } HOST_WIDE_INT -neon_element_bits (enum machine_mode mode) +neon_element_bits (machine_mode mode) { if (mode == DImode) return GET_MODE_BITSIZE (mode); @@ -12563,7 +13048,11 @@ neon_vector_mem_operand (rtx op, int type, bool strict) || (type == 0 && GET_CODE (ind) == PRE_DEC)) return arm_address_register_rtx_p (XEXP (ind, 0), 0); - /* FIXME: vld1 allows register post-modify. */ + /* Allow post-increment by register for VLDn */ + if (type == 2 && GET_CODE (ind) == POST_MODIFY + && GET_CODE (XEXP (ind, 1)) == PLUS + && REG_P (XEXP (XEXP (ind, 1), 1))) + return true; /* Match: (plus (reg) @@ -12642,7 +13131,7 @@ arm_eliminable_register (rtx x) coprocessor registers. Otherwise return NO_REGS. */ enum reg_class -coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) +coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb) { if (mode == HFmode) { @@ -12767,25 +13256,24 @@ tls_mentioned_p (rtx x) /* Must not copy any rtx that uses a pc-relative address. */ -static int -arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) -{ - if (GET_CODE (*x) == UNSPEC - && (XINT (*x, 1) == UNSPEC_PIC_BASE - || XINT (*x, 1) == UNSPEC_PIC_UNIFIED)) - return 1; - return 0; -} - static bool -arm_cannot_copy_insn_p (rtx insn) +arm_cannot_copy_insn_p (rtx_insn *insn) { /* The tls call insn cannot be copied, as it is paired with a data word. 
*/ if (recog_memoized (insn) == CODE_FOR_tlscall) return true; - return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) + { + const_rtx x = *iter; + if (GET_CODE (x) == UNSPEC + && (XINT (x, 1) == UNSPEC_PIC_BASE + || XINT (x, 1) == UNSPEC_PIC_UNIFIED)) + return true; + } + return false; } enum rtx_code @@ -12927,7 +13415,7 @@ adjacent_mem_locations (rtx a, rtx b) REGNO (R_dk) = REGNO (R_d0) + k. The pattern for store is similar. */ bool -ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode, +ldm_stm_operation_p (rtx op, bool load, machine_mode mode, bool consecutive, bool return_pc) { HOST_WIDE_INT count = XVECLEN (op, 0); @@ -14153,7 +14641,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, unsigned int interleave_factor, HOST_WIDE_INT bytes_per_iter) { - rtx label, src_reg, dest_reg, final_src, test; + rtx src_reg, dest_reg, final_src, test; HOST_WIDE_INT leftover; leftover = length % bytes_per_iter; @@ -14169,7 +14657,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, 0, 0, OPTAB_WIDEN); /* Emit the start of the loop. */ - label = gen_label_rtx (); + rtx_code_label *label = gen_label_rtx (); emit_label (label); /* Emit the loop body. */ @@ -14289,7 +14777,7 @@ arm_gen_movmemqi (rtx *operands) else { mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); - emit_move_insn (mem, gen_rtx_REG (SImode, 0)); + emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM)); if (last_bytes != 0) { emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); @@ -14385,7 +14873,7 @@ by mode size. */ inline static rtx next_consecutive_mem (rtx mem) { - enum machine_mode mode = GET_MODE (mem); + machine_mode mode = GET_MODE (mem); HOST_WIDE_INT offset = GET_MODE_SIZE (mode); rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); @@ -14525,7 +15013,7 @@ gen_movmem_ldrd_strd (rtx *operands) here. If we are unable to support a dominance comparison we return CC mode. This will then fail to match for the RTL expressions that generate this call. */ -enum machine_mode +machine_mode arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) { enum rtx_code cond1, cond2; @@ -14554,11 +15042,7 @@ arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) return CCmode; if (swapped) - { - enum rtx_code temp = cond1; - cond1 = cond2; - cond2 = temp; - } + std::swap (cond1, cond2); switch (cond1) { @@ -14667,7 +15151,7 @@ arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) } } -enum machine_mode +machine_mode arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) { /* All floating point compares return CCFP if it is an equality @@ -14850,7 +15334,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) rtx arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch) { - enum machine_mode mode; + machine_mode mode; rtx cc_reg; int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; @@ -15067,11 +15551,7 @@ arm_reload_out_hi (rtx *operands) /* Updating base_plus might destroy outval, see if we can swap the scratch and base_plus. */ if (!reg_overlap_mentioned_p (scratch, outval)) - { - rtx tmp = scratch; - scratch = base_plus; - base_plus = tmp; - } + std::swap (scratch, base_plus); else { rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); @@ -15126,11 +15606,7 @@ arm_reload_out_hi (rtx *operands) /* Updating base_plus might destroy outval, see if we can swap the scratch and base_plus. 
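The ldm_stm_operation_p contract quoted above requires the destination registers to ascend consecutively: REGNO (R_dk) = REGNO (R_d0) + k. That register-number constraint in isolation (toy check; the real predicate also validates the memory side, writeback, and an optional PC return):

#include <cstdio>

/* True if REGS[0..N) is a consecutive ascending run, the property an
   LDM/STM register list must have: regs[k] == regs[0] + k.  */
static bool
consecutive_regs_p (const int *regs, int n)
{
  if (n <= 0)
    return false;
  for (int k = 1; k < n; k++)
    if (regs[k] != regs[0] + k)
      return false;
  return true;
}

int
main ()
{
  int ok[]  = { 4, 5, 6, 7 };	/* ldm r0, {r4-r7} */
  int bad[] = { 4, 6, 7, 8 };	/* gap: rejected */
  printf ("%d %d\n", consecutive_regs_p (ok, 4),
	  consecutive_regs_p (bad, 4));		/* prints "1 0" */
  return 0;
}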
*/ if (!reg_overlap_mentioned_p (scratch, outval)) - { - rtx tmp = scratch; - scratch = base_plus; - base_plus = tmp; - } + std::swap (scratch, base_plus); else { rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); @@ -15186,7 +15662,7 @@ arm_reload_out_hi (rtx *operands) (padded to the size of a word) should be passed in a register. */ static bool -arm_must_pass_in_stack (enum machine_mode mode, const_tree type) +arm_must_pass_in_stack (machine_mode mode, const_tree type) { if (TARGET_AAPCS_BASED) return must_pass_in_stack_var_size (mode, type); @@ -15202,7 +15678,7 @@ arm_must_pass_in_stack (enum machine_mode mode, const_tree type) aggregate types are placed in the lowest memory address. */ bool -arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type) +arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type) { if (!TARGET_AAPCS_BASED) return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward; @@ -15220,7 +15696,7 @@ arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type) significant byte does. */ bool -arm_pad_reg_upward (enum machine_mode mode, +arm_pad_reg_upward (machine_mode mode, tree type, int first ATTRIBUTE_UNUSED) { if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN) @@ -15363,8 +15839,6 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset) return false; } -#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0) - /* Called from a peephole2 to replace two word-size accesses with a single LDRD/STRD instruction. Returns true iff we can generate a new instruction sequence. That is, both accesses use the same base @@ -15504,10 +15978,10 @@ gen_operands_ldrd_strd (rtx *operands, bool load, offset = offsets[1]; /* Swap the instructions such that lower memory is accessed first. */ - SWAP_RTX (operands[0], operands[1]); - SWAP_RTX (operands[2], operands[3]); + std::swap (operands[0], operands[1]); + std::swap (operands[2], operands[3]); if (const_store) - SWAP_RTX (operands[4], operands[5]); + std::swap (operands[4], operands[5]); } else { @@ -15532,7 +16006,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load, if (load && commute) { /* Try reordering registers. */ - SWAP_RTX (operands[0], operands[1]); + std::swap (operands[0], operands[1]); if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, false, load)) return true; @@ -15561,7 +16035,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load, if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset, false, false)) { - SWAP_RTX (operands[0], operands[1]); + std::swap (operands[0], operands[1]); return true; } @@ -15592,7 +16066,6 @@ gen_operands_ldrd_strd (rtx *operands, bool load, return false; } -#undef SWAP_RTX @@ -15745,7 +16218,7 @@ struct minipool_node /* The value in table. */ rtx value; /* The mode of value. */ - enum machine_mode mode; + machine_mode mode; /* The size of the value. With iWMMXt enabled sizes > 4 also imply an alignment of 8-bytes. */ int fix_size; @@ -15754,10 +16227,10 @@ struct minipool_node struct minipool_fixup { Mfix * next; - rtx insn; + rtx_insn * insn; HOST_WIDE_INT address; rtx * loc; - enum machine_mode mode; + machine_mode mode; int fix_size; rtx value; Mnode * minipool; @@ -15771,7 +16244,7 @@ struct minipool_fixup static Mnode * minipool_vector_head; static Mnode * minipool_vector_tail; -static rtx minipool_vector_label; +static rtx_code_label *minipool_vector_label; static int minipool_pad; /* The linked list of all minipool fixes required for this function. 
*/ @@ -15785,7 +16258,7 @@ Mfix * minipool_barrier; #endif static HOST_WIDE_INT -get_jump_table_size (rtx insn) +get_jump_table_size (rtx_jump_table_data *insn) { /* ADDR_VECs only take room if read-only data does into the text section. */ @@ -16234,7 +16707,7 @@ assign_minipool_offsets (Mfix *barrier) /* Output the literal table */ static void -dump_minipool (rtx scan) +dump_minipool (rtx_insn *scan) { Mnode * mp; Mnode * nmp; @@ -16271,7 +16744,7 @@ dump_minipool (rtx scan) fputc ('\n', dump_file); } - switch (mp->fix_size) + switch (GET_MODE_SIZE (mp->mode)) { #ifdef HAVE_consttable_1 case 1: @@ -16357,23 +16830,23 @@ static Mfix * create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) { HOST_WIDE_INT count = 0; - rtx barrier; - rtx from = fix->insn; + rtx_barrier *barrier; + rtx_insn *from = fix->insn; /* The instruction after which we will insert the jump. */ - rtx selected = NULL; + rtx_insn *selected = NULL; int selected_cost; /* The address at which the jump instruction will be placed. */ HOST_WIDE_INT selected_address; Mfix * new_fix; HOST_WIDE_INT max_count = max_address - fix->address; - rtx label = gen_label_rtx (); + rtx_code_label *label = gen_label_rtx (); selected_cost = arm_barrier_cost (from); selected_address = fix->address; while (from && count < max_count) { - rtx tmp; + rtx_jump_table_data *tmp; int new_cost; /* This code shouldn't have been called if there was a natural barrier @@ -16430,7 +16903,7 @@ create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) CALL_ARG_LOCATION note. */ if (CALL_P (selected)) { - rtx next = NEXT_INSN (selected); + rtx_insn *next = NEXT_INSN (selected); if (next && NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) selected = next; @@ -16455,7 +16928,7 @@ create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) /* Record that there is a natural barrier in the insn stream at ADDRESS. */ static void -push_minipool_barrier (rtx insn, HOST_WIDE_INT address) +push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address) { Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); @@ -16477,8 +16950,8 @@ push_minipool_barrier (rtx insn, HOST_WIDE_INT address) fixing; VALUE is the constant that must be loaded, which is of type MODE. */ static void -push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, - enum machine_mode mode, rtx value) +push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc, + machine_mode mode, rtx value) { Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); @@ -16545,7 +17018,7 @@ int arm_const_double_inline_cost (rtx val) { rtx lowpart, highpart; - enum machine_mode mode; + machine_mode mode; mode = GET_MODE (val); @@ -16566,6 +17039,14 @@ arm_const_double_inline_cost (rtx val) NULL_RTX, NULL_RTX, 0, 0)); } +/* Cost of loading a SImode constant. */ +static inline int +arm_const_inline_cost (enum rtx_code code, rtx val) +{ + return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val), + NULL_RTX, NULL_RTX, 1, 0); +} + /* Return true if it is worthwhile to split a 64-bit constant into two 32-bit operations. 
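Background for the minipool code above: ARM materializes large constants with pc-relative loads, so every pool entry must stay within the addressing range of the insn that references it; create_fix_barrier exists to split the pool when that range would be exceeded. A toy reachability check in that spirit, using assumed ranges of +/-4095 bytes for an ARM-state word ldr and 0..1020 bytes forward-only for a Thumb-1 literal load (illustrative figures of mine; the authoritative per-mode limits live in the fix machinery itself):

#include <cstdio>
#include <cstdlib>

/* Can an insn at INSN_ADDR reach a pool entry at POOL_ADDR?  */
static bool
pool_entry_reachable_p (long insn_addr, long pool_addr, bool thumb1)
{
  long dist = pool_addr - insn_addr;
  if (thumb1)
    return dist >= 0 && dist <= 1020;	/* forward only */
  return labs (dist) <= 4095;
}

int
main ()
{
  printf ("%d %d %d\n",
	  pool_entry_reachable_p (0x8000, 0x8ffc, false),	/* 1 */
	  pool_entry_reachable_p (0x8000, 0x9100, false),	/* 0 */
	  pool_entry_reachable_p (0x8000, 0x7ffc, true));	/* 0 */
  return 0;
}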
This is the case if optimizing for size, or if we have load delay slots, or if one 32-bit part can be done with @@ -16573,7 +17054,7 @@ arm_const_double_inline_cost (rtx val) bool arm_const_double_by_parts (rtx val) { - enum machine_mode mode = GET_MODE (val); + machine_mode mode = GET_MODE (val); rtx part; if (optimize_size || arm_ld_sched) @@ -16606,7 +17087,7 @@ arm_const_double_by_parts (rtx val) bool arm_const_double_by_immediates (rtx val) { - enum machine_mode mode = GET_MODE (val); + machine_mode mode = GET_MODE (val); rtx part; if (mode == VOIDmode) @@ -16633,22 +17114,20 @@ arm_const_double_by_immediates (rtx val) If DO_PUSHES is false we do not actually push any of the fixups needed. */ static void -note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) +note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes) { int opno; - extract_insn (insn); - - if (!constrain_operands (1)) - fatal_insn_not_found (insn); + extract_constrain_insn (insn); if (recog_data.n_alternatives == 0) return; /* Fill in recog_op_alt with information about the constraints of this insn. */ - preprocess_constraints (); + preprocess_constraints (insn); + const operand_alternative *op_alt = which_op_alt (); for (opno = 0; opno < recog_data.n_operands; opno++) { /* Things we need to fix can only occur in inputs. */ @@ -16659,7 +17138,7 @@ note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) of constants in this alternative is really to fool reload into allowing us to accept one there. We need to fix them up now so that we output the right code. */ - if (recog_op_alt[opno][which_alternative].memory_ok) + if (op_alt[opno].memory_ok) { rtx op = recog_data.operand[opno]; @@ -16709,14 +17188,15 @@ thumb1_reorg (void) { rtx dest, src; rtx pat, op0, set = NULL; - rtx prev, insn = BB_END (bb); + rtx_insn *prev, *insn = BB_END (bb); bool insn_clobbered = false; - while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn)) insn = PREV_INSN (insn); /* Find the last cbranchsi4_insn in basic block BB. */ - if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + if (insn == BB_HEAD (bb) + || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) continue; /* Get the register with which we are comparing. */ @@ -16783,9 +17263,20 @@ thumb2_reorg (void) compute_bb_for_insn (); df_analyze (); + enum Convert_Action {SKIP, CONV, SWAP_CONV}; + FOR_EACH_BB_FN (bb, cfun) { - rtx insn; + if (current_tune->disparage_flag_setting_t16_encodings + && optimize_bb_for_speed_p (bb)) + continue; + + rtx_insn *insn; + Convert_Action action = SKIP; + Convert_Action action_for_partial_flag_setting + = (current_tune->disparage_partial_flag_setting_t16_encodings + && optimize_bb_for_speed_p (bb)) + ? SKIP : CONV; COPY_REG_SET (&live, DF_LR_OUT (bb)); df_simulate_initialize_backwards (bb, &live); @@ -16795,7 +17286,7 @@ thumb2_reorg (void) && !REGNO_REG_SET_P (&live, CC_REGNUM) && GET_CODE (PATTERN (insn)) == SET) { - enum {SKIP, CONV, SWAP_CONV} action = SKIP; + action = SKIP; rtx pat = PATTERN (insn); rtx dst = XEXP (pat, 0); rtx src = XEXP (pat, 1); @@ -16876,10 +17367,11 @@ thumb2_reorg (void) /* ANDS <Rdn>,<Rm> */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; else if (rtx_equal_p (dst, op1) && low_register_operand (op0, SImode)) - action = SWAP_CONV; + action = action_for_partial_flag_setting == SKIP + ? 
SKIP : SWAP_CONV; break; case ASHIFTRT: @@ -16890,26 +17382,30 @@ thumb2_reorg (void) /* LSLS <Rdn>,<Rm> */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; /* ASRS <Rd>,<Rm>,#<imm5> */ /* LSRS <Rd>,<Rm>,#<imm5> */ /* LSLS <Rd>,<Rm>,#<imm5> */ else if (low_register_operand (op0, SImode) && CONST_INT_P (op1) && IN_RANGE (INTVAL (op1), 0, 31)) - action = CONV; + action = action_for_partial_flag_setting; break; case ROTATERT: /* RORS <Rdn>,<Rm> */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; break; case NOT: - case NEG: /* MVNS <Rd>,<Rm> */ + if (low_register_operand (op0, SImode)) + action = action_for_partial_flag_setting; + break; + + case NEG: /* NEGS <Rd>,<Rm> (a.k.a RSBS) */ if (low_register_operand (op0, SImode)) action = CONV; @@ -16919,7 +17415,7 @@ thumb2_reorg (void) /* MOVS <Rd>,#<imm8> */ if (CONST_INT_P (src) && IN_RANGE (INTVAL (src), 0, 255)) - action = CONV; + action = action_for_partial_flag_setting; break; case REG: @@ -16969,7 +17465,7 @@ thumb2_reorg (void) static void arm_reorg (void) { - rtx insn; + rtx_insn *insn; HOST_WIDE_INT address = 0; Mfix * fix; @@ -17000,7 +17496,7 @@ arm_reorg (void) push_minipool_barrier (insn, address); else if (INSN_P (insn)) { - rtx table; + rtx_jump_table_data *table; note_invalid_constants (insn, address, true); address += get_attr_length (insn); @@ -17132,7 +17628,7 @@ arm_reorg (void) /* From now on we must synthesize any constants that we can't handle directly. This can happen if the RTL gets split during final instruction generation. */ - after_arm_reorg = 1; + cfun->machine->after_arm_reorg = 1; /* Free the minipool memory. */ obstack_free (&minipool_obstack, minipool_startobj); @@ -17140,24 +17636,7 @@ arm_reorg (void) /* Routines to output assembly language. */ -/* If the rtx is the correct value then return the string of the number. - In this way we can ensure that valid double constants are generated even - when cross compiling. */ -const char * -fp_immediate_constant (rtx x) -{ - REAL_VALUE_TYPE r; - - if (!fp_consts_inited) - init_fp_table (); - - REAL_VALUE_FROM_CONST_DOUBLE (r, x); - - gcc_assert (REAL_VALUES_EQUAL (r, value_fp0)); - return "0"; -} - -/* As for fp_immediate_constant, but value is passed directly, not in rtx. */ +/* Return string representation of passed in real value. */ static const char * fp_const_from_val (REAL_VALUE_TYPE *r) { @@ -17248,14 +17727,22 @@ arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, /* Output the assembly for a store multiple. */ const char * -vfp_output_fstmd (rtx * operands) +vfp_output_vstmd (rtx * operands) { char pattern[100]; int p; int base; int i; + rtx addr_reg = REG_P (XEXP (operands[0], 0)) + ? XEXP (operands[0], 0) + : XEXP (XEXP (operands[0], 0), 0); + bool push_p = REGNO (addr_reg) == SP_REGNUM; + + if (push_p) + strcpy (pattern, "vpush%?.64\t{%P1"); + else + strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1"); - strcpy (pattern, "fstmfdd%?\t%m0!, {%P1"); p = strlen (pattern); gcc_assert (REG_P (operands[1])); @@ -17364,7 +17851,7 @@ vfp_emit_fstmd (int base_reg, int count) the call target. */ void -arm_emit_call_insn (rtx pat, rtx addr) +arm_emit_call_insn (rtx pat, rtx addr, bool sibcall) { rtx insn; @@ -17375,6 +17862,7 @@ arm_emit_call_insn (rtx pat, rtx addr) to the instruction's CALL_INSN_FUNCTION_USAGE. 
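The renamed vfp_output_vstmd now emits UAL spellings, choosing vpush.64 when the base register is the stack pointer and vstmdb.64 with writeback otherwise, then tacking the remaining D registers onto the operand list. A sketch of that template construction (string building only: the %-operand escapes are expanded later by final.c, and the r4 base here is a made-up stand-in for the %m0 operand):

#include <cstdio>
#include <cstring>

/* Build a vpush/vstmdb template the way vfp_output_vstmd does.  */
static void
build_vstmd (char *buf, size_t bufsz, bool base_is_sp,
	     int first_dreg, int count)
{
  snprintf (buf, bufsz,
	    base_is_sp ? "vpush.64\t{d%d" : "vstmdb.64\tr4!, {d%d",
	    first_dreg);
  for (int i = 1; i < count; i++)
    {
      char elt[16];
      snprintf (elt, sizeof elt, ", d%d", first_dreg + i);
      strncat (buf, elt, bufsz - strlen (buf) - 1);
    }
  strncat (buf, "}", bufsz - strlen (buf) - 1);
}

int
main ()
{
  char buf[100];
  build_vstmd (buf, sizeof buf, true, 8, 3);
  printf ("%s\n", buf);		/* vpush.64 {d8, d9, d10} */
  return 0;
}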
*/ if (TARGET_VXWORKS_RTP && flag_pic + && !sibcall && GET_CODE (addr) == SYMBOL_REF && (SYMBOL_REF_DECL (addr) ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) @@ -17383,6 +17871,16 @@ arm_emit_call_insn (rtx pat, rtx addr) require_pic_register (); use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); } + + if (TARGET_AAPCS_BASED) + { + /* For AAPCS, IP and CC can be clobbered by veneers inserted by the + linker. We need to add an IP clobber to allow setting + TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber + is not needed since it's a fixed register. */ + rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); + clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM)); + } } /* Output a 'call' insn. */ @@ -17713,12 +18211,9 @@ output_move_double (rtx *operands, bool emit, int *count) if (reg_overlap_mentioned_p (operands[0], otherops[2])) { - rtx tmp; /* Swap base and index registers over to avoid a conflict. */ - tmp = otherops[1]; - otherops[1] = otherops[2]; - otherops[2] = tmp; + std::swap (otherops[1], otherops[2]); } /* If both registers conflict, it will usually have been fixed by a splitter. */ @@ -18041,7 +18536,7 @@ output_move_vfp (rtx *operands) int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; const char *templ; char buff[50]; - enum machine_mode mode; + machine_mode mode; reg = operands[!load]; mem = operands[load]; @@ -18062,19 +18557,19 @@ output_move_vfp (rtx *operands) switch (GET_CODE (addr)) { case PRE_DEC: - templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; + templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s"; ops[0] = XEXP (addr, 0); ops[1] = reg; break; case POST_INC: - templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; + templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s"; ops[0] = XEXP (addr, 0); ops[1] = reg; break; default: - templ = "f%s%c%%?\t%%%s0, %%1%s"; + templ = "v%sr%%?.%s\t%%%s0, %%1%s"; ops[0] = reg; ops[1] = mem; break; @@ -18082,7 +18577,7 @@ output_move_vfp (rtx *operands) sprintf (buff, templ, load ? "ld" : "st", - dp ? 'd' : 's', + dp ? "64" : "32", dp ? "P" : "", integer_p ? "\t%@ int" : ""); output_asm_insn (buff, ops); @@ -18137,7 +18632,7 @@ output_move_neon (rtx *operands) int regno, nregs, load = REG_P (operands[0]); const char *templ; char buff[50]; - enum machine_mode mode; + machine_mode mode; reg = operands[!load]; mem = operands[load]; @@ -18189,6 +18684,20 @@ output_move_neon (rtx *operands) /* FIXME: Not currently enabled in neon_vector_mem_operand. */ gcc_unreachable (); + case REG: + /* We have to use vldm / vstm for too-large modes. */ + if (nregs > 1) + { + if (nregs > 4) + templ = "v%smia%%?\t%%m0, %%h1"; + else + templ = "v%s1.64\t%%h1, %%A0"; + + ops[0] = mem; + ops[1] = reg; + break; + } + /* Fall through. */ case LABEL_REF: case PLUS: { @@ -18222,14 +18731,7 @@ output_move_neon (rtx *operands) } default: - /* We have to use vldm / vstm for too-large modes. */ - if (nregs > 4) - templ = "v%smia%%?\t%%m0, %%h1"; - else - templ = "v%s1.64\t%%h1, %%A0"; - - ops[0] = mem; - ops[1] = reg; + gcc_unreachable (); } sprintf (buff, templ, load ? "ld" : "st"); @@ -18241,11 +18743,11 @@ output_move_neon (rtx *operands) /* Compute and return the length of neon_mov<mode>, where <mode> is one of VSTRUCT modes: EI, OI, CI or XI. */ int -arm_attr_length_move_neon (rtx insn) +arm_attr_length_move_neon (rtx_insn *insn) { rtx reg, mem, addr; int load; - enum machine_mode mode; + machine_mode mode; extract_insn_cached (insn); @@ -18292,7 +18794,7 @@ arm_attr_length_move_neon (rtx insn) return zero. 
*/ int -arm_address_offset_is_imm (rtx insn) +arm_address_offset_is_imm (rtx_insn *insn) { rtx mem, addr; @@ -18597,6 +19099,14 @@ output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len) fputs ("\"\n", stream); } +/* Whether a register is callee saved or not. This is necessary because high + registers are marked as caller saved when optimizing for size on Thumb-1 + targets despite being callee saved in order to avoid using them. */ +#define callee_saved_reg_p(reg) \ + (!call_used_regs[reg] \ + || (TARGET_THUMB1 && optimize_size \ + && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM)) + /* Compute the register save mask for registers 0 through 12 inclusive. This code is used by arm_compute_save_reg_mask. */ @@ -18657,7 +19167,7 @@ arm_compute_save_reg0_reg12_mask (void) /* In the normal case we only need to save those registers which are call saved and which are used by this function. */ for (reg = 0; reg <= 11; reg++) - if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) + if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg)) save_reg_mask |= (1 << reg); /* Handle the frame pointer as a special case. */ @@ -18762,6 +19272,7 @@ arm_compute_save_reg_mask (void) || (save_reg_mask && optimize_size && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->tail_call_emit && !crtl->calls_eh_return)) save_reg_mask |= 1 << LR_REGNUM; @@ -18820,7 +19331,7 @@ thumb1_compute_save_reg_mask (void) mask = 0; for (reg = 0; reg < 12; reg ++) - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg)) mask |= 1 << reg; if (flag_pic @@ -18853,7 +19364,7 @@ thumb1_compute_save_reg_mask (void) if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ()) reg = LAST_LO_REGNUM; - if (! call_used_regs[reg]) + if (callee_saved_reg_p (reg)) mask |= 1 << reg; } @@ -19281,9 +19792,6 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, || (cfun->machine->return_used_this_function != 0) || offsets->saved_regs == offsets->outgoing_args || frame_pointer_needed); - - /* Reset the ARM-specific per-function variables. */ - after_arm_reorg = 0; } } @@ -20200,7 +20708,7 @@ arm_emit_ldrd_pop (unsigned long saved_regs_mask) static unsigned arm_size_return_regs (void) { - enum machine_mode mode; + machine_mode mode; if (crtl->return_rtx != 0) mode = GET_MODE (crtl->return_rtx); @@ -20422,30 +20930,47 @@ arm_get_frame_offsets (void) { int reg = -1; + /* Register r3 is caller-saved. Normally it does not need to be + saved on entry by the prologue. However if we choose to save + it for padding then we may confuse the compiler into thinking + a prologue sequence is required when in fact it is not. This + will occur when shrink-wrapping if r3 is used as a scratch + register and there are no other callee-saved writes. + + This situation can be avoided when other callee-saved registers + are available and r3 is not mandatory if we choose a callee-saved + register for padding. */ + bool prefer_callee_reg_p = false; + /* If it is safe to use r3, then do so. This sometimes generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ if (! any_sibcall_could_use_r3 () && arm_size_return_regs () <= 12 && (offsets->saved_regs_mask & (1 << 3)) == 0 - && (TARGET_THUMB2 + && (TARGET_THUMB2 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) { reg = 3; + if (!TARGET_THUMB2) + prefer_callee_reg_p = true; + } + if (reg == -1 + || prefer_callee_reg_p) + { + for (i = 4; i <= (TARGET_THUMB1 ? 
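The callee_saved_reg_p macro above widens "callee saved" beyond !call_used_regs[] because Thumb-1 at -Os marks the high registers caller-saved purely to steer the register allocator away from them; if one does get used, the prologue must still save it. The predicate restated over stub data (the arrays, flags, and the r8..r11 range are mock-ups assumed here, not taken from arm.h):

#include <cstdio>

/* Mock-ups of the backend state the macro consults.  */
static bool call_used_regs[16] = {
  true, true, true, true,	/* r0-r3 */
  false, false, false, false,	/* r4-r7 */
  true, true, true, true,	/* r8-r11 under Thumb-1 -Os */
  true, false, false, true,	/* r12, sp, lr, pc */
};
static bool target_thumb1 = true;
static bool optimize_size_p = true;

static bool
callee_saved_reg_p (int reg)
{
  return !call_used_regs[reg]
	 || (target_thumb1 && optimize_size_p
	     && reg >= 8 && reg <= 11);
}

int
main ()
{
  printf ("r3:%d r4:%d r8:%d\n",
	  callee_saved_reg_p (3),	/* 0: genuinely caller-saved */
	  callee_saved_reg_p (4),	/* 1: normal callee-saved */
	  callee_saved_reg_p (8));	/* 1: high reg, Thumb-1 -Os */
  return 0;
}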
LAST_LO_REGNUM : 11); i++) + { + /* Avoid fixed registers; they may be changed at + arbitrary times so it's unsafe to restore them + during the epilogue. */ + if (!fixed_regs[i] + && (offsets->saved_regs_mask & (1 << i)) == 0) + { + reg = i; + break; + } + } } - else - for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) - { - /* Avoid fixed registers; they may be changed at - arbitrary times so it's unsafe to restore them - during the epilogue. */ - if (!fixed_regs[i] - && (offsets->saved_regs_mask & (1 << i)) == 0) - { - reg = i; - break; - } - } if (reg != -1) { @@ -20694,8 +21219,8 @@ arm_expand_prologue (void) Just tell it we saved SP in r0. */ gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0); - r0 = gen_rtx_REG (SImode, 0); - r1 = gen_rtx_REG (SImode, 1); + r0 = gen_rtx_REG (SImode, R0_REGNUM); + r1 = gen_rtx_REG (SImode, R1_REGNUM); insn = emit_insn (gen_movsi (r0, stack_pointer_rtx)); RTX_FRAME_RELATED_P (insn) = 1; @@ -20945,7 +21470,7 @@ arm_expand_prologue (void) { /* This add can produce multiple insns for a large constant, so we need to get tricky. */ - rtx last = get_last_insn (); + rtx_insn *last = get_last_insn (); amount = GEN_INT (offsets->saved_args + saved_regs - offsets->outgoing_args); @@ -21035,7 +21560,15 @@ arm_print_condition (FILE *stream) } -/* If CODE is 'd', then the X is a condition operand and the instruction +/* Globally reserved letters: acln + Puncutation letters currently used: @_|?().!# + Lower case letters currently used: bcdefhimpqtvwxyz + Upper case letters currently used: ABCDFGHJKLMNOPQRSTU + Letters previously used, but now deprecated/obsolete: sVWXYZ. + + Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P. + + If CODE is 'd', then the X is a condition operand and the instruction should only be executed if the condition is true. if CODE is 'D', then the X is a condition operand and the instruction should only be executed if the condition is false: however, if the mode @@ -21175,6 +21708,19 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; + case 'b': + /* Print the log2 of a CONST_INT. */ + { + HOST_WIDE_INT val; + + if (!CONST_INT_P (x) + || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0) + output_operand_lossage ("Unsupported operand for code '%c'", code); + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } + return; + case 'L': /* The low 16 bits of an immediate constant. */ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); @@ -21256,7 +21802,7 @@ arm_print_operand (FILE *stream, rtx x, int code) case 'R': if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) { - enum machine_mode mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); rtx part; if (mode == VOIDmode) @@ -21417,7 +21963,7 @@ arm_print_operand (FILE *stream, rtx x, int code) register. 
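The new 'b' modifier prints the base-2 log of a power-of-two CONST_INT and raises an operand error for anything else; exact_log2 is GCC's single-set-bit test. A standalone equivalent of the check the case applies to INTVAL (x) & 0xffffffff:

#include <cstdint>
#include <cstdio>

/* exact_log2 lookalike: log2(X) if exactly one bit is set, else -1.  */
static int
exact_log2_u32 (uint32_t x)
{
  if (x == 0 || (x & (x - 1)) != 0)
    return -1;
  int n = 0;
  while ((x >>= 1) != 0)
    n++;
  return n;
}

int
main ()
{
  printf ("%d %d %d\n",
	  exact_log2_u32 (0x80),	/*  7: prints "#7" */
	  exact_log2_u32 (1),		/*  0 */
	  exact_log2_u32 (0x81));	/* -1: operand error */
  return 0;
}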
*/ case 'p': { - int mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) @@ -21441,7 +21987,7 @@ arm_print_operand (FILE *stream, rtx x, int code) case 'P': case 'q': { - int mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int is_quad = (code == 'q'); int regno; @@ -21477,7 +22023,7 @@ arm_print_operand (FILE *stream, rtx x, int code) case 'e': case 'f': { - int mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int regno; if ((GET_MODE_SIZE (mode) != 16 @@ -21559,6 +22105,7 @@ arm_print_operand (FILE *stream, rtx x, int code) { rtx addr; bool postinc = FALSE; + rtx postinc_reg = NULL; unsigned align, memsize, align_bits; gcc_assert (MEM_P (x)); @@ -21568,6 +22115,11 @@ arm_print_operand (FILE *stream, rtx x, int code) postinc = 1; addr = XEXP (addr, 0); } + if (GET_CODE (addr) == POST_MODIFY) + { + postinc_reg = XEXP( XEXP (addr, 1), 1); + addr = XEXP (addr, 0); + } asm_fprintf (stream, "[%r", REGNO (addr)); /* We know the alignment of this access, so we can emit a hint in the @@ -21593,6 +22145,8 @@ arm_print_operand (FILE *stream, rtx x, int code) if (postinc) fputs("!", stream); + if (postinc_reg) + asm_fprintf (stream, ", %r", REGNO (postinc_reg)); } return; @@ -21610,7 +22164,7 @@ arm_print_operand (FILE *stream, rtx x, int code) /* Translate an S register number into a D register number and element index. */ case 'y': { - int mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) @@ -21644,7 +22198,7 @@ arm_print_operand (FILE *stream, rtx x, int code) number into a D register number and element index. */ case 'z': { - int mode = GET_MODE (x); + machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) @@ -21684,15 +22238,12 @@ arm_print_operand (FILE *stream, rtx x, int code) break; case CONST_DOUBLE: - if (TARGET_NEON) - { - char fpstr[20]; - real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), - sizeof (fpstr), 0, 1); - fprintf (stream, "#%s", fpstr); - } - else - fprintf (stream, "#%s", fp_immediate_constant (x)); + { + char fpstr[20]; + real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), + sizeof (fpstr), 0, 1); + fprintf (stream, "#%s", fpstr); + } break; default: @@ -21731,9 +22282,7 @@ arm_print_operand_address (FILE *stream, rtx x) /* Ensure that BASE is a register. */ /* (one of them must be). */ /* Also ensure the SP is not used as in index register. 
*/ - rtx temp = base; - base = index; - index = temp; + std::swap (base, index); } switch (GET_CODE (index)) { @@ -21772,7 +22321,7 @@ arm_print_operand_address (FILE *stream, rtx x) else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC) { - extern enum machine_mode output_memory_reference_mode; + extern machine_mode output_memory_reference_mode; gcc_assert (REG_P (XEXP (x, 0))); @@ -21849,7 +22398,7 @@ arm_print_operand_punct_valid_p (unsigned char code) static bool arm_assemble_integer (rtx x, unsigned int size, int aligned_p) { - enum machine_mode mode; + machine_mode mode; if (size == UNITS_PER_WORD && aligned_p) { @@ -22005,7 +22554,7 @@ arm_elf_asm_destructor (rtx symbol, int priority) enum arm_cond_code maybe_get_arm_condition_code (rtx comparison) { - enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + machine_mode mode = GET_MODE (XEXP (comparison, 0)); enum arm_cond_code code; enum rtx_code comp_code = GET_CODE (comparison); @@ -22160,9 +22709,9 @@ get_arm_condition_code (rtx comparison) /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. */ void -thumb2_final_prescan_insn (rtx insn) +thumb2_final_prescan_insn (rtx_insn *insn) { - rtx first_insn = insn; + rtx_insn *first_insn = insn; rtx body = PATTERN (insn); rtx predicate; enum arm_cond_code code; @@ -22244,7 +22793,7 @@ thumb2_final_prescan_insn (rtx insn) } void -arm_final_prescan_insn (rtx insn) +arm_final_prescan_insn (rtx_insn *insn) { /* BODY will hold the body of INSN. */ rtx body = PATTERN (insn); @@ -22259,7 +22808,7 @@ arm_final_prescan_insn (rtx insn) /* START_INSN will hold the insn from where we start looking. This is the first insn after the following code_label if REVERSE is true. */ - rtx start_insn = insn; + rtx_insn *start_insn = insn; /* If in state 4, check if the target branch is reached, in order to change back to state 0. */ @@ -22331,7 +22880,8 @@ arm_final_prescan_insn (rtx insn) int fail = FALSE, succeed = FALSE; /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ int then_not_else = TRUE; - rtx this_insn = start_insn, label = 0; + rtx_insn *this_insn = start_insn; + rtx label = 0; /* Register the insn jumped to. */ if (reverse) @@ -22553,13 +23103,16 @@ thumb2_asm_output_opcode (FILE * stream) /* Returns true if REGNO is a valid register for holding a quantity of type MODE. */ int -arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) +arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) { if (GET_MODE_CLASS (mode) == MODE_CC) return (regno == CC_REGNUM || (TARGET_HARD_FLOAT && TARGET_VFP && regno == VFPCC_REGNUM)); + if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC) + return false; + if (TARGET_THUMB1) /* For the Thumb we only allow values bigger than SImode in registers 0 - 6, so that there is always a second low @@ -22605,12 +23158,19 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) } /* We allow almost any value to be stored in the general registers. - Restrict doubleword quantities to even register pairs so that we can - use ldrd. Do not allow very large Neon structure opaque modes in - general registers; they would use too many. */ + Restrict doubleword quantities to even register pairs in ARM state + so that we can use ldrd. Do not allow very large Neon structure + opaque modes in general registers; they would use too many. 
*/ if (regno <= LAST_ARM_REGNUM) - return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) - && ARM_NUM_REGS (mode) <= 4; + { + if (ARM_NUM_REGS (mode) > 4) + return FALSE; + + if (TARGET_THUMB2) + return TRUE; + + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); + } if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) @@ -22623,7 +23183,7 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) /* Implement MODES_TIEABLE_P. */ bool -arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +arm_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) return true; @@ -22649,6 +23209,9 @@ arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) enum reg_class arm_regno_class (int regno) { + if (regno == PC_REGNUM) + return NO_REGS; + if (TARGET_THUMB1) { if (regno == STACK_POINTER_REGNUM) @@ -22695,7 +23258,7 @@ arm_regno_class (int regno) int arm_debugger_arg_offset (int value, rtx addr) { - rtx insn; + rtx_insn *insn; /* We are only interested if dbxout_parms() failed to compute the offset. */ if (value != 0) @@ -22774,1769 +23337,6 @@ arm_debugger_arg_offset (int value, rtx addr) return value; } -typedef enum { - T_V8QI, - T_V4HI, - T_V4HF, - T_V2SI, - T_V2SF, - T_DI, - T_V16QI, - T_V8HI, - T_V4SI, - T_V4SF, - T_V2DI, - T_TI, - T_EI, - T_OI, - T_MAX /* Size of enum. Keep last. */ -} neon_builtin_type_mode; - -#define TYPE_MODE_BIT(X) (1 << (X)) - -#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ - | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \ - | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI)) -#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ - | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ - | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) - -#define v8qi_UP T_V8QI -#define v4hi_UP T_V4HI -#define v4hf_UP T_V4HF -#define v2si_UP T_V2SI -#define v2sf_UP T_V2SF -#define di_UP T_DI -#define v16qi_UP T_V16QI -#define v8hi_UP T_V8HI -#define v4si_UP T_V4SI -#define v4sf_UP T_V4SF -#define v2di_UP T_V2DI -#define ti_UP T_TI -#define ei_UP T_EI -#define oi_UP T_OI - -#define UP(X) X##_UP - -typedef enum { - NEON_BINOP, - NEON_TERNOP, - NEON_UNOP, - NEON_GETLANE, - NEON_SETLANE, - NEON_CREATE, - NEON_RINT, - NEON_DUP, - NEON_DUPLANE, - NEON_COMBINE, - NEON_SPLIT, - NEON_LANEMUL, - NEON_LANEMULL, - NEON_LANEMULH, - NEON_LANEMAC, - NEON_SCALARMUL, - NEON_SCALARMULL, - NEON_SCALARMULH, - NEON_SCALARMAC, - NEON_CONVERT, - NEON_FLOAT_WIDEN, - NEON_FLOAT_NARROW, - NEON_FIXCONV, - NEON_SELECT, - NEON_RESULTPAIR, - NEON_REINTERP, - NEON_VTBL, - NEON_VTBX, - NEON_LOAD1, - NEON_LOAD1LANE, - NEON_STORE1, - NEON_STORE1LANE, - NEON_LOADSTRUCT, - NEON_LOADSTRUCTLANE, - NEON_STORESTRUCT, - NEON_STORESTRUCTLANE, - NEON_LOGICBINOP, - NEON_SHIFTINSERT, - NEON_SHIFTIMM, - NEON_SHIFTACC -} neon_itype; - -typedef struct { - const char *name; - const neon_itype itype; - const neon_builtin_type_mode mode; - const enum insn_code code; - unsigned int fcode; -} neon_builtin_datum; - -#define CF(N,X) CODE_FOR_neon_##N##X - -#define VAR1(T, N, A) \ - {#N, NEON_##T, UP (A), CF (N, A), 0} -#define VAR2(T, N, A, B) \ - VAR1 (T, N, A), \ - {#N, NEON_##T, UP (B), CF (N, B), 0} -#define VAR3(T, N, A, B, C) \ - VAR2 (T, N, A, B), \ - {#N, NEON_##T, UP (C), CF (N, C), 0} -#define VAR4(T, N, A, B, C, D) \ - VAR3 (T, N, A, B, C), \ - {#N, NEON_##T, UP (D), CF (N, D), 0} -#define VAR5(T, N, A, B, C, D, E) \ - VAR4 (T, N, A, B, C, D), \ - {#N, 
NEON_##T, UP (E), CF (N, E), 0} -#define VAR6(T, N, A, B, C, D, E, F) \ - VAR5 (T, N, A, B, C, D, E), \ - {#N, NEON_##T, UP (F), CF (N, F), 0} -#define VAR7(T, N, A, B, C, D, E, F, G) \ - VAR6 (T, N, A, B, C, D, E, F), \ - {#N, NEON_##T, UP (G), CF (N, G), 0} -#define VAR8(T, N, A, B, C, D, E, F, G, H) \ - VAR7 (T, N, A, B, C, D, E, F, G), \ - {#N, NEON_##T, UP (H), CF (N, H), 0} -#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ - VAR8 (T, N, A, B, C, D, E, F, G, H), \ - {#N, NEON_##T, UP (I), CF (N, I), 0} -#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ - {#N, NEON_##T, UP (J), CF (N, J), 0} - -/* The NEON builtin data can be found in arm_neon_builtins.def. - The mode entries in the following table correspond to the "key" type of the - instruction variant, i.e. equivalent to that which would be specified after - the assembler mnemonic, which usually refers to the last vector operand. - (Signed/unsigned/polynomial types are not differentiated between though, and - are all mapped onto the same mode for a given element size.) The modes - listed per instruction should be the same as those defined for that - instruction's pattern in neon.md. */ - -static neon_builtin_datum neon_builtin_data[] = -{ -#include "arm_neon_builtins.def" -}; - -#undef CF -#undef VAR1 -#undef VAR2 -#undef VAR3 -#undef VAR4 -#undef VAR5 -#undef VAR6 -#undef VAR7 -#undef VAR8 -#undef VAR9 -#undef VAR10 - -#define CF(N,X) ARM_BUILTIN_NEON_##N##X -#define VAR1(T, N, A) \ - CF (N, A) -#define VAR2(T, N, A, B) \ - VAR1 (T, N, A), \ - CF (N, B) -#define VAR3(T, N, A, B, C) \ - VAR2 (T, N, A, B), \ - CF (N, C) -#define VAR4(T, N, A, B, C, D) \ - VAR3 (T, N, A, B, C), \ - CF (N, D) -#define VAR5(T, N, A, B, C, D, E) \ - VAR4 (T, N, A, B, C, D), \ - CF (N, E) -#define VAR6(T, N, A, B, C, D, E, F) \ - VAR5 (T, N, A, B, C, D, E), \ - CF (N, F) -#define VAR7(T, N, A, B, C, D, E, F, G) \ - VAR6 (T, N, A, B, C, D, E, F), \ - CF (N, G) -#define VAR8(T, N, A, B, C, D, E, F, G, H) \ - VAR7 (T, N, A, B, C, D, E, F, G), \ - CF (N, H) -#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ - VAR8 (T, N, A, B, C, D, E, F, G, H), \ - CF (N, I) -#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ - CF (N, J) -enum arm_builtins -{ - ARM_BUILTIN_GETWCGR0, - ARM_BUILTIN_GETWCGR1, - ARM_BUILTIN_GETWCGR2, - ARM_BUILTIN_GETWCGR3, - - ARM_BUILTIN_SETWCGR0, - ARM_BUILTIN_SETWCGR1, - ARM_BUILTIN_SETWCGR2, - ARM_BUILTIN_SETWCGR3, - - ARM_BUILTIN_WZERO, - - ARM_BUILTIN_WAVG2BR, - ARM_BUILTIN_WAVG2HR, - ARM_BUILTIN_WAVG2B, - ARM_BUILTIN_WAVG2H, - - ARM_BUILTIN_WACCB, - ARM_BUILTIN_WACCH, - ARM_BUILTIN_WACCW, - - ARM_BUILTIN_WMACS, - ARM_BUILTIN_WMACSZ, - ARM_BUILTIN_WMACU, - ARM_BUILTIN_WMACUZ, - - ARM_BUILTIN_WSADB, - ARM_BUILTIN_WSADBZ, - ARM_BUILTIN_WSADH, - ARM_BUILTIN_WSADHZ, - - ARM_BUILTIN_WALIGNI, - ARM_BUILTIN_WALIGNR0, - ARM_BUILTIN_WALIGNR1, - ARM_BUILTIN_WALIGNR2, - ARM_BUILTIN_WALIGNR3, - - ARM_BUILTIN_TMIA, - ARM_BUILTIN_TMIAPH, - ARM_BUILTIN_TMIABB, - ARM_BUILTIN_TMIABT, - ARM_BUILTIN_TMIATB, - ARM_BUILTIN_TMIATT, - - ARM_BUILTIN_TMOVMSKB, - ARM_BUILTIN_TMOVMSKH, - ARM_BUILTIN_TMOVMSKW, - - ARM_BUILTIN_TBCSTB, - ARM_BUILTIN_TBCSTH, - ARM_BUILTIN_TBCSTW, - - ARM_BUILTIN_WMADDS, - ARM_BUILTIN_WMADDU, - - ARM_BUILTIN_WPACKHSS, - ARM_BUILTIN_WPACKWSS, - ARM_BUILTIN_WPACKDSS, - ARM_BUILTIN_WPACKHUS, - ARM_BUILTIN_WPACKWUS, - ARM_BUILTIN_WPACKDUS, - - ARM_BUILTIN_WADDB, - ARM_BUILTIN_WADDH, - ARM_BUILTIN_WADDW, - ARM_BUILTIN_WADDSSB, - ARM_BUILTIN_WADDSSH, - 
ARM_BUILTIN_WADDSSW, - ARM_BUILTIN_WADDUSB, - ARM_BUILTIN_WADDUSH, - ARM_BUILTIN_WADDUSW, - ARM_BUILTIN_WSUBB, - ARM_BUILTIN_WSUBH, - ARM_BUILTIN_WSUBW, - ARM_BUILTIN_WSUBSSB, - ARM_BUILTIN_WSUBSSH, - ARM_BUILTIN_WSUBSSW, - ARM_BUILTIN_WSUBUSB, - ARM_BUILTIN_WSUBUSH, - ARM_BUILTIN_WSUBUSW, - - ARM_BUILTIN_WAND, - ARM_BUILTIN_WANDN, - ARM_BUILTIN_WOR, - ARM_BUILTIN_WXOR, - - ARM_BUILTIN_WCMPEQB, - ARM_BUILTIN_WCMPEQH, - ARM_BUILTIN_WCMPEQW, - ARM_BUILTIN_WCMPGTUB, - ARM_BUILTIN_WCMPGTUH, - ARM_BUILTIN_WCMPGTUW, - ARM_BUILTIN_WCMPGTSB, - ARM_BUILTIN_WCMPGTSH, - ARM_BUILTIN_WCMPGTSW, - - ARM_BUILTIN_TEXTRMSB, - ARM_BUILTIN_TEXTRMSH, - ARM_BUILTIN_TEXTRMSW, - ARM_BUILTIN_TEXTRMUB, - ARM_BUILTIN_TEXTRMUH, - ARM_BUILTIN_TEXTRMUW, - ARM_BUILTIN_TINSRB, - ARM_BUILTIN_TINSRH, - ARM_BUILTIN_TINSRW, - - ARM_BUILTIN_WMAXSW, - ARM_BUILTIN_WMAXSH, - ARM_BUILTIN_WMAXSB, - ARM_BUILTIN_WMAXUW, - ARM_BUILTIN_WMAXUH, - ARM_BUILTIN_WMAXUB, - ARM_BUILTIN_WMINSW, - ARM_BUILTIN_WMINSH, - ARM_BUILTIN_WMINSB, - ARM_BUILTIN_WMINUW, - ARM_BUILTIN_WMINUH, - ARM_BUILTIN_WMINUB, - - ARM_BUILTIN_WMULUM, - ARM_BUILTIN_WMULSM, - ARM_BUILTIN_WMULUL, - - ARM_BUILTIN_PSADBH, - ARM_BUILTIN_WSHUFH, - - ARM_BUILTIN_WSLLH, - ARM_BUILTIN_WSLLW, - ARM_BUILTIN_WSLLD, - ARM_BUILTIN_WSRAH, - ARM_BUILTIN_WSRAW, - ARM_BUILTIN_WSRAD, - ARM_BUILTIN_WSRLH, - ARM_BUILTIN_WSRLW, - ARM_BUILTIN_WSRLD, - ARM_BUILTIN_WRORH, - ARM_BUILTIN_WRORW, - ARM_BUILTIN_WRORD, - ARM_BUILTIN_WSLLHI, - ARM_BUILTIN_WSLLWI, - ARM_BUILTIN_WSLLDI, - ARM_BUILTIN_WSRAHI, - ARM_BUILTIN_WSRAWI, - ARM_BUILTIN_WSRADI, - ARM_BUILTIN_WSRLHI, - ARM_BUILTIN_WSRLWI, - ARM_BUILTIN_WSRLDI, - ARM_BUILTIN_WRORHI, - ARM_BUILTIN_WRORWI, - ARM_BUILTIN_WRORDI, - - ARM_BUILTIN_WUNPCKIHB, - ARM_BUILTIN_WUNPCKIHH, - ARM_BUILTIN_WUNPCKIHW, - ARM_BUILTIN_WUNPCKILB, - ARM_BUILTIN_WUNPCKILH, - ARM_BUILTIN_WUNPCKILW, - - ARM_BUILTIN_WUNPCKEHSB, - ARM_BUILTIN_WUNPCKEHSH, - ARM_BUILTIN_WUNPCKEHSW, - ARM_BUILTIN_WUNPCKEHUB, - ARM_BUILTIN_WUNPCKEHUH, - ARM_BUILTIN_WUNPCKEHUW, - ARM_BUILTIN_WUNPCKELSB, - ARM_BUILTIN_WUNPCKELSH, - ARM_BUILTIN_WUNPCKELSW, - ARM_BUILTIN_WUNPCKELUB, - ARM_BUILTIN_WUNPCKELUH, - ARM_BUILTIN_WUNPCKELUW, - - ARM_BUILTIN_WABSB, - ARM_BUILTIN_WABSH, - ARM_BUILTIN_WABSW, - - ARM_BUILTIN_WADDSUBHX, - ARM_BUILTIN_WSUBADDHX, - - ARM_BUILTIN_WABSDIFFB, - ARM_BUILTIN_WABSDIFFH, - ARM_BUILTIN_WABSDIFFW, - - ARM_BUILTIN_WADDCH, - ARM_BUILTIN_WADDCW, - - ARM_BUILTIN_WAVG4, - ARM_BUILTIN_WAVG4R, - - ARM_BUILTIN_WMADDSX, - ARM_BUILTIN_WMADDUX, - - ARM_BUILTIN_WMADDSN, - ARM_BUILTIN_WMADDUN, - - ARM_BUILTIN_WMULWSM, - ARM_BUILTIN_WMULWUM, - - ARM_BUILTIN_WMULWSMR, - ARM_BUILTIN_WMULWUMR, - - ARM_BUILTIN_WMULWL, - - ARM_BUILTIN_WMULSMR, - ARM_BUILTIN_WMULUMR, - - ARM_BUILTIN_WQMULM, - ARM_BUILTIN_WQMULMR, - - ARM_BUILTIN_WQMULWM, - ARM_BUILTIN_WQMULWMR, - - ARM_BUILTIN_WADDBHUSM, - ARM_BUILTIN_WADDBHUSL, - - ARM_BUILTIN_WQMIABB, - ARM_BUILTIN_WQMIABT, - ARM_BUILTIN_WQMIATB, - ARM_BUILTIN_WQMIATT, - - ARM_BUILTIN_WQMIABBN, - ARM_BUILTIN_WQMIABTN, - ARM_BUILTIN_WQMIATBN, - ARM_BUILTIN_WQMIATTN, - - ARM_BUILTIN_WMIABB, - ARM_BUILTIN_WMIABT, - ARM_BUILTIN_WMIATB, - ARM_BUILTIN_WMIATT, - - ARM_BUILTIN_WMIABBN, - ARM_BUILTIN_WMIABTN, - ARM_BUILTIN_WMIATBN, - ARM_BUILTIN_WMIATTN, - - ARM_BUILTIN_WMIAWBB, - ARM_BUILTIN_WMIAWBT, - ARM_BUILTIN_WMIAWTB, - ARM_BUILTIN_WMIAWTT, - - ARM_BUILTIN_WMIAWBBN, - ARM_BUILTIN_WMIAWBTN, - ARM_BUILTIN_WMIAWTBN, - ARM_BUILTIN_WMIAWTTN, - - ARM_BUILTIN_WMERGE, - - ARM_BUILTIN_CRC32B, - ARM_BUILTIN_CRC32H, - ARM_BUILTIN_CRC32W, - ARM_BUILTIN_CRC32CB, - 
ARM_BUILTIN_CRC32CH, - ARM_BUILTIN_CRC32CW, - -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 - -#define CRYPTO1(L, U, M1, M2) \ - ARM_BUILTIN_CRYPTO_##U, -#define CRYPTO2(L, U, M1, M2, M3) \ - ARM_BUILTIN_CRYPTO_##U, -#define CRYPTO3(L, U, M1, M2, M3, M4) \ - ARM_BUILTIN_CRYPTO_##U, - -#include "crypto.def" - -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 - -#include "arm_neon_builtins.def" - - ,ARM_BUILTIN_MAX -}; - -#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) - -#undef CF -#undef VAR1 -#undef VAR2 -#undef VAR3 -#undef VAR4 -#undef VAR5 -#undef VAR6 -#undef VAR7 -#undef VAR8 -#undef VAR9 -#undef VAR10 - -static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX]; - -#define NUM_DREG_TYPES 5 -#define NUM_QREG_TYPES 6 - -static void -arm_init_neon_builtins (void) -{ - unsigned int i, fcode; - tree decl; - - tree neon_intQI_type_node; - tree neon_intHI_type_node; - tree neon_floatHF_type_node; - tree neon_polyQI_type_node; - tree neon_polyHI_type_node; - tree neon_intSI_type_node; - tree neon_intDI_type_node; - tree neon_intUTI_type_node; - tree neon_float_type_node; - - tree intQI_pointer_node; - tree intHI_pointer_node; - tree intSI_pointer_node; - tree intDI_pointer_node; - tree float_pointer_node; - - tree const_intQI_node; - tree const_intHI_node; - tree const_intSI_node; - tree const_intDI_node; - tree const_float_node; - - tree const_intQI_pointer_node; - tree const_intHI_pointer_node; - tree const_intSI_pointer_node; - tree const_intDI_pointer_node; - tree const_float_pointer_node; - - tree V8QI_type_node; - tree V4HI_type_node; - tree V4HF_type_node; - tree V2SI_type_node; - tree V2SF_type_node; - tree V16QI_type_node; - tree V8HI_type_node; - tree V4SI_type_node; - tree V4SF_type_node; - tree V2DI_type_node; - - tree intUQI_type_node; - tree intUHI_type_node; - tree intUSI_type_node; - tree intUDI_type_node; - - tree intEI_type_node; - tree intOI_type_node; - tree intCI_type_node; - tree intXI_type_node; - - tree V8QI_pointer_node; - tree V4HI_pointer_node; - tree V2SI_pointer_node; - tree V2SF_pointer_node; - tree V16QI_pointer_node; - tree V8HI_pointer_node; - tree V4SI_pointer_node; - tree V4SF_pointer_node; - tree V2DI_pointer_node; - - tree void_ftype_pv8qi_v8qi_v8qi; - tree void_ftype_pv4hi_v4hi_v4hi; - tree void_ftype_pv2si_v2si_v2si; - tree void_ftype_pv2sf_v2sf_v2sf; - tree void_ftype_pdi_di_di; - tree void_ftype_pv16qi_v16qi_v16qi; - tree void_ftype_pv8hi_v8hi_v8hi; - tree void_ftype_pv4si_v4si_v4si; - tree void_ftype_pv4sf_v4sf_v4sf; - tree void_ftype_pv2di_v2di_v2di; - - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; - tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; - tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; - - /* Create distinguished type nodes for NEON vector element types, - and pointers to values of such types, so we can detect them later. 
*/ - neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); - neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); - neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); - neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); - neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode)); - neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode)); - neon_float_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; - layout_type (neon_float_type_node); - neon_floatHF_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode); - layout_type (neon_floatHF_type_node); - - /* Define typedefs which exactly correspond to the modes we are basing vector - types on. If you change these names you'll need to change - the table used by arm_mangle_type too. */ - (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, - "__builtin_neon_qi"); - (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, - "__builtin_neon_hi"); - (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node, - "__builtin_neon_hf"); - (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, - "__builtin_neon_si"); - (*lang_hooks.types.register_builtin_type) (neon_float_type_node, - "__builtin_neon_sf"); - (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, - "__builtin_neon_di"); - (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, - "__builtin_neon_poly8"); - (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, - "__builtin_neon_poly16"); - - intQI_pointer_node = build_pointer_type (neon_intQI_type_node); - intHI_pointer_node = build_pointer_type (neon_intHI_type_node); - intSI_pointer_node = build_pointer_type (neon_intSI_type_node); - intDI_pointer_node = build_pointer_type (neon_intDI_type_node); - float_pointer_node = build_pointer_type (neon_float_type_node); - - /* Next create constant-qualified versions of the above types. */ - const_intQI_node = build_qualified_type (neon_intQI_type_node, - TYPE_QUAL_CONST); - const_intHI_node = build_qualified_type (neon_intHI_type_node, - TYPE_QUAL_CONST); - const_intSI_node = build_qualified_type (neon_intSI_type_node, - TYPE_QUAL_CONST); - const_intDI_node = build_qualified_type (neon_intDI_type_node, - TYPE_QUAL_CONST); - const_float_node = build_qualified_type (neon_float_type_node, - TYPE_QUAL_CONST); - - const_intQI_pointer_node = build_pointer_type (const_intQI_node); - const_intHI_pointer_node = build_pointer_type (const_intHI_node); - const_intSI_pointer_node = build_pointer_type (const_intSI_node); - const_intDI_pointer_node = build_pointer_type (const_intDI_node); - const_float_pointer_node = build_pointer_type (const_float_node); - - /* Now create vector types based on our NEON element types. */ - /* 64-bit vectors. */ - V8QI_type_node = - build_vector_type_for_mode (neon_intQI_type_node, V8QImode); - V4HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V4HImode); - V4HF_type_node = - build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); - V2SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V2SImode); - V2SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V2SFmode); - /* 128-bit vectors. 
*/ - V16QI_type_node = - build_vector_type_for_mode (neon_intQI_type_node, V16QImode); - V8HI_type_node = - build_vector_type_for_mode (neon_intHI_type_node, V8HImode); - V4SI_type_node = - build_vector_type_for_mode (neon_intSI_type_node, V4SImode); - V4SF_type_node = - build_vector_type_for_mode (neon_float_type_node, V4SFmode); - V2DI_type_node = - build_vector_type_for_mode (neon_intDI_type_node, V2DImode); - - /* Unsigned integer types for various mode sizes. */ - intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); - intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); - intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); - intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); - neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - - - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, - "__builtin_neon_uqi"); - (*lang_hooks.types.register_builtin_type) (intUHI_type_node, - "__builtin_neon_uhi"); - (*lang_hooks.types.register_builtin_type) (intUSI_type_node, - "__builtin_neon_usi"); - (*lang_hooks.types.register_builtin_type) (intUDI_type_node, - "__builtin_neon_udi"); - (*lang_hooks.types.register_builtin_type) (intUDI_type_node, - "__builtin_neon_poly64"); - (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node, - "__builtin_neon_poly128"); - - /* Opaque integer types for structures of vectors. */ - intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); - intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); - intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); - intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); - - (*lang_hooks.types.register_builtin_type) (intTI_type_node, - "__builtin_neon_ti"); - (*lang_hooks.types.register_builtin_type) (intEI_type_node, - "__builtin_neon_ei"); - (*lang_hooks.types.register_builtin_type) (intOI_type_node, - "__builtin_neon_oi"); - (*lang_hooks.types.register_builtin_type) (intCI_type_node, - "__builtin_neon_ci"); - (*lang_hooks.types.register_builtin_type) (intXI_type_node, - "__builtin_neon_xi"); - - /* Pointers to vector types. */ - V8QI_pointer_node = build_pointer_type (V8QI_type_node); - V4HI_pointer_node = build_pointer_type (V4HI_type_node); - V2SI_pointer_node = build_pointer_type (V2SI_type_node); - V2SF_pointer_node = build_pointer_type (V2SF_type_node); - V16QI_pointer_node = build_pointer_type (V16QI_type_node); - V8HI_pointer_node = build_pointer_type (V8HI_type_node); - V4SI_pointer_node = build_pointer_type (V4SI_type_node); - V4SF_pointer_node = build_pointer_type (V4SF_type_node); - V2DI_pointer_node = build_pointer_type (V2DI_type_node); - - /* Operations which return results as pairs. 
*/ - void_ftype_pv8qi_v8qi_v8qi = - build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, - V8QI_type_node, NULL); - void_ftype_pv4hi_v4hi_v4hi = - build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, - V4HI_type_node, NULL); - void_ftype_pv2si_v2si_v2si = - build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, - V2SI_type_node, NULL); - void_ftype_pv2sf_v2sf_v2sf = - build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, - V2SF_type_node, NULL); - void_ftype_pdi_di_di = - build_function_type_list (void_type_node, intDI_pointer_node, - neon_intDI_type_node, neon_intDI_type_node, NULL); - void_ftype_pv16qi_v16qi_v16qi = - build_function_type_list (void_type_node, V16QI_pointer_node, - V16QI_type_node, V16QI_type_node, NULL); - void_ftype_pv8hi_v8hi_v8hi = - build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, - V8HI_type_node, NULL); - void_ftype_pv4si_v4si_v4si = - build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, - V4SI_type_node, NULL); - void_ftype_pv4sf_v4sf_v4sf = - build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, - V4SF_type_node, NULL); - void_ftype_pv2di_v2di_v2di = - build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, - V2DI_type_node, NULL); - - if (TARGET_CRYPTO && TARGET_HARD_FLOAT) - { - tree V4USI_type_node = - build_vector_type_for_mode (intUSI_type_node, V4SImode); - - tree V16UQI_type_node = - build_vector_type_for_mode (intUQI_type_node, V16QImode); - - tree v16uqi_ftype_v16uqi - = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE); - - tree v16uqi_ftype_v16uqi_v16uqi - = build_function_type_list (V16UQI_type_node, V16UQI_type_node, - V16UQI_type_node, NULL_TREE); - - tree v4usi_ftype_v4usi - = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE); - - tree v4usi_ftype_v4usi_v4usi - = build_function_type_list (V4USI_type_node, V4USI_type_node, - V4USI_type_node, NULL_TREE); - - tree v4usi_ftype_v4usi_v4usi_v4usi - = build_function_type_list (V4USI_type_node, V4USI_type_node, - V4USI_type_node, V4USI_type_node, NULL_TREE); - - tree uti_ftype_udi_udi - = build_function_type_list (neon_intUTI_type_node, intUDI_type_node, - intUDI_type_node, NULL_TREE); - - #undef CRYPTO1 - #undef CRYPTO2 - #undef CRYPTO3 - #undef C - #undef N - #undef CF - #undef FT1 - #undef FT2 - #undef FT3 - - #define C(U) \ - ARM_BUILTIN_CRYPTO_##U - #define N(L) \ - "__builtin_arm_crypto_"#L - #define FT1(R, A) \ - R##_ftype_##A - #define FT2(R, A1, A2) \ - R##_ftype_##A1##_##A2 - #define FT3(R, A1, A2, A3) \ - R##_ftype_##A1##_##A2##_##A3 - #define CRYPTO1(L, U, R, A) \ - arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \ - C (U), BUILT_IN_MD, \ - NULL, NULL_TREE); - #define CRYPTO2(L, U, R, A1, A2) \ - arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \ - C (U), BUILT_IN_MD, \ - NULL, NULL_TREE); - - #define CRYPTO3(L, U, R, A1, A2, A3) \ - arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \ - C (U), BUILT_IN_MD, \ - NULL, NULL_TREE); - #include "crypto.def" - - #undef CRYPTO1 - #undef CRYPTO2 - #undef CRYPTO3 - #undef C - #undef N - #undef FT1 - #undef FT2 - #undef FT3 - } - dreg_types[0] = V8QI_type_node; - dreg_types[1] = V4HI_type_node; - dreg_types[2] = V2SI_type_node; - dreg_types[3] = V2SF_type_node; - dreg_types[4] = neon_intDI_type_node; - - qreg_types[0] = V16QI_type_node; - qreg_types[1] = 
V8HI_type_node; - qreg_types[2] = V4SI_type_node; - qreg_types[3] = V4SF_type_node; - qreg_types[4] = V2DI_type_node; - qreg_types[5] = neon_intUTI_type_node; - - for (i = 0; i < NUM_QREG_TYPES; i++) - { - int j; - for (j = 0; j < NUM_QREG_TYPES; j++) - { - if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES) - reinterp_ftype_dreg[i][j] - = build_function_type_list (dreg_types[i], dreg_types[j], NULL); - - reinterp_ftype_qreg[i][j] - = build_function_type_list (qreg_types[i], qreg_types[j], NULL); - } - } - - for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; - i < ARRAY_SIZE (neon_builtin_data); - i++, fcode++) - { - neon_builtin_datum *d = &neon_builtin_data[i]; - - const char* const modenames[] = { - "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di", - "v16qi", "v8hi", "v4si", "v4sf", "v2di", - "ti", "ei", "oi" - }; - char namebuf[60]; - tree ftype = NULL; - int is_load = 0, is_store = 0; - - gcc_assert (ARRAY_SIZE (modenames) == T_MAX); - - d->fcode = fcode; - - switch (d->itype) - { - case NEON_LOAD1: - case NEON_LOAD1LANE: - case NEON_LOADSTRUCT: - case NEON_LOADSTRUCTLANE: - is_load = 1; - /* Fall through. */ - case NEON_STORE1: - case NEON_STORE1LANE: - case NEON_STORESTRUCT: - case NEON_STORESTRUCTLANE: - if (!is_load) - is_store = 1; - /* Fall through. */ - case NEON_UNOP: - case NEON_RINT: - case NEON_BINOP: - case NEON_LOGICBINOP: - case NEON_SHIFTINSERT: - case NEON_TERNOP: - case NEON_GETLANE: - case NEON_SETLANE: - case NEON_CREATE: - case NEON_DUP: - case NEON_DUPLANE: - case NEON_SHIFTIMM: - case NEON_SHIFTACC: - case NEON_COMBINE: - case NEON_SPLIT: - case NEON_CONVERT: - case NEON_FIXCONV: - case NEON_LANEMUL: - case NEON_LANEMULL: - case NEON_LANEMULH: - case NEON_LANEMAC: - case NEON_SCALARMUL: - case NEON_SCALARMULL: - case NEON_SCALARMULH: - case NEON_SCALARMAC: - case NEON_SELECT: - case NEON_VTBL: - case NEON_VTBX: - { - int k; - tree return_type = void_type_node, args = void_list_node; - - /* Build a function type directly from the insn_data for - this builtin. The build_function_type() function takes - care of removing duplicates for us. */ - for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--) - { - tree eltype; - - if (is_load && k == 1) - { - /* Neon load patterns always have the memory - operand in the operand 1 position. */ - gcc_assert (insn_data[d->code].operand[k].predicate - == neon_struct_operand); - - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = const_intQI_pointer_node; - break; - - case T_V4HI: - case T_V8HI: - eltype = const_intHI_pointer_node; - break; - - case T_V2SI: - case T_V4SI: - eltype = const_intSI_pointer_node; - break; - - case T_V2SF: - case T_V4SF: - eltype = const_float_pointer_node; - break; - - case T_DI: - case T_V2DI: - eltype = const_intDI_pointer_node; - break; - - default: gcc_unreachable (); - } - } - else if (is_store && k == 0) - { - /* Similarly, Neon store patterns use operand 0 as - the memory location to store to. 
*/ - gcc_assert (insn_data[d->code].operand[k].predicate - == neon_struct_operand); - - switch (d->mode) - { - case T_V8QI: - case T_V16QI: - eltype = intQI_pointer_node; - break; - - case T_V4HI: - case T_V8HI: - eltype = intHI_pointer_node; - break; - - case T_V2SI: - case T_V4SI: - eltype = intSI_pointer_node; - break; - - case T_V2SF: - case T_V4SF: - eltype = float_pointer_node; - break; - - case T_DI: - case T_V2DI: - eltype = intDI_pointer_node; - break; - - default: gcc_unreachable (); - } - } - else - { - switch (insn_data[d->code].operand[k].mode) - { - case VOIDmode: eltype = void_type_node; break; - /* Scalars. */ - case QImode: eltype = neon_intQI_type_node; break; - case HImode: eltype = neon_intHI_type_node; break; - case SImode: eltype = neon_intSI_type_node; break; - case SFmode: eltype = neon_float_type_node; break; - case DImode: eltype = neon_intDI_type_node; break; - case TImode: eltype = intTI_type_node; break; - case EImode: eltype = intEI_type_node; break; - case OImode: eltype = intOI_type_node; break; - case CImode: eltype = intCI_type_node; break; - case XImode: eltype = intXI_type_node; break; - /* 64-bit vectors. */ - case V8QImode: eltype = V8QI_type_node; break; - case V4HImode: eltype = V4HI_type_node; break; - case V2SImode: eltype = V2SI_type_node; break; - case V2SFmode: eltype = V2SF_type_node; break; - /* 128-bit vectors. */ - case V16QImode: eltype = V16QI_type_node; break; - case V8HImode: eltype = V8HI_type_node; break; - case V4SImode: eltype = V4SI_type_node; break; - case V4SFmode: eltype = V4SF_type_node; break; - case V2DImode: eltype = V2DI_type_node; break; - default: gcc_unreachable (); - } - } - - if (k == 0 && !is_store) - return_type = eltype; - else - args = tree_cons (NULL_TREE, eltype, args); - } - - ftype = build_function_type (return_type, args); - } - break; - - case NEON_RESULTPAIR: - { - switch (insn_data[d->code].operand[1].mode) - { - case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; - case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; - case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; - case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; - case DImode: ftype = void_ftype_pdi_di_di; break; - case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; - case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; - case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; - case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; - case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; - default: gcc_unreachable (); - } - } - break; - - case NEON_REINTERP: - { - /* We iterate over NUM_DREG_TYPES doubleword types, - then NUM_QREG_TYPES quadword types. - V4HF is not a type used in reinterpret, so we translate - d->mode to the correct index in reinterp_ftype_dreg. */ - bool qreg_p - = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8; - int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 
1 : 0)) - % NUM_QREG_TYPES; - switch (insn_data[d->code].operand[0].mode) - { - case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; - case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; - case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; - case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; - case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; - case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; - case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; - case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; - case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; - case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; - case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break; - default: gcc_unreachable (); - } - } - break; - case NEON_FLOAT_WIDEN: - { - tree eltype = NULL_TREE; - tree return_type = NULL_TREE; - - switch (insn_data[d->code].operand[1].mode) - { - case V4HFmode: - eltype = V4HF_type_node; - return_type = V4SF_type_node; - break; - default: gcc_unreachable (); - } - ftype = build_function_type_list (return_type, eltype, NULL); - break; - } - case NEON_FLOAT_NARROW: - { - tree eltype = NULL_TREE; - tree return_type = NULL_TREE; - - switch (insn_data[d->code].operand[1].mode) - { - case V4SFmode: - eltype = V4SF_type_node; - return_type = V4HF_type_node; - break; - default: gcc_unreachable (); - } - ftype = build_function_type_list (return_type, eltype, NULL); - break; - } - default: - gcc_unreachable (); - } - - gcc_assert (ftype != NULL); - - sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); - - decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, - NULL_TREE); - arm_builtin_decls[fcode] = decl; - } -} - -#undef NUM_DREG_TYPES -#undef NUM_QREG_TYPES - -#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ - do \ - { \ - if ((MASK) & insn_flags) \ - { \ - tree bdecl; \ - bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \ - BUILT_IN_MD, NULL, NULL_TREE); \ - arm_builtin_decls[CODE] = bdecl; \ - } \ - } \ - while (0) - -struct builtin_description -{ - const unsigned int mask; - const enum insn_code icode; - const char * const name; - const enum arm_builtins code; - const enum rtx_code comparison; - const unsigned int flag; -}; - -static const struct builtin_description bdesc_2arg[] = -{ -#define IWMMXT_BUILTIN(code, string, builtin) \ - { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ - ARM_BUILTIN_##builtin, UNKNOWN, 0 }, - -#define IWMMXT2_BUILTIN(code, string, builtin) \ - { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \ - ARM_BUILTIN_##builtin, UNKNOWN, 0 }, - - IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) - IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) - IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) - IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) - IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) - IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) - IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) - IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) - IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) - IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) - IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) - IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) - IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) - IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) - IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) - IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) - IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) - IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) - IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) - 
IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) - IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) - IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) - IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) - IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) - IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) - IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) - IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) - IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) - IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) - IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) - IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) - IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) - IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) - IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) - IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) - IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) - IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) - IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) - IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) - IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) - IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) - IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) - IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) - IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) - IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) - IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) - IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) - IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) - IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) - IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) - IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) - IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) - IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) - IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) - IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) - IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) - IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX) - IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX) - IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB) - IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH) - IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW) - IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4) - IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R) - IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM) - IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM) - IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR) - IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR) - IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL) - IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR) - IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR) - IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM) - IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR) - IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM) - IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR) - IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0) - IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1) - IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2) - IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3) - -#define IWMMXT_BUILTIN2(code, builtin) \ - { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, - -#define IWMMXT2_BUILTIN2(code, builtin) \ - { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, - - IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM) - IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL) - IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackwss, 
WPACKWSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) - IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) - IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) - IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) - IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) - IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) - -#define CRC32_BUILTIN(L, U) \ - {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ - UNKNOWN, 0}, - CRC32_BUILTIN (crc32b, CRC32B) - CRC32_BUILTIN (crc32h, CRC32H) - CRC32_BUILTIN (crc32w, CRC32W) - CRC32_BUILTIN (crc32cb, CRC32CB) - CRC32_BUILTIN (crc32ch, CRC32CH) - CRC32_BUILTIN (crc32cw, CRC32CW) -#undef CRC32_BUILTIN - - -#define CRYPTO_BUILTIN(L, U) \ - {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \ - UNKNOWN, 0}, -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 -#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U) -#define CRYPTO1(L, U, R, A) -#define CRYPTO3(L, U, R, A1, A2, A3) -#include "crypto.def" -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 - -}; - -static const struct builtin_description bdesc_1arg[] = -{ - IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) - IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) - IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) - IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) - IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) - IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) - IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) - IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) - IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) - IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) - IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) - IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) - IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) - IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) - IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) - IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) - IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) - IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) - IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB) - IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH) - IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW) - IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB) - IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH) - IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW) - -#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U) -#define CRYPTO2(L, U, R, A1, A2) -#define CRYPTO3(L, U, R, A1, A2, A3) -#include "crypto.def" -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 -}; - -static const struct builtin_description bdesc_3arg[] = -{ -#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U) -#define CRYPTO1(L, U, R, A) -#define CRYPTO2(L, U, R, A1, A2) -#include "crypto.def" -#undef CRYPTO1 -#undef CRYPTO2 -#undef CRYPTO3 - }; -#undef CRYPTO_BUILTIN - -/* Set up all the iWMMXt builtins. This is not called if - TARGET_IWMMXT is zero. 
*/ - -static void -arm_init_iwmmxt_builtins (void) -{ - const struct builtin_description * d; - size_t i; - - tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); - tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); - tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); - - tree v8qi_ftype_v8qi_v8qi_int - = build_function_type_list (V8QI_type_node, - V8QI_type_node, V8QI_type_node, - integer_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_int - = build_function_type_list (V4HI_type_node, - V4HI_type_node, integer_type_node, NULL_TREE); - tree v2si_ftype_v2si_int - = build_function_type_list (V2SI_type_node, - V2SI_type_node, integer_type_node, NULL_TREE); - tree v2si_ftype_di_di - = build_function_type_list (V2SI_type_node, - long_long_integer_type_node, - long_long_integer_type_node, - NULL_TREE); - tree di_ftype_di_int - = build_function_type_list (long_long_integer_type_node, - long_long_integer_type_node, - integer_type_node, NULL_TREE); - tree di_ftype_di_int_int - = build_function_type_list (long_long_integer_type_node, - long_long_integer_type_node, - integer_type_node, - integer_type_node, NULL_TREE); - tree int_ftype_v8qi - = build_function_type_list (integer_type_node, - V8QI_type_node, NULL_TREE); - tree int_ftype_v4hi - = build_function_type_list (integer_type_node, - V4HI_type_node, NULL_TREE); - tree int_ftype_v2si - = build_function_type_list (integer_type_node, - V2SI_type_node, NULL_TREE); - tree int_ftype_v8qi_int - = build_function_type_list (integer_type_node, - V8QI_type_node, integer_type_node, NULL_TREE); - tree int_ftype_v4hi_int - = build_function_type_list (integer_type_node, - V4HI_type_node, integer_type_node, NULL_TREE); - tree int_ftype_v2si_int - = build_function_type_list (integer_type_node, - V2SI_type_node, integer_type_node, NULL_TREE); - tree v8qi_ftype_v8qi_int_int - = build_function_type_list (V8QI_type_node, - V8QI_type_node, integer_type_node, - integer_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_int_int - = build_function_type_list (V4HI_type_node, - V4HI_type_node, integer_type_node, - integer_type_node, NULL_TREE); - tree v2si_ftype_v2si_int_int - = build_function_type_list (V2SI_type_node, - V2SI_type_node, integer_type_node, - integer_type_node, NULL_TREE); - /* Miscellaneous. 
*/ - tree v8qi_ftype_v4hi_v4hi - = build_function_type_list (V8QI_type_node, - V4HI_type_node, V4HI_type_node, NULL_TREE); - tree v4hi_ftype_v2si_v2si - = build_function_type_list (V4HI_type_node, - V2SI_type_node, V2SI_type_node, NULL_TREE); - tree v8qi_ftype_v4hi_v8qi - = build_function_type_list (V8QI_type_node, - V4HI_type_node, V8QI_type_node, NULL_TREE); - tree v2si_ftype_v4hi_v4hi - = build_function_type_list (V2SI_type_node, - V4HI_type_node, V4HI_type_node, NULL_TREE); - tree v2si_ftype_v8qi_v8qi - = build_function_type_list (V2SI_type_node, - V8QI_type_node, V8QI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_di - = build_function_type_list (V4HI_type_node, - V4HI_type_node, long_long_integer_type_node, - NULL_TREE); - tree v2si_ftype_v2si_di - = build_function_type_list (V2SI_type_node, - V2SI_type_node, long_long_integer_type_node, - NULL_TREE); - tree di_ftype_void - = build_function_type_list (long_long_unsigned_type_node, NULL_TREE); - tree int_ftype_void - = build_function_type_list (integer_type_node, NULL_TREE); - tree di_ftype_v8qi - = build_function_type_list (long_long_integer_type_node, - V8QI_type_node, NULL_TREE); - tree di_ftype_v4hi - = build_function_type_list (long_long_integer_type_node, - V4HI_type_node, NULL_TREE); - tree di_ftype_v2si - = build_function_type_list (long_long_integer_type_node, - V2SI_type_node, NULL_TREE); - tree v2si_ftype_v4hi - = build_function_type_list (V2SI_type_node, - V4HI_type_node, NULL_TREE); - tree v4hi_ftype_v8qi - = build_function_type_list (V4HI_type_node, - V8QI_type_node, NULL_TREE); - tree v8qi_ftype_v8qi - = build_function_type_list (V8QI_type_node, - V8QI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi - = build_function_type_list (V4HI_type_node, - V4HI_type_node, NULL_TREE); - tree v2si_ftype_v2si - = build_function_type_list (V2SI_type_node, - V2SI_type_node, NULL_TREE); - - tree di_ftype_di_v4hi_v4hi - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - V4HI_type_node, V4HI_type_node, - NULL_TREE); - - tree di_ftype_v4hi_v4hi - = build_function_type_list (long_long_unsigned_type_node, - V4HI_type_node,V4HI_type_node, - NULL_TREE); - - tree v2si_ftype_v2si_v4hi_v4hi - = build_function_type_list (V2SI_type_node, - V2SI_type_node, V4HI_type_node, - V4HI_type_node, NULL_TREE); - - tree v2si_ftype_v2si_v8qi_v8qi - = build_function_type_list (V2SI_type_node, - V2SI_type_node, V8QI_type_node, - V8QI_type_node, NULL_TREE); - - tree di_ftype_di_v2si_v2si - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - V2SI_type_node, V2SI_type_node, - NULL_TREE); - - tree di_ftype_di_di_int - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - integer_type_node, NULL_TREE); - - tree void_ftype_int - = build_function_type_list (void_type_node, - integer_type_node, NULL_TREE); - - tree v8qi_ftype_char - = build_function_type_list (V8QI_type_node, - signed_char_type_node, NULL_TREE); - - tree v4hi_ftype_short - = build_function_type_list (V4HI_type_node, - short_integer_type_node, NULL_TREE); - - tree v2si_ftype_int - = build_function_type_list (V2SI_type_node, - integer_type_node, NULL_TREE); - - /* Normal vector binops. 
*/ - tree v8qi_ftype_v8qi_v8qi - = build_function_type_list (V8QI_type_node, - V8QI_type_node, V8QI_type_node, NULL_TREE); - tree v4hi_ftype_v4hi_v4hi - = build_function_type_list (V4HI_type_node, - V4HI_type_node,V4HI_type_node, NULL_TREE); - tree v2si_ftype_v2si_v2si - = build_function_type_list (V2SI_type_node, - V2SI_type_node, V2SI_type_node, NULL_TREE); - tree di_ftype_di_di - = build_function_type_list (long_long_unsigned_type_node, - long_long_unsigned_type_node, - long_long_unsigned_type_node, - NULL_TREE); - - /* Add all builtins that are more or less simple operations on two - operands. */ - for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) - { - /* Use one of the operands; the target can have a different mode for - mask-generating compares. */ - enum machine_mode mode; - tree type; - - if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2)) - continue; - - mode = insn_data[d->icode].operand[1].mode; - - switch (mode) - { - case V8QImode: - type = v8qi_ftype_v8qi_v8qi; - break; - case V4HImode: - type = v4hi_ftype_v4hi_v4hi; - break; - case V2SImode: - type = v2si_ftype_v2si_v2si; - break; - case DImode: - type = di_ftype_di_di; - break; - - default: - gcc_unreachable (); - } - - def_mbuiltin (d->mask, d->name, type, d->code); - } - - /* Add the remaining MMX insns with somewhat more complicated types. */ -#define iwmmx_mbuiltin(NAME, TYPE, CODE) \ - def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \ - ARM_BUILTIN_ ## CODE) - -#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \ - def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \ - ARM_BUILTIN_ ## CODE) - - iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO); - iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0); - iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1); - iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2); - iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3); - iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0); - iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1); - iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2); - iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3); - - iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH); - iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW); - iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD); - iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI); - iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI); - iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI); - - iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH); - iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW); - iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD); - iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI); - iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI); - iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI); - - iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH); - iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW); - iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD); - iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI); - iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI); - iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI); - - iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH); - iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW); - iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD); - iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI); - iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI); - iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI); - - iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH); - - iwmmx_mbuiltin 
("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB); - iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH); - iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS); - iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX); - iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN); - iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU); - iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX); - iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN); - iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ); - iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ); - - iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB); - iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH); - iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW); - iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB); - iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH); - iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW); - iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB); - iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH); - iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW); - - iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB); - iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH); - iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW); - - iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB); - iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH); - iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW); - - iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM); - iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL); - - iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS); - iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS); - iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS); - iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS); - iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS); - iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS); - - iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB); - iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH); - iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW); - iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB); - iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH); - iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW); - iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB); - iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH); - iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW); - iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB); - iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH); - iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW); - - iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS); - iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ); - iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU); - iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ); - - iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI); - iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA); - iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH); - iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB); - iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT); - iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB); - iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT); - - iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB); - iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH); - iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW); - - iwmmx2_mbuiltin ("wqmiabb", 
v2si_ftype_v2si_v4hi_v4hi, WQMIABB); - iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT); - iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB); - iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT); - - iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN); - iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN); - iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN); - iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN); - - iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB); - iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT); - iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB); - iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT); - - iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN); - iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN); - iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN); - iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN); - - iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB); - iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT); - iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB); - iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT); - - iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN); - iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN); - iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN); - iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN); - - iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE); - - iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB); - iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH); - iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW); - -#undef iwmmx_mbuiltin -#undef iwmmx2_mbuiltin -} - -static void -arm_init_fp16_builtins (void) -{ - tree fp16_type = make_node (REAL_TYPE); - TYPE_PRECISION (fp16_type) = 16; - layout_type (fp16_type); - (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); -} - -static void -arm_init_crc32_builtins () -{ - tree si_ftype_si_qi - = build_function_type_list (unsigned_intSI_type_node, - unsigned_intSI_type_node, - unsigned_intQI_type_node, NULL_TREE); - tree si_ftype_si_hi - = build_function_type_list (unsigned_intSI_type_node, - unsigned_intSI_type_node, - unsigned_intHI_type_node, NULL_TREE); - tree si_ftype_si_si - = build_function_type_list (unsigned_intSI_type_node, - unsigned_intSI_type_node, - unsigned_intSI_type_node, NULL_TREE); - - arm_builtin_decls[ARM_BUILTIN_CRC32B] - = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi, - ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE); - arm_builtin_decls[ARM_BUILTIN_CRC32H] - = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi, - ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE); - arm_builtin_decls[ARM_BUILTIN_CRC32W] - = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si, - ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE); - arm_builtin_decls[ARM_BUILTIN_CRC32CB] - = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi, - ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE); - arm_builtin_decls[ARM_BUILTIN_CRC32CH] - = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi, - ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE); - arm_builtin_decls[ARM_BUILTIN_CRC32CW] - = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si, - ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE); -} - -static void -arm_init_builtins (void) -{ - if (TARGET_REALLY_IWMMXT) 
- arm_init_iwmmxt_builtins (); - - if (TARGET_NEON) - arm_init_neon_builtins (); - - if (arm_fp16_format) - arm_init_fp16_builtins (); - - if (TARGET_CRC32) - arm_init_crc32_builtins (); -} - -/* Return the ARM builtin for CODE. */ - -static tree -arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) -{ - if (code >= ARM_BUILTIN_MAX) - return error_mark_node; - - return arm_builtin_decls[code]; -} - /* Implement TARGET_INVALID_PARAMETER_TYPE. */ static const char * @@ -24591,7 +23391,7 @@ arm_convert_to_type (tree type, tree expr) special-cased in the default hook. */ static bool -arm_scalar_mode_supported_p (enum machine_mode mode) +arm_scalar_mode_supported_p (machine_mode mode) { if (mode == HFmode) return (arm_fp16_format != ARM_FP16_FORMAT_NONE); @@ -24601,511 +23401,6 @@ arm_scalar_mode_supported_p (enum machine_mode mode) return default_scalar_mode_supported_p (mode); } -/* Errors in the source file can cause expand_expr to return const0_rtx - where we expect a vector. To avoid crashing, use one of the vector - clear instructions. */ - -static rtx -safe_vector_operand (rtx x, enum machine_mode mode) -{ - if (x != const0_rtx) - return x; - x = gen_reg_rtx (mode); - - emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x - : gen_rtx_SUBREG (DImode, x, 0))); - return x; -} - -/* Function to expand ternary builtins. */ -static rtx -arm_expand_ternop_builtin (enum insn_code icode, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - rtx op3 = NULL_RTX; - - /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select - lane operand depending on endianness. */ - bool builtin_sha1cpm_p = false; - - if (insn_data[icode].n_operands == 5) - { - gcc_assert (icode == CODE_FOR_crypto_sha1c - || icode == CODE_FOR_crypto_sha1p - || icode == CODE_FOR_crypto_sha1m); - builtin_sha1cpm_p = true; - } - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; - enum machine_mode mode2 = insn_data[icode].operand[3].mode; - - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - if (VECTOR_MODE_P (mode2)) - op2 = safe_vector_operand (op2, mode2); - - if (! target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) - && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) - && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode)); - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - if (builtin_sha1cpm_p) - op3 = GEN_INT (TARGET_BIG_END ? 1 : 0); - - if (builtin_sha1cpm_p) - pat = GEN_FCN (icode) (target, op0, op1, op2, op3); - else - pat = GEN_FCN (icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of arm_expand_builtin to take care of binop insns. 
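For reference, the CRC32 builtins registered above map one-to-one onto the ACLE CRC operations, so a caller can use them directly. A minimal sketch (the function name and loop are ours, not part of arm.c; build with -march=armv8-a+crc):

/* Illustrative only: byte-wise CRC over the __builtin_arm_crc32b
   builtin registered above (si_ftype_si_qi: unsigned int result from
   an unsigned int accumulator and an unsigned char input).  */
unsigned int
crc32_bytes (unsigned int crc, const unsigned char *buf, unsigned long len)
{
  while (len--)
    crc = __builtin_arm_crc32b (crc, *buf++);
  return crc;
}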
*/ - -static rtx -arm_expand_binop_builtin (enum insn_code icode, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - enum machine_mode mode1 = insn_data[icode].operand[2].mode; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - if (! target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) - && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of arm_expand_builtin to take care of unop insns. */ - -static rtx -arm_expand_unop_builtin (enum insn_code icode, - tree exp, rtx target, int do_load) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx op0 = expand_normal (arg0); - rtx op1 = NULL_RTX; - enum machine_mode tmode = insn_data[icode].operand[0].mode; - enum machine_mode mode0 = insn_data[icode].operand[1].mode; - bool builtin_sha1h_p = false; - - if (insn_data[icode].n_operands == 3) - { - gcc_assert (icode == CODE_FOR_crypto_sha1h); - builtin_sha1h_p = true; - } - - if (! target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - if (do_load) - op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); - else - { - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - } - if (builtin_sha1h_p) - op1 = GEN_INT (TARGET_BIG_END ? 1 : 0); - - if (builtin_sha1h_p) - pat = GEN_FCN (icode) (target, op0, op1); - else - pat = GEN_FCN (icode) (target, op0); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -typedef enum { - NEON_ARG_COPY_TO_REG, - NEON_ARG_CONSTANT, - NEON_ARG_MEMORY, - NEON_ARG_STOP -} builtin_arg; - -#define NEON_MAX_BUILTIN_ARGS 5 - -/* EXP is a pointer argument to a Neon load or store intrinsic. Derive - and return an expression for the accessed memory. - - The intrinsic function operates on a block of registers that has - mode REG_MODE. This block contains vectors of type TYPE_MODE. The - function references the memory at EXP of type TYPE and in mode - MEM_MODE; this mode may be BLKmode if no more suitable mode is - available. */ - -static tree -neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode, - enum machine_mode reg_mode, - neon_builtin_type_mode type_mode) -{ - HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; - tree elem_type, upper_bound, array_type; - - /* Work out the size of the register block in bytes. */ - reg_size = GET_MODE_SIZE (reg_mode); - - /* Work out the size of each vector in bytes. */ - gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG)); - vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8); - - /* Work out how many vectors there are. 
*/
-  gcc_assert (reg_size % vector_size == 0);
-  nvectors = reg_size / vector_size;
-
-  /* Work out the type of each element.  */
-  gcc_assert (POINTER_TYPE_P (type));
-  elem_type = TREE_TYPE (type);
-
-  /* Work out how many elements are being loaded or stored.
-     MEM_MODE == REG_MODE implies a one-to-one mapping between register
-     and memory elements; anything else implies a lane load or store.  */
-  if (mem_mode == reg_mode)
-    nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
-  else
-    nelems = nvectors;
-
-  /* Create a type that describes the full access.  */
-  upper_bound = build_int_cst (size_type_node, nelems - 1);
-  array_type = build_array_type (elem_type, build_index_type (upper_bound));
-
-  /* Dereference EXP using that type.  */
-  return fold_build2 (MEM_REF, array_type, exp,
-                      build_int_cst (build_pointer_type (array_type), 0));
-}
-
-/* Expand a Neon builtin.  */
-static rtx
-arm_expand_neon_args (rtx target, int icode, int have_retval,
-                      neon_builtin_type_mode type_mode,
-                      tree exp, int fcode, ...)
-{
-  va_list ap;
-  rtx pat;
-  tree arg[NEON_MAX_BUILTIN_ARGS];
-  rtx op[NEON_MAX_BUILTIN_ARGS];
-  tree arg_type;
-  tree formals;
-  enum machine_mode tmode = insn_data[icode].operand[0].mode;
-  enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
-  enum machine_mode other_mode;
-  int argc = 0;
-  int opno;
-
-  if (have_retval
-      && (!target
-          || GET_MODE (target) != tmode
-          || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
-    target = gen_reg_rtx (tmode);
-
-  va_start (ap, fcode);
-
-  formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
-
-  for (;;)
-    {
-      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
-
-      if (thisarg == NEON_ARG_STOP)
-        break;
-      else
-        {
-          opno = argc + have_retval;
-          mode[argc] = insn_data[icode].operand[opno].mode;
-          arg[argc] = CALL_EXPR_ARG (exp, argc);
-          arg_type = TREE_VALUE (formals);
-          if (thisarg == NEON_ARG_MEMORY)
-            {
-              other_mode = insn_data[icode].operand[1 - opno].mode;
-              arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
-                                                    mode[argc], other_mode,
-                                                    type_mode);
-            }
-
-          /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
-             is returned.  */
-          op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
-                                  (thisarg == NEON_ARG_MEMORY
-                                   ? EXPAND_MEMORY : EXPAND_NORMAL));
-
-          switch (thisarg)
-            {
-            case NEON_ARG_COPY_TO_REG:
-              /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
-              if (!(*insn_data[icode].operand[opno].predicate)
-                     (op[argc], mode[argc]))
-                op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
-              break;
-
-            case NEON_ARG_CONSTANT:
-              /* FIXME: This error message is somewhat unhelpful.  */
-              if (!(*insn_data[icode].operand[opno].predicate)
-                    (op[argc], mode[argc]))
-                error ("argument must be a constant");
-              break;
-
-            case NEON_ARG_MEMORY:
-              /* Check if expand failed.  */
-              if (op[argc] == const0_rtx)
-                return 0;
-              gcc_assert (MEM_P (op[argc]));
-              PUT_MODE (op[argc], mode[argc]);
-              /* ??? arm_neon.h uses the same built-in functions for signed
-                 and unsigned accesses, casting where necessary.  This isn't
-                 alias safe.
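The size bookkeeping in neon_dereference_pointer reduces to a few integer divisions. The stand-alone sketch below mirrors that arithmetic with plain integers instead of machine modes (our names, not arm.c code):

#include <assert.h>

/* Model (ours) of the element-count computation above: REG_SIZE bytes
   of registers, split into VECTOR_SIZE-byte vectors holding
   ELEM_SIZE-byte elements.  FULL_WIDTH is nonzero for plain
   vld1/vst1-style accesses, zero for lane accesses.  */
static long
neon_access_nelems (long reg_size, long vector_size, long elem_size,
                    int full_width)
{
  long nvectors;
  assert (reg_size % vector_size == 0);
  nvectors = reg_size / vector_size;
  /* Full-width accesses map registers 1:1 onto memory elements; lane
     accesses touch one element per vector.  */
  return full_width ? vector_size * nvectors / elem_size : nvectors;
}

For a vld1q_s32-style access, for instance, reg_size = vector_size = 16 and elem_size = 4, giving the 4-element array type that the MEM_REF above describes.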
*/ - set_mem_alias_set (op[argc], 0); - if (!(*insn_data[icode].operand[opno].predicate) - (op[argc], mode[argc])) - op[argc] = (replace_equiv_address - (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); - break; - - case NEON_ARG_STOP: - gcc_unreachable (); - } - - argc++; - formals = TREE_CHAIN (formals); - } - } - - va_end (ap); - - if (have_retval) - switch (argc) - { - case 1: - pat = GEN_FCN (icode) (target, op[0]); - break; - - case 2: - pat = GEN_FCN (icode) (target, op[0], op[1]); - break; - - case 3: - pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); - break; - - case 4: - pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); - break; - - case 5: - pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); - break; - - default: - gcc_unreachable (); - } - else - switch (argc) - { - case 1: - pat = GEN_FCN (icode) (op[0]); - break; - - case 2: - pat = GEN_FCN (icode) (op[0], op[1]); - break; - - case 3: - pat = GEN_FCN (icode) (op[0], op[1], op[2]); - break; - - case 4: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); - break; - - case 5: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); - break; - - default: - gcc_unreachable (); - } - - if (!pat) - return 0; - - emit_insn (pat); - - return target; -} - -/* Expand a Neon builtin. These are "special" because they don't have symbolic - constants defined per-instruction or per instruction-variant. Instead, the - required info is looked up in the table neon_builtin_data. */ -static rtx -arm_expand_neon_builtin (int fcode, tree exp, rtx target) -{ - neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; - neon_itype itype = d->itype; - enum insn_code icode = d->code; - neon_builtin_type_mode type_mode = d->mode; - - switch (itype) - { - case NEON_UNOP: - case NEON_CONVERT: - case NEON_DUPLANE: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_BINOP: - case NEON_SETLANE: - case NEON_SCALARMUL: - case NEON_SCALARMULL: - case NEON_SCALARMULH: - case NEON_SHIFTINSERT: - case NEON_LOGICBINOP: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, - NEON_ARG_STOP); - - case NEON_TERNOP: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_GETLANE: - case NEON_FIXCONV: - case NEON_SHIFTIMM: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, - NEON_ARG_STOP); - - case NEON_CREATE: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - - case NEON_DUP: - case NEON_RINT: - case NEON_SPLIT: - case NEON_FLOAT_WIDEN: - case NEON_FLOAT_NARROW: - case NEON_REINTERP: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - - case NEON_COMBINE: - case NEON_VTBL: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - - case NEON_RESULTPAIR: - return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_STOP); - - case NEON_LANEMUL: - case NEON_LANEMULL: - case NEON_LANEMULH: - return arm_expand_neon_args (target, icode, 1, 
type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, - NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_LANEMAC: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_SHIFTACC: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, - NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_SCALARMAC: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_CONSTANT, NEON_ARG_STOP); - - case NEON_SELECT: - case NEON_VTBX: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_STOP); - - case NEON_LOAD1: - case NEON_LOADSTRUCT: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_MEMORY, NEON_ARG_STOP); - - case NEON_LOAD1LANE: - case NEON_LOADSTRUCTLANE: - return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, - NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, - NEON_ARG_STOP); - - case NEON_STORE1: - case NEON_STORESTRUCT: - return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, - NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - - case NEON_STORE1LANE: - case NEON_STORESTRUCTLANE: - return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, - NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, - NEON_ARG_STOP); - } - - gcc_unreachable (); -} - /* Emit code to reinterpret one Neon type as another, without altering bits. */ void neon_reinterpret (rtx dest, rtx src) @@ -25113,24 +23408,6 @@ neon_reinterpret (rtx dest, rtx src) emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); } -/* Emit code to place a Neon pair result in memory locations (with equal - registers). */ -void -neon_emit_pair_result_insn (enum machine_mode mode, - rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, - rtx op1, rtx op2) -{ - rtx mem = gen_rtx_MEM (mode, destaddr); - rtx tmp1 = gen_reg_rtx (mode); - rtx tmp2 = gen_reg_rtx (mode); - - emit_insn (intfn (tmp1, op1, op2, tmp2)); - - emit_move_insn (mem, tmp1); - mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); - emit_move_insn (mem, tmp2); -} - /* Set up OPERANDS for a register copy from SRC to DEST, taking care not to early-clobber SRC registers in the process. @@ -25169,7 +23446,7 @@ neon_split_vcombine (rtx operands[3]) unsigned int dest = REGNO (operands[0]); unsigned int src1 = REGNO (operands[1]); unsigned int src2 = REGNO (operands[2]); - enum machine_mode halfmode = GET_MODE (operands[1]); + machine_mode halfmode = GET_MODE (operands[1]); unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); rtx destlo, desthi; @@ -25211,531 +23488,6 @@ neon_split_vcombine (rtx operands[3]) emit_move_insn (destlo, operands[1]); } } - -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. 
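One subtlety from neon_split_vcombine above deserves a concrete illustration. With plain integers standing in for D registers (an analogy of ours, not arm.c code), the copy-ordering hazard it guards against looks like this:

/* If the destination pair overlaps a source, the overlapping half
   must be written in the right order or a source is lost.  The fully
   crossed case (LO aliasing the high half while HI aliases the low
   half) would need a swap and is ignored here.  */
static void
combine_pair (int dest[2], const int *lo, const int *hi)
{
  if (dest == hi)
    {
      /* DEST's low half aliases HI: move HI up before LO lands.  */
      dest[1] = *hi;
      dest[0] = *lo;
    }
  else
    {
      /* Safe order for the remaining overlaps, e.g. DEST aliasing LO
         in place, or DEST's high half aliasing HI.  */
      dest[0] = *lo;
      dest[1] = *hi;
    }
}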
*/
-
-static rtx
-arm_expand_builtin (tree exp,
-                    rtx target,
-                    rtx subtarget ATTRIBUTE_UNUSED,
-                    enum machine_mode mode ATTRIBUTE_UNUSED,
-                    int ignore ATTRIBUTE_UNUSED)
-{
-  const struct builtin_description * d;
-  enum insn_code icode;
-  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
-  tree arg0;
-  tree arg1;
-  tree arg2;
-  rtx op0;
-  rtx op1;
-  rtx op2;
-  rtx pat;
-  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
-  size_t i;
-  enum machine_mode tmode;
-  enum machine_mode mode0;
-  enum machine_mode mode1;
-  enum machine_mode mode2;
-  int opint;
-  int selector;
-  int mask;
-  int imm;
-
-  if (fcode >= ARM_BUILTIN_NEON_BASE)
-    return arm_expand_neon_builtin (fcode, exp, target);
-
-  switch (fcode)
-    {
-    case ARM_BUILTIN_TEXTRMSB:
-    case ARM_BUILTIN_TEXTRMUB:
-    case ARM_BUILTIN_TEXTRMSH:
-    case ARM_BUILTIN_TEXTRMUH:
-    case ARM_BUILTIN_TEXTRMSW:
-    case ARM_BUILTIN_TEXTRMUW:
-      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
-               : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
-               : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
-               : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
-               : CODE_FOR_iwmmxt_textrmw);
-
-      arg0 = CALL_EXPR_ARG (exp, 0);
-      arg1 = CALL_EXPR_ARG (exp, 1);
-      op0 = expand_normal (arg0);
-      op1 = expand_normal (arg1);
-      tmode = insn_data[icode].operand[0].mode;
-      mode0 = insn_data[icode].operand[1].mode;
-      mode1 = insn_data[icode].operand[2].mode;
-
-      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
-        op0 = copy_to_mode_reg (mode0, op0);
-      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
-        {
-          /* @@@ better error message */
-          error ("selector must be an immediate");
-          return gen_reg_rtx (tmode);
-        }
-
-      opint = INTVAL (op1);
-      if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
-        {
-          if (opint > 7 || opint < 0)
-            error ("the range of selector should be in 0 to 7");
-        }
-      else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
-        {
-          if (opint > 3 || opint < 0)
-            error ("the range of selector should be in 0 to 3");
-        }
-      else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW.  */
-        {
-          if (opint > 1 || opint < 0)
-            error ("the range of selector should be in 0 to 1");
-        }
-
-      if (target == 0
-          || GET_MODE (target) != tmode
-          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-        target = gen_reg_rtx (tmode);
-      pat = GEN_FCN (icode) (target, op0, op1);
-      if (! pat)
-        return 0;
-      emit_insn (pat);
-      return target;
-
-    case ARM_BUILTIN_WALIGNI:
-      /* If op2 is immediate, call waligni, else call walignr.
*/ - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - if (CONST_INT_P (op2)) - { - icode = CODE_FOR_iwmmxt_waligni; - tmode = insn_data[icode].operand[0].mode; - mode0 = insn_data[icode].operand[1].mode; - mode1 = insn_data[icode].operand[2].mode; - mode2 = insn_data[icode].operand[3].mode; - if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2)); - selector = INTVAL (op2); - if (selector > 7 || selector < 0) - error ("the range of selector should be in 0 to 7"); - } - else - { - icode = CODE_FOR_iwmmxt_walignr; - tmode = insn_data[icode].operand[0].mode; - mode0 = insn_data[icode].operand[1].mode; - mode1 = insn_data[icode].operand[2].mode; - mode2 = insn_data[icode].operand[3].mode; - if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - } - if (target == 0 - || GET_MODE (target) != tmode - || !(*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op0, op1, op2); - if (!pat) - return 0; - emit_insn (pat); - return target; - - case ARM_BUILTIN_TINSRB: - case ARM_BUILTIN_TINSRH: - case ARM_BUILTIN_TINSRW: - case ARM_BUILTIN_WMERGE: - icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb - : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh - : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge - : CODE_FOR_iwmmxt_tinsrw); - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - tmode = insn_data[icode].operand[0].mode; - mode0 = insn_data[icode].operand[1].mode; - mode1 = insn_data[icode].operand[2].mode; - mode2 = insn_data[icode].operand[3].mode; - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) - { - error ("selector must be an immediate"); - return const0_rtx; - } - if (icode == CODE_FOR_iwmmxt_wmerge) - { - selector = INTVAL (op2); - if (selector > 7 || selector < 0) - error ("the range of selector should be in 0 to 7"); - } - if ((icode == CODE_FOR_iwmmxt_tinsrb) - || (icode == CODE_FOR_iwmmxt_tinsrh) - || (icode == CODE_FOR_iwmmxt_tinsrw)) - { - mask = 0x01; - selector= INTVAL (op2); - if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7)) - error ("the range of selector should be in 0 to 7"); - else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3)) - error ("the range of selector should be in 0 to 3"); - else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1)) - error ("the range of selector should be in 0 to 1"); - mask <<= selector; - op2 = GEN_INT (mask); - } - if (target == 0 - || GET_MODE (target) != tmode - || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; - - case ARM_BUILTIN_SETWCGR0: - case ARM_BUILTIN_SETWCGR1: - case ARM_BUILTIN_SETWCGR2: - case ARM_BUILTIN_SETWCGR3: - icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0 - : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1 - : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2 - : CODE_FOR_iwmmxt_setwcgr3); - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - mode0 = insn_data[icode].operand[0].mode; - if (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - pat = GEN_FCN (icode) (op0); - if (!pat) - return 0; - emit_insn (pat); - return 0; - - case ARM_BUILTIN_GETWCGR0: - case ARM_BUILTIN_GETWCGR1: - case ARM_BUILTIN_GETWCGR2: - case ARM_BUILTIN_GETWCGR3: - icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0 - : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1 - : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2 - : CODE_FOR_iwmmxt_getwcgr3); - tmode = insn_data[icode].operand[0].mode; - if (target == 0 - || GET_MODE (target) != tmode - || !(*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target); - if (!pat) - return 0; - emit_insn (pat); - return target; - - case ARM_BUILTIN_WSHUFH: - icode = CODE_FOR_iwmmxt_wshufh; - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - tmode = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[2].mode; - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) - op0 = copy_to_mode_reg (mode1, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) - { - error ("mask must be an immediate"); - return const0_rtx; - } - selector = INTVAL (op1); - if (selector < 0 || selector > 255) - error ("the range of mask should be in 0 to 255"); - if (target == 0 - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; - - case ARM_BUILTIN_WMADDS: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target); - case ARM_BUILTIN_WMADDSX: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target); - case ARM_BUILTIN_WMADDSN: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target); - case ARM_BUILTIN_WMADDU: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target); - case ARM_BUILTIN_WMADDUX: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target); - case ARM_BUILTIN_WMADDUN: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target); - case ARM_BUILTIN_WSADBZ: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target); - case ARM_BUILTIN_WSADHZ: - return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target); - - /* Several three-argument builtins. 
*/ - case ARM_BUILTIN_WMACS: - case ARM_BUILTIN_WMACU: - case ARM_BUILTIN_TMIA: - case ARM_BUILTIN_TMIAPH: - case ARM_BUILTIN_TMIATT: - case ARM_BUILTIN_TMIATB: - case ARM_BUILTIN_TMIABT: - case ARM_BUILTIN_TMIABB: - case ARM_BUILTIN_WQMIABB: - case ARM_BUILTIN_WQMIABT: - case ARM_BUILTIN_WQMIATB: - case ARM_BUILTIN_WQMIATT: - case ARM_BUILTIN_WQMIABBN: - case ARM_BUILTIN_WQMIABTN: - case ARM_BUILTIN_WQMIATBN: - case ARM_BUILTIN_WQMIATTN: - case ARM_BUILTIN_WMIABB: - case ARM_BUILTIN_WMIABT: - case ARM_BUILTIN_WMIATB: - case ARM_BUILTIN_WMIATT: - case ARM_BUILTIN_WMIABBN: - case ARM_BUILTIN_WMIABTN: - case ARM_BUILTIN_WMIATBN: - case ARM_BUILTIN_WMIATTN: - case ARM_BUILTIN_WMIAWBB: - case ARM_BUILTIN_WMIAWBT: - case ARM_BUILTIN_WMIAWTB: - case ARM_BUILTIN_WMIAWTT: - case ARM_BUILTIN_WMIAWBBN: - case ARM_BUILTIN_WMIAWBTN: - case ARM_BUILTIN_WMIAWTBN: - case ARM_BUILTIN_WMIAWTTN: - case ARM_BUILTIN_WSADB: - case ARM_BUILTIN_WSADH: - icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs - : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu - : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia - : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph - : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb - : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt - : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb - : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt - : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb - : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt - : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb - : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt - : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn - : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn - : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn - : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn - : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb - : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt - : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb - : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt - : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn - : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn - : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn - : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn - : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb - : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt - : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb - : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt - : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn - : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn - : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn - : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn - : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb - : CODE_FOR_iwmmxt_wsadh); - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - tmode = insn_data[icode].operand[0].mode; - mode0 = insn_data[icode].operand[1].mode; - mode1 = insn_data[icode].operand[2].mode; - mode2 = insn_data[icode].operand[3].mode; - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - if (target == 0 - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - pat = GEN_FCN (icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; - - case ARM_BUILTIN_WZERO: - target = gen_reg_rtx (DImode); - emit_insn (gen_iwmmxt_clrdi (target)); - return target; - - case ARM_BUILTIN_WSRLHI: - case ARM_BUILTIN_WSRLWI: - case ARM_BUILTIN_WSRLDI: - case ARM_BUILTIN_WSLLHI: - case ARM_BUILTIN_WSLLWI: - case ARM_BUILTIN_WSLLDI: - case ARM_BUILTIN_WSRAHI: - case ARM_BUILTIN_WSRAWI: - case ARM_BUILTIN_WSRADI: - case ARM_BUILTIN_WRORHI: - case ARM_BUILTIN_WRORWI: - case ARM_BUILTIN_WRORDI: - case ARM_BUILTIN_WSRLH: - case ARM_BUILTIN_WSRLW: - case ARM_BUILTIN_WSRLD: - case ARM_BUILTIN_WSLLH: - case ARM_BUILTIN_WSLLW: - case ARM_BUILTIN_WSLLD: - case ARM_BUILTIN_WSRAH: - case ARM_BUILTIN_WSRAW: - case ARM_BUILTIN_WSRAD: - case ARM_BUILTIN_WRORH: - case ARM_BUILTIN_WRORW: - case ARM_BUILTIN_WRORD: - icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt - : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt - : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt - : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt - : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt - : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt - : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt - : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt - : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt - : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3 - : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3 - : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3 - : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di - : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di - : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di - : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di - : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di - : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di - : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di - : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di - : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di - : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di - : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di - : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di - : CODE_FOR_nothing); - arg1 = CALL_EXPR_ARG (exp, 1); - op1 = expand_normal (arg1); - if (GET_MODE (op1) == VOIDmode) - { - imm = INTVAL (op1); - if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI - || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW) - && (imm < 0 || imm > 32)) - { - if (fcode == ARM_BUILTIN_WRORHI) - error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code."); - else if (fcode == ARM_BUILTIN_WRORWI) - error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code."); - else if (fcode == ARM_BUILTIN_WRORH) - error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code."); - else - error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code."); - } - else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD) - && (imm < 0 || imm > 64)) - { - if (fcode == ARM_BUILTIN_WRORDI) - error ("the range of count should be in 0 to 64. 
please check the intrinsic _mm_rori_si64 in code."); - else - error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code."); - } - else if (imm < 0) - { - if (fcode == ARM_BUILTIN_WSRLHI) - error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSRLWI) - error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code."); - else if (fcode == ARM_BUILTIN_WSRLDI) - error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code."); - else if (fcode == ARM_BUILTIN_WSLLHI) - error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSLLWI) - error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code."); - else if (fcode == ARM_BUILTIN_WSLLDI) - error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code."); - else if (fcode == ARM_BUILTIN_WSRAHI) - error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSRAWI) - error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code."); - else if (fcode == ARM_BUILTIN_WSRADI) - error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code."); - else if (fcode == ARM_BUILTIN_WSRLH) - error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSRLW) - error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code."); - else if (fcode == ARM_BUILTIN_WSRLD) - error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code."); - else if (fcode == ARM_BUILTIN_WSLLH) - error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSLLW) - error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code."); - else if (fcode == ARM_BUILTIN_WSLLD) - error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code."); - else if (fcode == ARM_BUILTIN_WSRAH) - error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code."); - else if (fcode == ARM_BUILTIN_WSRAW) - error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code."); - else - error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code."); - } - } - return arm_expand_binop_builtin (icode, exp, target); - - default: - break; - } - - for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) - if (d->code == (const enum arm_builtins) fcode) - return arm_expand_binop_builtin (d->icode, exp, target); - - for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) - if (d->code == (const enum arm_builtins) fcode) - return arm_expand_unop_builtin (d->icode, exp, target, 0); - - for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) - if (d->code == (const enum arm_builtins) fcode) - return arm_expand_ternop_builtin (d->icode, exp, target); - - /* @@@ Should really do something sensible here. */ - return NULL_RTX; -} /* Return the number (counting from 0) of the least significant set bit in MASK. 
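That helper is simply a count-trailing-zeros operation. A portable equivalent (ours, not the arm.c body; GCC could equally use __builtin_ctz):

/* Equivalent sketch of number_of_first_bit_set; MASK must be
   nonzero or the loop never terminates.  */
static int
first_bit_set (unsigned mask)
{
  int bit = 0;
  while ((mask & 1) == 0)
    {
      mask >>= 1;
      bit++;
    }
  return bit;
}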
*/ @@ -25751,11 +23503,12 @@ number_of_first_bit_set (unsigned mask) to be saved; REAL_REGS is the set of registers to be described as saved. If REAL_REGS is 0, only describe the stack adjustment. */ -static rtx +static rtx_insn * thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs) { unsigned long regno; - rtx par[10], tmp, reg, insn; + rtx par[10], tmp, reg; + rtx_insn *insn; int i, j; /* Build the parallel of the registers actually being stored. */ @@ -25884,7 +23637,7 @@ thumb_exit (FILE *f, int reg_containing_return_addr) int pops_needed; unsigned available; unsigned required; - int mode; + machine_mode mode; int size; int restore_a4 = FALSE; @@ -26151,7 +23904,7 @@ thumb_exit (FILE *f, int reg_containing_return_addr) For Thumb-1, we track the status of the condition codes; this information is used in the cbranchsi4_insn pattern. */ void -thumb1_final_prescan_insn (rtx insn) +thumb1_final_prescan_insn (rtx_insn *insn) { if (flag_print_asm_name) asm_fprintf (asm_out_file, "%@ 0x%04x\n", @@ -26220,7 +23973,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val) static int thumb_far_jump_used_p (void) { - rtx insn; + rtx_insn *insn; bool far_jump = false; unsigned int func_size = 0; @@ -26574,7 +24327,7 @@ static struct machine_function * arm_init_machine_status (void) { struct machine_function *machine; - machine = ggc_alloc_cleared_machine_function (); + machine = ggc_cleared_alloc<machine_function> (); #if ARM_FT_UNKNOWN != 0 machine->func_type = ARM_FT_UNKNOWN; @@ -26669,7 +24422,7 @@ thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) void thumb1_expand_prologue (void) { - rtx insn; + rtx_insn *insn; HOST_WIDE_INT amount; arm_stack_offsets *offsets; @@ -27121,7 +24874,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) int saved_size = arm_get_vfp_saved_size (); if (saved_size > 0) { - rtx insn; + rtx_insn *insn; floats_from_frame += saved_size; insn = emit_insn (gen_addsi3 (ip_rtx, hard_frame_pointer_rtx, @@ -27160,7 +24913,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) { /* The frame pointer is guaranteed to be non-double-word aligned, as it is set to double-word-aligned old_stack_pointer - 4. */ - rtx insn; + rtx_insn *insn; int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1); for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--) @@ -27200,7 +24953,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) num_regs = bit_count (saved_regs_mask); if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca) { - rtx insn; + rtx_insn *insn; emit_insn (gen_blockage ()); /* Unwind the stack to just below the saved registers. */ insn = emit_insn (gen_addsi3 (stack_pointer_rtx, @@ -27217,7 +24970,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) { /* Interrupt handlers will have pushed the IP onto the stack, so restore it now. */ - rtx insn; + rtx_insn *insn; rtx addr = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, stack_pointer_rtx)); @@ -27240,7 +24993,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) /* Restore the original stack pointer. Before prologue, the stack was realigned and the original stack pointer saved in r0. For details, see comment in arm_expand_prologue. 
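A note on the pervasive rtx to rtx_insn * changes in the hunks above: they follow GCC's move to a statically typed insn class hierarchy, where emitters such as emit_insn return rtx_insn *, so insn-only operations type-check at compile time instead of being caught by run-time assertions. A toy model (our simplified types, not GCC's real classes):

/* Toy model (ours) of the rtx_def vs. rtx_insn distinction.  */
struct rtx_def { int code; };
struct rtx_insn : rtx_def { rtx_insn *next; };

static rtx_insn *emit_stub (rtx_insn *chain) { return chain; }

static void
walk (rtx_insn *head)
{
  for (rtx_insn *insn = emit_stub (head); insn; insn = insn->next)
    ;  /* NEXT is only reachable through the derived type.  */
}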
*/ - emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM))); emit_jump_insn (simple_return_rtx); } @@ -27286,7 +25039,7 @@ arm_expand_epilogue (bool really_return) if (frame_pointer_needed) { - rtx insn; + rtx_insn *insn; /* Restore stack pointer if necessary. */ if (TARGET_ARM) { @@ -27343,7 +25096,7 @@ arm_expand_epilogue (bool really_return) amount = offsets->outgoing_args - offsets->saved_regs; if (amount) { - rtx tmp; + rtx_insn *tmp; /* Force out any pending memory operations that reference stacked data before stack de-allocation occurs. */ emit_insn (gen_blockage ()); @@ -27395,7 +25148,7 @@ arm_expand_epilogue (bool really_return) for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++) if (df_regs_ever_live_p (i) && !call_used_regs[i]) { - rtx insn; + rtx_insn *insn; rtx addr = gen_rtx_MEM (V2SImode, gen_rtx_POST_INC (SImode, stack_pointer_rtx)); @@ -27484,9 +25237,10 @@ arm_expand_epilogue (bool really_return) { int i, j; rtx dwarf = NULL_RTX; - rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, - stack_pointer_rtx, - GEN_INT (crtl->args.pretend_args_size))); + rtx_insn *tmp = + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); RTX_FRAME_RELATED_P (tmp) = 1; @@ -27521,7 +25275,7 @@ arm_expand_epilogue (bool really_return) /* Restore the original stack pointer. Before prologue, the stack was realigned and the original stack pointer saved in r0. For details, see comment in arm_expand_prologue. */ - emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM))); emit_jump_insn (simple_return_rtx); } @@ -27701,23 +25455,11 @@ thumb_output_move_mem_multiple (int n, rtx *operands) case 3: if (REGNO (operands[4]) > REGNO (operands[5])) - { - tmp = operands[4]; - operands[4] = operands[5]; - operands[5] = tmp; - } + std::swap (operands[4], operands[5]); if (REGNO (operands[5]) > REGNO (operands[6])) - { - tmp = operands[5]; - operands[5] = operands[6]; - operands[6] = tmp; - } + std::swap (operands[5], operands[6]); if (REGNO (operands[4]) > REGNO (operands[5])) - { - tmp = operands[4]; - operands[4] = operands[5]; - operands[5] = tmp; - } + std::swap (operands[4], operands[5]); output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands); output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands); @@ -28141,12 +25883,14 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, fputs ("\tldr\tr3, ", file); assemble_name (file, label); fputs ("+4\n", file); - asm_fprintf (file, "\t%s\t%r, %r, r3\n", + asm_fprintf (file, "\t%ss\t%r, %r, r3\n", mi_op, this_regno, this_regno); } else if (mi_delta != 0) { - asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + /* Thumb1 unified syntax requires s suffix in instruction name when + one of the operands is immediate. */ + asm_fprintf (file, "\t%ss\t%r, %r, #%d\n", mi_op, this_regno, this_regno, mi_delta); } @@ -28179,9 +25923,13 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, fputs (":\n", file); if (flag_pic) { - /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */ rtx tem = XEXP (DECL_RTL (function), 0); - tem = plus_constant (GET_MODE (tem), tem, -7); + /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC + pipeline offset is four rather than eight. Adjust the offset + accordingly. */ + tem = plus_constant (GET_MODE (tem), tem, + TARGET_THUMB1_ONLY ? 
-3 : -7); tem = gen_rtx_MINUS (GET_MODE (tem), tem, gen_rtx_SYMBOL_REF (Pmode, @@ -28290,7 +26038,7 @@ arm_output_load_gr (rtx *operands) static void arm_setup_incoming_varargs (cumulative_args_t pcum_v, - enum machine_mode mode, + machine_mode mode, tree type, int *pretend_size, int second_time ATTRIBUTE_UNUSED) @@ -28321,9 +26069,9 @@ arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) return !TARGET_AAPCS_BASED; } -static enum machine_mode +static machine_mode arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, - enum machine_mode mode, + machine_mode mode, int *punsignedp ATTRIBUTE_UNUSED, const_tree fntype ATTRIBUTE_UNUSED, int for_return ATTRIBUTE_UNUSED) @@ -28484,7 +26232,11 @@ arm_set_return_address (rtx source, rtx scratch) addr = plus_constant (Pmode, addr, delta); } - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. */ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } } @@ -28536,7 +26288,11 @@ thumb_set_return_address (rtx source, rtx scratch) else addr = plus_constant (Pmode, addr, delta); - emit_move_insn (gen_frame_mem (Pmode, addr), source); + /* The store needs to be marked as frame related in order to prevent + DSE from deleting it as dead if it is based on fp. */ + rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM)); } else emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); @@ -28544,7 +26300,7 @@ thumb_set_return_address (rtx source, rtx scratch) /* Implements target hook vector_mode_supported_p. */ bool -arm_vector_mode_supported_p (enum machine_mode mode) +arm_vector_mode_supported_p (machine_mode mode) { /* Neon also supports V2SImode, etc. listed in the clause below. */ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode @@ -28568,7 +26324,7 @@ arm_vector_mode_supported_p (enum machine_mode mode) /* Implements target hook array_mode_supported_p. */ static bool -arm_array_mode_supported_p (enum machine_mode mode, +arm_array_mode_supported_p (machine_mode mode, unsigned HOST_WIDE_INT nelems) { if (TARGET_NEON @@ -28583,8 +26339,8 @@ arm_array_mode_supported_p (enum machine_mode mode, registers when autovectorizing for Neon, at least until multiple vector widths are supported properly by the middle-end. */ -static enum machine_mode -arm_preferred_simd_mode (enum machine_mode mode) +static machine_mode +arm_preferred_simd_mode (machine_mode mode) { if (TARGET_NEON) switch (mode) @@ -28641,7 +26397,7 @@ arm_class_likely_spilled_p (reg_class_t rclass) /* Implements target hook small_register_classes_for_mode_p. */ bool -arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) +arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED) { return TARGET_THUMB1; } @@ -28652,7 +26408,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) guarantee no particular behavior for out-of-range counts. */ static unsigned HOST_WIDE_INT -arm_shift_truncation_mask (enum machine_mode mode) +arm_shift_truncation_mask (machine_mode mode) { return mode == SImode ? 
255 : 0; } @@ -28690,7 +26446,7 @@ arm_dbx_register_number (unsigned int regno) static rtx arm_dwarf_register_span (rtx rtl) { - enum machine_mode mode; + machine_mode mode; unsigned regno; rtx parts[16]; int nregs; @@ -28946,7 +26702,7 @@ arm_unwind_emit_set (FILE * asm_out_file, rtx p) /* Emit unwind directives for the given insn. */ static void -arm_unwind_emit (FILE * asm_out_file, rtx insn) +arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn) { rtx note, pat; bool handled_one = false; @@ -29245,7 +27001,7 @@ arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wr { int shift = INTVAL (operands[2]); char templ[50]; - enum machine_mode opmode = GET_MODE (operands[0]); + machine_mode opmode = GET_MODE (operands[0]); gcc_assert (shift >= 0); @@ -29332,7 +27088,7 @@ arm_output_iwmmxt_tinsr (rtx *operands) const char * thumb1_output_casesi (rtx *operands) { - rtx diff_vec = PATTERN (NEXT_INSN (operands[0])); + rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0]))); gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); @@ -29355,7 +27111,7 @@ thumb1_output_casesi (rtx *operands) const char * thumb2_output_casesi (rtx *operands) { - rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2]))); gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); @@ -29392,10 +27148,14 @@ arm_issue_rate (void) { switch (arm_tune) { + case xgene1: + return 4; + case cortexa15: case cortexa57: return 3; + case cortexm7: case cortexr4: case cortexr4f: case cortexr5: @@ -29405,6 +27165,7 @@ arm_issue_rate (void) case cortexa8: case cortexa9: case cortexa12: + case cortexa17: case cortexa53: case fa726te: case marvell_pj4: @@ -29415,50 +27176,26 @@ arm_issue_rate (void) } } -/* A table and a function to perform ARM-specific name mangling for - NEON vector types in order to conform to the AAPCS (see "Procedure - Call Standard for the ARM Architecture", Appendix A). To qualify - for emission with the mangled names defined in that document, a - vector type must not only be of the correct mode but also be - composed of NEON vector element types (e.g. __builtin_neon_qi). */ -typedef struct +/* Return how many instructions should scheduler lookahead to choose the + best one. */ +static int +arm_first_cycle_multipass_dfa_lookahead (void) { - enum machine_mode mode; - const char *element_type_name; - const char *aapcs_name; -} arm_mangle_map_entry; - -static arm_mangle_map_entry arm_mangle_map[] = { - /* 64-bit containerized types. */ - { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" }, - { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, - { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, - { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, - { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" }, - { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, - { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, - { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, - { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" }, - { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" }, - - /* 128-bit containerized types. 
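Each aapcs_name in this table is an Itanium-style length-prefixed source name: "15__simd64_int8_t" is the 15-character string "__simd64_int8_t". Under the old table-driven scheme a declaration like the following (hypothetical function name, ours) therefore mangled as shown; the 128-bit rows below work the same way:

#include <arm_neon.h>

/* With the table above, this mangles as _Z1f15__simd64_int8_t:
   "1f" for the function name plus the length-prefixed AAPCS name of
   the 64-bit int8x8_t container.  */
void f (int8x8_t) { }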
*/ - { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" }, - { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" }, - { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" }, - { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" }, - { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" }, - { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" }, - { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" }, - { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" }, - { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" }, - { VOIDmode, NULL, NULL } -}; + int issue_rate = arm_issue_rate (); + + return issue_rate > 1 && !sched_fusion ? issue_rate : 0; +} + +/* Enable modeling of L2 auto-prefetcher. */ +static int +arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index) +{ + return autopref_multipass_dfa_lookahead_guard (insn, ready_index); +} const char * arm_mangle_type (const_tree type) { - arm_mangle_map_entry *pos = arm_mangle_map; - /* The ARM ABI documents (10th October 2008) say that "__va_list" has to be managled as if it is in the "std" namespace. */ if (TARGET_AAPCS_BASED @@ -29469,26 +27206,12 @@ arm_mangle_type (const_tree type) if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) return "Dh"; - if (TREE_CODE (type) != VECTOR_TYPE) - return NULL; - - /* Check the mode of the vector type, and the name of the vector - element type, against the table. */ - while (pos->mode != VOIDmode) - { - tree elt_type = TREE_TYPE (type); - - if (pos->mode == TYPE_MODE (type) - && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL - && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), - pos->element_type_name)) - return pos->aapcs_name; + /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a + builtin type. */ + if (TYPE_NAME (type) != NULL) + return arm_mangle_builtin_type (type); - pos++; - } - - /* Use the default mangling for unrecognized (possibly user-defined) - vector types. */ + /* Use the default mangling. */ return NULL; } @@ -29534,60 +27257,6 @@ arm_have_conditional_execution (void) return !TARGET_THUMB1; } -tree -arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) -{ - enum machine_mode in_mode, out_mode; - int in_n, out_n; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE - || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - -/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the - decl of the vectorized builtin for the appropriate vector mode. - NULL_TREE is returned if no such builtin is available. */ -#undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C) \ - (out_mode == SFmode && out_n == C \ - && in_mode == SFmode && in_n == C) - -#undef ARM_FIND_VRINT_VARIANT -#define ARM_FIND_VRINT_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? 
arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ - : NULL_TREE)) - - if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) - { - enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); - switch (fn) - { - case BUILT_IN_FLOORF: - return ARM_FIND_VRINT_VARIANT (vrintm); - case BUILT_IN_CEILF: - return ARM_FIND_VRINT_VARIANT (vrintp); - case BUILT_IN_TRUNCF: - return ARM_FIND_VRINT_VARIANT (vrintz); - case BUILT_IN_ROUNDF: - return ARM_FIND_VRINT_VARIANT (vrinta); - default: - return NULL_TREE; - } - } - return NULL_TREE; -} -#undef ARM_CHECK_BUILTIN_MODE -#undef ARM_FIND_VRINT_VARIANT - /* The AAPCS sets the maximum alignment of a vector to 64 bits. */ static HOST_WIDE_INT arm_vector_alignment (const_tree type) @@ -29618,7 +27287,7 @@ arm_vector_alignment_reachable (const_tree type, bool is_packed) } static bool -arm_builtin_support_vector_misalignment (enum machine_mode mode, +arm_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed) { @@ -29655,8 +27324,7 @@ arm_conditional_register_usage (void) /* When optimizing for size on Thumb-1, it's better not to use the HI regs, because of the overhead of stacking them. */ - for (regno = FIRST_HI_REGNUM; - regno <= LAST_HI_REGNUM; ++regno) + for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno) fixed_regs[regno] = call_used_regs[regno] = 1; } @@ -29842,7 +27510,7 @@ arm_post_atomic_barrier (enum memmodel model) Use acquire and release versions if necessary. */ static void -arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq) +arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq) { rtx (*gen) (rtx, rtx); @@ -29875,7 +27543,7 @@ arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq) } static void -arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, +arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval, rtx mem, bool rel) { rtx (*gen) (rtx, rtx, rtx); @@ -29925,7 +27593,7 @@ void arm_expand_compare_and_swap (rtx operands[]) { rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; - enum machine_mode mode; + machine_mode mode; rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); bval = operands[0]; @@ -30006,10 +27674,11 @@ void arm_split_compare_and_swap (rtx operands[]) { rtx rval, mem, oldval, newval, scratch; - enum machine_mode mode; + machine_mode mode; enum memmodel mod_s, mod_f; bool is_weak; - rtx label1, label2, x, cond; + rtx_code_label *label1, *label2; + rtx x, cond; rval = operands[0]; mem = operands[1]; @@ -30035,7 +27704,7 @@ arm_split_compare_and_swap (rtx operands[]) if (!(use_acquire || use_release)) arm_pre_atomic_barrier (mod_s); - label1 = NULL_RTX; + label1 = NULL; if (!is_weak) { label1 = gen_label_rtx (); @@ -30083,9 +27752,10 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, rtx value, rtx model_rtx, rtx cond) { enum memmodel model = (enum memmodel) INTVAL (model_rtx); - enum machine_mode mode = GET_MODE (mem); - enum machine_mode wmode = (mode == DImode ? DImode : SImode); - rtx label, x; + machine_mode mode = GET_MODE (mem); + machine_mode wmode = (mode == DImode ? 
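User code reaches arm_expand_compare_and_swap above through GCC's generic __atomic machinery; on ARM the split pass then emits the LDREX/STREX retry loop. A minimal user-level trigger (ours, using the documented __atomic builtin, not arm.c code):

#include <stdbool.h>

/* Strong compare-and-swap on an int; expands on ARM into the
   load-exclusive/store-exclusive sequence built above.  */
bool
cas_int (int *p, int expected, int desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired,
                                      false /* strong */,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}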
DImode : SImode); + rtx_code_label *label; + rtx x; bool use_acquire = TARGET_HAVE_LDACQ && !(model == MEMMODEL_RELAXED @@ -30177,7 +27847,7 @@ struct expand_vec_perm_d { rtx target, op0, op1; unsigned char perm[MAX_VECT_LEN]; - enum machine_mode vmode; + machine_mode vmode; unsigned char nelt; bool one_vector_p; bool testing_p; @@ -30188,7 +27858,7 @@ struct expand_vec_perm_d static void arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) { - enum machine_mode vmode = GET_MODE (target); + machine_mode vmode = GET_MODE (target); bool one_vector_p = rtx_equal_p (op0, op1); gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); @@ -30227,7 +27897,7 @@ arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) { - enum machine_mode vmode = GET_MODE (target); + machine_mode vmode = GET_MODE (target); unsigned int i, nelt = GET_MODE_NUNITS (vmode); bool one_vector_p = rtx_equal_p (op0, op1); rtx rmask[MAX_VECT_LEN], mask; @@ -30385,7 +28055,7 @@ static bool arm_evpc_neon_vrev (struct expand_vec_perm_d *d) { unsigned int i, j, diff, nelt = d->nelt; - rtx (*gen)(rtx, rtx, rtx); + rtx (*gen)(rtx, rtx); if (!d->one_vector_p) return false; @@ -30449,9 +28119,7 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d) if (d->testing_p) return true; - /* ??? The third operand is an artifact of the builtin infrastructure - and is ignored by the actual instruction. */ - emit_insn (gen (d->target, d->op0, const0_rtx)); + emit_insn (gen (d->target, d->op0)); return true; } @@ -30595,7 +28263,7 @@ static bool arm_evpc_neon_vtbl (struct expand_vec_perm_d *d) { rtx rperm[MAX_VECT_LEN], sel; - enum machine_mode vmode = d->vmode; + machine_mode vmode = d->vmode; unsigned int i, nelt = d->nelt; /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's @@ -30720,7 +28388,7 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ static bool -arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, +arm_vectorize_vec_perm_const_ok (machine_mode vmode, const unsigned char *sel) { struct expand_vec_perm_d d; @@ -30761,7 +28429,7 @@ arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, } bool -arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) +arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code) { /* If we are soft float and we do not have ldrd then all auto increment forms are ok. */ @@ -31011,7 +28679,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in, ORR (SHIFT (ASHIFT, in_up, scratch1), out_down))); if (code == ASHIFTRT) { - rtx done_label = gen_label_rtx (); + rtx_code_label *done_label = gen_label_rtx (); emit_jump_insn (BRANCH (LT, done_label)); emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2), out_down))); @@ -31030,7 +28698,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in, if (code == ASHIFTRT) { - rtx done_label = gen_label_rtx (); + rtx_code_label *done_label = gen_label_rtx (); emit_jump_insn (BRANCH (LT, done_label)); emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2))); emit_insn (SET (out_down, ORR (out_down, scratch2))); @@ -31065,7 +28733,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); int code_int; - enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? 
GET_MODE (*op2) : GET_MODE (*op1); gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); @@ -31108,6 +28776,586 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) } +/* Maximum number of instructions to set block of memory. */ +static int +arm_block_set_max_insns (void) +{ + if (optimize_function_for_size_p (cfun)) + return 4; + else + return current_tune->max_insns_inline_memset; +} + +/* Return TRUE if it's profitable to set block of memory for + non-vectorized case. VAL is the value to set the memory + with. LENGTH is the number of bytes to set. ALIGN is the + alignment of the destination memory in bytes. UNALIGNED_P + is TRUE if we can only set the memory with instructions + meeting alignment requirements. USE_STRD_P is TRUE if we + can use strd to set the memory. */ +static bool +arm_block_set_non_vect_profit_p (rtx val, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT align, + bool unaligned_p, bool use_strd_p) +{ + int num = 0; + /* For leftovers in bytes of 0-7, we can set the memory block using + strb/strh/str with minimum instruction number. */ + const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3}; + + if (unaligned_p) + { + num = arm_const_inline_cost (SET, val); + num += length / align + length % align; + } + else if (use_strd_p) + { + num = arm_const_double_inline_cost (val); + num += (length >> 3) + leftover[length & 7]; + } + else + { + num = arm_const_inline_cost (SET, val); + num += (length >> 2) + leftover[length & 3]; + } + + /* We may be able to combine last pair STRH/STRB into a single STR + by shifting one byte back. */ + if (unaligned_access && length > 3 && (length & 3) == 3) + num--; + + return (num <= arm_block_set_max_insns ()); +} + +/* Return TRUE if it's profitable to set block of memory for + vectorized case. LENGTH is the number of bytes to set. + ALIGN is the alignment of destination memory in bytes. + MODE is the vector mode used to set the memory. */ +static bool +arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT align, + machine_mode mode) +{ + int num; + bool unaligned_p = ((align & 3) != 0); + unsigned int nelt = GET_MODE_NUNITS (mode); + + /* Instruction loading constant value. */ + num = 1; + /* Instructions storing the memory. */ + num += (length + nelt - 1) / nelt; + /* Instructions adjusting the address expression. Only need to + adjust address expression if it's 4 bytes aligned and bytes + leftover can only be stored by mis-aligned store instruction. */ + if (!unaligned_p && (length & 3) != 0) + num++; + + /* Store the first 16 bytes using vst1:v16qi for the aligned case. */ + if (!unaligned_p && mode == V16QImode) + num--; + + return (num <= arm_block_set_max_insns ()); +} + +/* Set a block of memory using vectorization instructions for the + unaligned case. We fill the first LENGTH bytes of the memory + area starting from DSTBASE with byte constant VALUE. ALIGN is + the alignment requirement of memory. Return TRUE if succeeded. 
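+
+   As an illustration (ours, not part of the patch; the register
+   choices are hypothetical), LENGTH = 20, VALUE = 0xab, ALIGN = 1
+   should expand to roughly:
+     vmov.i8  q8, #0xab          @ one insn to load the constant
+     vst1.8   {d16, d17}, [r0]   @ bytes 0-15, misaligned vst1
+     add      r0, r0, #12
+     vst1.8   {d16}, [r0]        @ bytes 12-19; the overlap covers the tail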
*/ +static bool +arm_block_set_unaligned_vect (rtx dstbase, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT value, + unsigned HOST_WIDE_INT align) +{ + unsigned int i, j, nelt_v16, nelt_v8, nelt_mode; + rtx dst, mem; + rtx val_elt, val_vec, reg; + rtx rval[MAX_VECT_LEN]; + rtx (*gen_func) (rtx, rtx); + machine_mode mode; + unsigned HOST_WIDE_INT v = value; + + gcc_assert ((align & 0x3) != 0); + nelt_v8 = GET_MODE_NUNITS (V8QImode); + nelt_v16 = GET_MODE_NUNITS (V16QImode); + if (length >= nelt_v16) + { + mode = V16QImode; + gen_func = gen_movmisalignv16qi; + } + else + { + mode = V8QImode; + gen_func = gen_movmisalignv8qi; + } + nelt_mode = GET_MODE_NUNITS (mode); + gcc_assert (length >= nelt_mode); + /* Skip if it isn't profitable. */ + if (!arm_block_set_vect_profit_p (length, align, mode)) + return false; + + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + mem = adjust_automodify_address (dstbase, mode, dst, 0); + + v = sext_hwi (v, BITS_PER_WORD); + val_elt = GEN_INT (v); + for (j = 0; j < nelt_mode; j++) + rval[j] = val_elt; + + reg = gen_reg_rtx (mode); + val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval)); + /* Emit instruction loading the constant value. */ + emit_move_insn (reg, val_vec); + + /* Handle nelt_mode bytes in a vector. */ + for (i = 0; (i + nelt_mode <= length); i += nelt_mode) + { + emit_insn ((*gen_func) (mem, reg)); + if (i + 2 * nelt_mode <= length) + emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode))); + } + + /* If there are not less than nelt_v8 bytes leftover, we must be in + V16QI mode. */ + gcc_assert ((i + nelt_v8) > length || mode == V16QImode); + + /* Handle (8, 16) bytes leftover. */ + if (i + nelt_v8 < length) + { + emit_insn (gen_add2_insn (dst, GEN_INT (length - i))); + /* We are shifting bytes back, set the alignment accordingly. */ + if ((length & 1) != 0 && align >= 2) + set_mem_align (mem, BITS_PER_UNIT); + + emit_insn (gen_movmisalignv16qi (mem, reg)); + } + /* Handle (0, 8] bytes leftover. */ + else if (i < length && i + nelt_v8 >= length) + { + if (mode == V16QImode) + { + reg = gen_lowpart (V8QImode, reg); + mem = adjust_automodify_address (dstbase, V8QImode, dst, 0); + } + emit_insn (gen_add2_insn (dst, GEN_INT ((length - i) + + (nelt_mode - nelt_v8)))); + /* We are shifting bytes back, set the alignment accordingly. */ + if ((length & 1) != 0 && align >= 2) + set_mem_align (mem, BITS_PER_UNIT); + + emit_insn (gen_movmisalignv8qi (mem, reg)); + } + + return true; +} + +/* Set a block of memory using vectorization instructions for the + aligned case. We fill the first LENGTH bytes of the memory area + starting from DSTBASE with byte constant VALUE. ALIGN is the + alignment requirement of memory. Return TRUE if succeeded. */ +static bool +arm_block_set_aligned_vect (rtx dstbase, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT value, + unsigned HOST_WIDE_INT align) +{ + unsigned int i, j, nelt_v8, nelt_v16, nelt_mode; + rtx dst, addr, mem; + rtx val_elt, val_vec, reg; + rtx rval[MAX_VECT_LEN]; + machine_mode mode; + unsigned HOST_WIDE_INT v = value; + + gcc_assert ((align & 0x3) == 0); + nelt_v8 = GET_MODE_NUNITS (V8QImode); + nelt_v16 = GET_MODE_NUNITS (V16QImode); + if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN) + mode = V16QImode; + else + mode = V8QImode; + + nelt_mode = GET_MODE_NUNITS (mode); + gcc_assert (length >= nelt_mode); + /* Skip if it isn't profitable. 
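+     As a worked example of the cost model (our numbers): LENGTH = 32 in
+     V16QImode costs 1 (constant load) + 2 (stores) - 1 (credit for the
+     aligned first vst1:v16qi) = 2 insns, within the -Os budget of 4.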
*/ + if (!arm_block_set_vect_profit_p (length, align, mode)) + return false; + + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + + v = sext_hwi (v, BITS_PER_WORD); + val_elt = GEN_INT (v); + for (j = 0; j < nelt_mode; j++) + rval[j] = val_elt; + + reg = gen_reg_rtx (mode); + val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval)); + /* Emit instruction loading the constant value. */ + emit_move_insn (reg, val_vec); + + i = 0; + /* Handle first 16 bytes specially using vst1:v16qi instruction. */ + if (mode == V16QImode) + { + mem = adjust_automodify_address (dstbase, mode, dst, 0); + emit_insn (gen_movmisalignv16qi (mem, reg)); + i += nelt_mode; + /* Handle (8, 16) bytes leftover using vst1:v16qi again. */ + if (i + nelt_v8 < length && i + nelt_v16 > length) + { + emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode))); + mem = adjust_automodify_address (dstbase, mode, dst, 0); + /* We are shifting bytes back, set the alignment accordingly. */ + if ((length & 0x3) == 0) + set_mem_align (mem, BITS_PER_UNIT * 4); + else if ((length & 0x1) == 0) + set_mem_align (mem, BITS_PER_UNIT * 2); + else + set_mem_align (mem, BITS_PER_UNIT); + + emit_insn (gen_movmisalignv16qi (mem, reg)); + return true; + } + /* Fall through for bytes leftover. */ + mode = V8QImode; + nelt_mode = GET_MODE_NUNITS (mode); + reg = gen_lowpart (V8QImode, reg); + } + + /* Handle 8 bytes in a vector. */ + for (; (i + nelt_mode <= length); i += nelt_mode) + { + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, mode, addr, i); + emit_move_insn (mem, reg); + } + + /* Handle single word leftover by shifting 4 bytes back. We can + use aligned access for this case. */ + if (i + UNITS_PER_WORD == length) + { + addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD); + mem = adjust_automodify_address (dstbase, mode, + addr, i - UNITS_PER_WORD); + /* We are shifting 4 bytes back, set the alignment accordingly. */ + if (align > UNITS_PER_WORD) + set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD); + + emit_move_insn (mem, reg); + } + /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back. + We have to use unaligned access for this case. */ + else if (i < length) + { + emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode))); + mem = adjust_automodify_address (dstbase, mode, dst, 0); + /* We are shifting bytes back, set the alignment accordingly. */ + if ((length & 1) == 0) + set_mem_align (mem, BITS_PER_UNIT * 2); + else + set_mem_align (mem, BITS_PER_UNIT); + + emit_insn (gen_movmisalignv8qi (mem, reg)); + } + + return true; +} + +/* Set a block of memory using plain strh/strb instructions, only + using instructions allowed by ALIGN on processor. We fill the + first LENGTH bytes of the memory area starting from DSTBASE + with byte constant VALUE. ALIGN is the alignment requirement + of memory. */ +static bool +arm_block_set_unaligned_non_vect (rtx dstbase, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT value, + unsigned HOST_WIDE_INT align) +{ + unsigned int i; + rtx dst, addr, mem; + rtx val_exp, val_reg, reg; + machine_mode mode; + HOST_WIDE_INT v = value; + + gcc_assert (align == 1 || align == 2); + + if (align == 2) + v |= (value << BITS_PER_UNIT); + + v = sext_hwi (v, BITS_PER_WORD); + val_exp = GEN_INT (v); + /* Skip if it isn't profitable. */ + if (!arm_block_set_non_vect_profit_p (val_exp, length, + align, true, false)) + return false; + + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + mode = (align == 2 ? 
HImode : QImode); + val_reg = force_reg (SImode, val_exp); + reg = gen_lowpart (mode, val_reg); + + for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode)) + { + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, mode, addr, i); + emit_move_insn (mem, reg); + } + + /* Handle single byte leftover. */ + if (i + 1 == length) + { + reg = gen_lowpart (QImode, val_reg); + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, QImode, addr, i); + emit_move_insn (mem, reg); + i++; + } + + gcc_assert (i == length); + return true; +} + +/* Set a block of memory using plain strd/str/strh/strb instructions, + to permit unaligned copies on processors which support unaligned + semantics for those instructions. We fill the first LENGTH bytes + of the memory area starting from DSTBASE with byte constant VALUE. + ALIGN is the alignment requirement of memory. */ +static bool +arm_block_set_aligned_non_vect (rtx dstbase, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT value, + unsigned HOST_WIDE_INT align) +{ + unsigned int i; + rtx dst, addr, mem; + rtx val_exp, val_reg, reg; + unsigned HOST_WIDE_INT v; + bool use_strd_p; + + use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0 + && TARGET_LDRD && current_tune->prefer_ldrd_strd); + + v = (value | (value << 8) | (value << 16) | (value << 24)); + if (length < UNITS_PER_WORD) + v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT); + + if (use_strd_p) + v |= (v << BITS_PER_WORD); + else + v = sext_hwi (v, BITS_PER_WORD); + + val_exp = GEN_INT (v); + /* Skip if it isn't profitable. */ + if (!arm_block_set_non_vect_profit_p (val_exp, length, + align, false, use_strd_p)) + { + if (!use_strd_p) + return false; + + /* Try without strd. */ + v = (v >> BITS_PER_WORD); + v = sext_hwi (v, BITS_PER_WORD); + val_exp = GEN_INT (v); + use_strd_p = false; + if (!arm_block_set_non_vect_profit_p (val_exp, length, + align, false, use_strd_p)) + return false; + } + + i = 0; + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + /* Handle double words using strd if possible. */ + if (use_strd_p) + { + val_reg = force_reg (DImode, val_exp); + reg = val_reg; + for (; (i + 8 <= length); i += 8) + { + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, DImode, addr, i); + emit_move_insn (mem, reg); + } + } + else + val_reg = force_reg (SImode, val_exp); + + /* Handle words. */ + reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg); + for (; (i + 4 <= length); i += 4) + { + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, SImode, addr, i); + if ((align & 3) == 0) + emit_move_insn (mem, reg); + else + emit_insn (gen_unaligned_storesi (mem, reg)); + } + + /* Merge last pair of STRH and STRB into a STR if possible. */ + if (unaligned_access && i > 0 && (i + 3) == length) + { + addr = plus_constant (Pmode, dst, i - 1); + mem = adjust_automodify_address (dstbase, SImode, addr, i - 1); + /* We are shifting one byte back, set the alignment accordingly. */ + if ((align & 1) == 0) + set_mem_align (mem, BITS_PER_UNIT); + + /* Most likely this is an unaligned access, and we can't tell at + compilation time. */ + emit_insn (gen_unaligned_storesi (mem, reg)); + return true; + } + + /* Handle half word leftover. 
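+     For instance, LENGTH = 6: the word loop above stores bytes 0-3, and
+     the remaining halfword at offset 4 goes out as strh, or through the
+     unaligned_storehi pattern when the destination is only byte-aligned.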
*/ + if (i + 2 <= length) + { + reg = gen_lowpart (HImode, val_reg); + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, HImode, addr, i); + if ((align & 1) == 0) + emit_move_insn (mem, reg); + else + emit_insn (gen_unaligned_storehi (mem, reg)); + + i += 2; + } + + /* Handle single byte leftover. */ + if (i + 1 == length) + { + reg = gen_lowpart (QImode, val_reg); + addr = plus_constant (Pmode, dst, i); + mem = adjust_automodify_address (dstbase, QImode, addr, i); + emit_move_insn (mem, reg); + } + + return true; +} + +/* Set a block of memory using vectorization instructions for both + aligned and unaligned cases. We fill the first LENGTH bytes of + the memory area starting from DSTBASE with byte constant VALUE. + ALIGN is the alignment requirement of memory. */ +static bool +arm_block_set_vect (rtx dstbase, + unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT value, + unsigned HOST_WIDE_INT align) +{ + /* Check whether we need to use unaligned store instruction. */ + if (((align & 3) != 0 || (length & 3) != 0) + /* Check whether unaligned store instruction is available. */ + && (!unaligned_access || BYTES_BIG_ENDIAN)) + return false; + + if ((align & 3) == 0) + return arm_block_set_aligned_vect (dstbase, length, value, align); + else + return arm_block_set_unaligned_vect (dstbase, length, value, align); +} + +/* Expand string store operation. Firstly we try to do that by using + vectorization instructions, then try with ARM unaligned access and + double-word store if profitable. OPERANDS[0] is the destination, + OPERANDS[1] is the number of bytes, operands[2] is the value to + initialize the memory, OPERANDS[3] is the known alignment of the + destination. */ +bool +arm_gen_setmem (rtx *operands) +{ + rtx dstbase = operands[0]; + unsigned HOST_WIDE_INT length; + unsigned HOST_WIDE_INT value; + unsigned HOST_WIDE_INT align; + + if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1])) + return false; + + length = UINTVAL (operands[1]); + if (length > 64) + return false; + + value = (UINTVAL (operands[2]) & 0xFF); + align = UINTVAL (operands[3]); + if (TARGET_NEON && length >= 8 + && current_tune->string_ops_prefer_neon + && arm_block_set_vect (dstbase, length, value, align)) + return true; + + if (!unaligned_access && (align & 3) != 0) + return arm_block_set_unaligned_non_vect (dstbase, length, value, align); + + return arm_block_set_aligned_non_vect (dstbase, length, value, align); +} + + +static bool +arm_macro_fusion_p (void) +{ + return current_tune->fuseable_ops != ARM_FUSE_NOTHING; +} + + +static bool +aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) +{ + rtx set_dest; + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + if (!prev_set + || !curr_set) + return false; + + if (any_condjump_p (curr)) + return false; + + if (!arm_macro_fusion_p ()) + return false; + + if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT) + { + /* We are trying to fuse + movw imm / movt imm + instructions as a group that gets scheduled together. 
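+
+      An illustrative pair for the constant 0x12345678 (example ours):
+        movw r0, #0x5678    @ sets the low 16 bits
+        movt r0, #0x1234    @ sets the high 16 bits
+      Keeping the two adjacent lets cores that implement MOVW/MOVT
+      fusion retire them as a single operation.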
*/ + + set_dest = SET_DEST (curr_set); + + if (GET_MODE (set_dest) != SImode) + return false; + + /* We are trying to match: + prev (movw) == (set (reg r0) (const_int imm16)) + curr (movt) == (set (zero_extract (reg r0) + (const_int 16) + (const_int 16)) + (const_int imm16_1)) + or + prev (movw) == (set (reg r1) + (high (symbol_ref ("SYM")))) + curr (movt) == (set (reg r0) + (lo_sum (reg r1) + (symbol_ref ("SYM")))) */ + if (GET_CODE (set_dest) == ZERO_EXTRACT) + { + if (CONST_INT_P (SET_SRC (curr_set)) + && CONST_INT_P (SET_SRC (prev_set)) + && REG_P (XEXP (set_dest, 0)) + && REG_P (SET_DEST (prev_set)) + && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) + return true; + } + else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM + && REG_P (SET_DEST (curr_set)) + && REG_P (SET_DEST (prev_set)) + && GET_CODE (SET_SRC (prev_set)) == HIGH + && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) + return true; + } + return false; +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT @@ -31116,4 +29364,175 @@ arm_asan_shadow_offset (void) return (unsigned HOST_WIDE_INT) 1 << 29; } + +/* This is a temporary fix for PR60655. Ideally we need + to handle most of these cases in the generic part but + currently we reject minus (..) (sym_ref). We try to + ameliorate the case with minus (sym_ref1) (sym_ref2) + where they are in the same section. */ + +static bool +arm_const_not_ok_for_debug_p (rtx p) +{ + tree decl_op0 = NULL; + tree decl_op1 = NULL; + + if (GET_CODE (p) == MINUS) + { + if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF) + { + decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1)); + if (decl_op1 + && GET_CODE (XEXP (p, 0)) == SYMBOL_REF + && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0)))) + { + if ((TREE_CODE (decl_op1) == VAR_DECL + || TREE_CODE (decl_op1) == CONST_DECL) + && (TREE_CODE (decl_op0) == VAR_DECL + || TREE_CODE (decl_op0) == CONST_DECL)) + return (get_variable_section (decl_op1, false) + != get_variable_section (decl_op0, false)); + + if (TREE_CODE (decl_op1) == LABEL_DECL + && TREE_CODE (decl_op0) == LABEL_DECL) + return (DECL_CONTEXT (decl_op1) + != DECL_CONTEXT (decl_op0)); + } + + return true; + } + } + + return false; +} + +/* Return TRUE if X is a reference to a value in a constant pool. */ +extern bool +arm_is_constant_pool_ref (rtx x) +{ + return (MEM_P (x) + && GET_CODE (XEXP (x, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))); +} + +/* If MEM is in the form of [base+offset], extract the two parts + of the address and store them in BASE and OFFSET, otherwise return + false after clearing BASE and OFFSET. */ + +static bool +extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (addr)). */ + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == REG) + { + *base = addr; + *offset = const0_rtx; + return true; + } + + if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == REG + && CONST_INT_P (XEXP (addr, 1))) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return true; + } + + *base = NULL_RTX; + *offset = NULL_RTX; + + return false; +} + +/* If INSN is a load or store of an address in the form of [base+offset], + extract the two parts and store them in BASE and OFFSET. IS_LOAD is set + to TRUE if it's a load. Return TRUE if INSN is such an instruction, + otherwise return FALSE.
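+
+   For example (ours): ldr r0, [r1, #4] sets BASE to r1, OFFSET to
+   (const_int 4) and *IS_LOAD to true, while str r0, [r1] sets BASE
+   to r1, OFFSET to const0_rtx and *IS_LOAD to false.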
*/ + +static bool +fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load) +{ + rtx x, dest, src; + + gcc_assert (INSN_P (insn)); + x = PATTERN (insn); + if (GET_CODE (x) != SET) + return false; + + src = SET_SRC (x); + dest = SET_DEST (x); + if (GET_CODE (src) == REG && GET_CODE (dest) == MEM) + { + *is_load = false; + extract_base_offset_in_addr (dest, base, offset); + } + else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG) + { + *is_load = true; + extract_base_offset_in_addr (src, base, offset); + } + else + return false; + + return (*base != NULL_RTX && *offset != NULL_RTX); +} + +/* Implement the TARGET_SCHED_FUSION_PRIORITY hook. + + Currently we only support fusing ldr and str instructions, so FUSION_PRI + and PRI are only calculated for these instructions. For any other + instruction, FUSION_PRI and PRI are simply set to MAX_PRI. In the future, + other kinds of instruction fusion can be supported by returning different + priorities. + + It's important that irrelevant instructions get the largest FUSION_PRI. */ + +static void +arm_sched_fusion_priority (rtx_insn *insn, int max_pri, + int *fusion_pri, int *pri) +{ + int tmp, off_val; + bool is_load; + rtx base, offset; + + gcc_assert (INSN_P (insn)); + + tmp = max_pri - 1; + if (!fusion_load_store (insn, &base, &offset, &is_load)) + { + *pri = tmp; + *fusion_pri = tmp; + return; + } + + /* Load goes first. */ + if (is_load) + *fusion_pri = tmp - 1; + else + *fusion_pri = tmp - 2; + + tmp /= 2; + + /* INSN with smaller base register goes first. */ + tmp -= ((REGNO (base) & 0xff) << 20); + + /* INSN with smaller offset goes first. */ + off_val = (int)(INTVAL (offset)); + if (off_val >= 0) + tmp -= (off_val & 0xfffff); + else + tmp += ((- off_val) & 0xfffff); + + *pri = tmp; + return; +} #include "gt-arm.h"
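+
+/* A worked example of the priority computation above (ours, with
+   assumed values: MAX_PRI = 100, base register r0 = regno 0): the
+   loads ldr r2, [r0, #4] and ldr r3, [r0, #8] both get
+   *FUSION_PRI = 98, while *PRI becomes 45 and 41 respectively
+   (tmp = 99 / 2 = 49, minus the offset), so the lower-offset load is
+   scheduled first and the pair ends up adjacent, ready for the
+   ldrd/strd-forming peepholes to merge.  */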