summaryrefslogtreecommitdiff
path: root/gcc/config/rs6000/rs6000.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r--gcc/config/rs6000/rs6000.c5373
1 files changed, 3812 insertions, 1561 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index c50019198d6..aad4c02132d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1,5 +1,5 @@
/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2014 Free Software Foundation, Inc.
+ Copyright (C) 1991-2015 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
@@ -31,23 +31,50 @@
#include "flags.h"
#include "recog.h"
#include "obstack.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "vec.h"
+#include "double-int.h"
+#include "input.h"
+#include "alias.h"
+#include "symtab.h"
+#include "wide-int.h"
+#include "inchash.h"
#include "tree.h"
+#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
+#include "hashtab.h"
+#include "function.h"
+#include "statistics.h"
+#include "real.h"
+#include "fixed-value.h"
+#include "expmed.h"
+#include "dojump.h"
+#include "explow.h"
+#include "emit-rtl.h"
+#include "stmt.h"
#include "expr.h"
+#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
-#include "function.h"
#include "output.h"
#include "dbxout.h"
+#include "predict.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfgrtl.h"
+#include "cfganal.h"
+#include "lcm.h"
+#include "cfgbuild.h"
+#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
-#include "hashtab.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
@@ -56,10 +83,7 @@
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
-#include "pointer-set.h"
#include "hash-table.h"
-#include "vec.h"
-#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
@@ -77,8 +101,14 @@
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
+#include "hash-map.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
+#include "builtins.h"
+#include "context.h"
+#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h" /* get declarations of xcoff_*_section_name */
#endif
@@ -140,8 +170,6 @@ typedef struct rs6000_stack {
This is added to the cfun structure. */
typedef struct GTY(()) machine_function
{
- /* Some local-dynamic symbol. */
- const char *some_ld_name;
/* Whether the instruction chain has been scanned already. */
int insn_chain_scanned_p;
/* Flags if __builtin_return_address (n) with n >= 1 was used. */
@@ -379,6 +407,7 @@ typedef unsigned char addr_mask_type;
#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
+#define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
/* Register type masks based on the type, of valid addressing modes. */
struct rs6000_reg_addr {
@@ -388,13 +417,14 @@ struct rs6000_reg_addr {
enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
+ bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
};
static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
static inline bool
-mode_supports_pre_incdec_p (enum machine_mode mode)
+mode_supports_pre_incdec_p (machine_mode mode)
{
return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
!= 0);
@@ -402,7 +432,7 @@ mode_supports_pre_incdec_p (enum machine_mode mode)
/* Helper function to say whether a mode supports PRE_MODIFY. */
static inline bool
-mode_supports_pre_modify_p (enum machine_mode mode)
+mode_supports_pre_modify_p (machine_mode mode)
{
return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
!= 0);
@@ -1060,7 +1090,7 @@ static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
static tree (*rs6000_veclib_handler) (tree, tree, tree);
-static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
+static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
@@ -1068,24 +1098,24 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
-static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
-static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
+static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
+static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
-static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
+static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
bool);
-static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
-static bool is_microcoded_insn (rtx);
-static bool is_nonpipeline_insn (rtx);
-static bool is_cracked_insn (rtx);
+static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
+static bool is_microcoded_insn (rtx_insn *);
+static bool is_nonpipeline_insn (rtx_insn *);
+static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
-static bool set_to_load_agen (rtx,rtx);
-static bool insn_terminates_group_p (rtx , enum group_termination);
-static bool insn_must_be_first_in_group (rtx);
-static bool insn_must_be_last_in_group (rtx);
+static bool set_to_load_agen (rtx_insn *,rtx_insn *);
+static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
+static bool insn_must_be_first_in_group (rtx_insn *);
+static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
-static tree builtin_function_type (enum machine_mode, enum machine_mode,
- enum machine_mode, enum machine_mode,
+static tree builtin_function_type (machine_mode, machine_mode,
+ machine_mode, machine_mode,
enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
@@ -1097,44 +1127,42 @@ static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
-int easy_vector_constant (rtx, enum machine_mode);
-static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
+int easy_vector_constant (rtx, machine_mode);
+static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
-static int rs6000_tls_symbol_ref_1 (rtx *, void *);
-static int rs6000_get_some_local_dynamic_name_1 (rtx *, void *);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
-static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
+static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
int, int *);
-static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
+static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
- enum machine_mode, rtx);
+ machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
- enum machine_mode,
+ machine_mode,
rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
- enum machine_mode);
+ machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
enum reg_class,
- enum machine_mode);
-static bool rs6000_cannot_change_mode_class (enum machine_mode,
- enum machine_mode,
+ machine_mode);
+static bool rs6000_cannot_change_mode_class (machine_mode,
+ machine_mode,
enum reg_class);
-static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
- enum machine_mode,
+static bool rs6000_debug_cannot_change_mode_class (machine_mode,
+ machine_mode,
enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
-rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
+rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
int, int *)
= rs6000_legitimize_reload_address;
@@ -1142,18 +1170,18 @@ static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
= rs6000_mode_dependent_address;
enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
- enum machine_mode, rtx)
+ machine_mode, rtx)
= rs6000_secondary_reload_class;
enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
= rs6000_preferred_reload_class;
bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
- enum machine_mode)
+ machine_mode)
= rs6000_secondary_memory_needed;
-bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
- enum machine_mode,
+bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
+ machine_mode,
enum reg_class)
= rs6000_cannot_change_mode_class;
@@ -1167,33 +1195,46 @@ static void rs6000_print_builtin_options (FILE *, int, const char *,
static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
enum rs6000_reg_type,
- enum machine_mode,
+ machine_mode,
secondary_reload_info *,
bool);
+rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
-struct GTY(()) toc_hash_struct
+struct GTY((for_user)) toc_hash_struct
{
/* `key' will satisfy CONSTANT_P; in fact, it will satisfy
ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
rtx key;
- enum machine_mode key_mode;
+ machine_mode key_mode;
int labelno;
};
-static GTY ((param_is (struct toc_hash_struct))) htab_t toc_hash_table;
+struct toc_hasher : ggc_hasher<toc_hash_struct *>
+{
+ static hashval_t hash (toc_hash_struct *);
+ static bool equal (toc_hash_struct *, toc_hash_struct *);
+};
+
+static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Hash table to keep track of the argument types for builtin functions. */
-struct GTY(()) builtin_hash_struct
+struct GTY((for_user)) builtin_hash_struct
{
tree type;
- enum machine_mode mode[4]; /* return value + 3 arguments. */
+ machine_mode mode[4]; /* return value + 3 arguments. */
unsigned char uns_p[4]; /* and whether the types are unsigned. */
};
-static GTY ((param_is (struct builtin_hash_struct))) htab_t builtin_hash_table;
+struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
+{
+ static hashval_t hash (builtin_hash_struct *);
+ static bool equal (builtin_hash_struct *, builtin_hash_struct *);
+};
+
+static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names. */
@@ -1221,7 +1262,12 @@ char rs6000_reg_names[][8] =
/* Soft frame pointer. */
"sfp",
/* HTM SPR registers. */
- "tfhar", "tfiar", "texasr"
+ "tfhar", "tfiar", "texasr",
+ /* SPE High registers. */
+ "0", "1", "2", "3", "4", "5", "6", "7",
+ "8", "9", "10", "11", "12", "13", "14", "15",
+ "16", "17", "18", "19", "20", "21", "22", "23",
+ "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
@@ -1249,7 +1295,12 @@ static const char alt_reg_names[][8] =
/* Soft frame pointer. */
"sfp",
/* HTM SPR registers. */
- "tfhar", "tfiar", "texasr"
+ "tfhar", "tfiar", "texasr",
+ /* SPE High registers. */
+ "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
+ "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
+ "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
+ "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
@@ -1460,10 +1511,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
-/* On rs6000, function arguments are promoted, as are function return
- values. */
#undef TARGET_PROMOTE_FUNCTION_MODE
-#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
+#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
@@ -1518,6 +1567,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override
@@ -1621,6 +1673,16 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
+
+#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
+#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
+
+#undef TARGET_LIBGCC_CMP_RETURN_MODE
+#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
+#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
+#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
/* Processor table. */
@@ -1670,7 +1732,7 @@ rs6000_cpu_name_lookup (const char *name)
PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
static int
-rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
+rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
unsigned HOST_WIDE_INT reg_size;
@@ -1692,7 +1754,7 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
SCmode so as to pass the value correctly in a pair of
registers. */
else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
- && !DECIMAL_FLOAT_MODE_P (mode))
+ && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
reg_size = UNITS_PER_FP_WORD;
else
@@ -1704,7 +1766,7 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
/* Value is 1 if hard register REGNO can hold a value of machine-mode
MODE. */
static int
-rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
+rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
{
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
@@ -1723,8 +1785,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
asked for it. */
if (TARGET_VSX && VSX_REGNO_P (regno)
&& (VECTOR_MEM_VSX_P (mode)
- || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
- || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
+ || reg_addr[mode].scalar_in_vmx_p
|| (TARGET_VSX_TIMODE && mode == TImode)
|| (TARGET_VADDUQM && mode == V1TImode)))
{
@@ -1733,10 +1794,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
if (ALTIVEC_REGNO_P (regno))
{
- if (mode == SFmode && !TARGET_UPPER_REGS_SF)
- return 0;
-
- if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
+ if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
return 0;
return ALTIVEC_REGNO_P (last_regno);
@@ -1752,9 +1810,6 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
modes and DImode. */
if (FP_REGNO_P (regno))
{
- if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
- return 0;
-
if (SCALAR_FLOAT_MODE_P (mode)
&& (mode != TDmode || (regno % 2) == 0)
&& FP_REGNO_P (last_regno))
@@ -1776,13 +1831,17 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
return GET_MODE_CLASS (mode) == MODE_CC;
if (CA_REGNO_P (regno))
- return mode == BImode;
+ return mode == Pmode || mode == SImode;
/* AltiVec only in AldyVec registers. */
if (ALTIVEC_REGNO_P (regno))
return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
|| mode == V1TImode);
+ /* ...but GPRs can hold SIMD data on the SPE in one register. */
+ if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
+ return 1;
+
/* We cannot put non-VSX TImode or PTImode anywhere except general register
and it must be able to fit within the register set. */
@@ -1891,6 +1950,54 @@ rs6000_debug_vector_unit (enum rs6000_vector v)
return ret;
}
+/* Inner function printing just the address mask for a particular reload
+ register class. */
+DEBUG_FUNCTION char *
+rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
+{
+ static char ret[8];
+ char *p = ret;
+
+ if ((mask & RELOAD_REG_VALID) != 0)
+ *p++ = 'v';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_MULTIPLE) != 0)
+ *p++ = 'm';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_INDEXED) != 0)
+ *p++ = 'i';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_OFFSET) != 0)
+ *p++ = 'o';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
+ *p++ = '+';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
+ *p++ = '+';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ if ((mask & RELOAD_REG_AND_M16) != 0)
+ *p++ = '&';
+ else if (keep_spaces)
+ *p++ = ' ';
+
+ *p = '\0';
+
+ return ret;
+}
+
/* Print the address masks in a human readble fashion. */
DEBUG_FUNCTION void
rs6000_debug_print_mode (ssize_t m)
@@ -1899,30 +2006,22 @@ rs6000_debug_print_mode (ssize_t m)
fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
for (rc = 0; rc < N_RELOAD_REG; rc++)
- {
- addr_mask_type mask = reg_addr[m].addr_mask[rc];
- fprintf (stderr,
- " %s: %c%c%c%c%c%c",
- reload_reg_map[rc].name,
- (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
- (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
- (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
- (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
- (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
- (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
- }
+ fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
+ rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
if (rs6000_vector_unit[m] != VECTOR_NONE
|| rs6000_vector_mem[m] != VECTOR_NONE
|| (reg_addr[m].reload_store != CODE_FOR_nothing)
- || (reg_addr[m].reload_load != CODE_FOR_nothing))
+ || (reg_addr[m].reload_load != CODE_FOR_nothing)
+ || reg_addr[m].scalar_in_vmx_p)
{
fprintf (stderr,
- " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
+ " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
rs6000_debug_vector_unit (rs6000_vector_unit[m]),
rs6000_debug_vector_unit (rs6000_vector_mem[m]),
(reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
- (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
+ (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
}
fputs ("\n", stderr);
@@ -1952,7 +2051,7 @@ rs6000_debug_reg_global (void)
struct cl_target_option cl_opts;
/* Modes we want tieable information on. */
- static const enum machine_mode print_tieable_modes[] = {
+ static const machine_mode print_tieable_modes[] = {
QImode,
HImode,
SImode,
@@ -2039,6 +2138,10 @@ rs6000_debug_reg_global (void)
"wd reg_class = %s\n"
"wf reg_class = %s\n"
"wg reg_class = %s\n"
+ "wh reg_class = %s\n"
+ "wi reg_class = %s\n"
+ "wj reg_class = %s\n"
+ "wk reg_class = %s\n"
"wl reg_class = %s\n"
"wm reg_class = %s\n"
"wr reg_class = %s\n"
@@ -2058,6 +2161,10 @@ rs6000_debug_reg_global (void)
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
@@ -2078,13 +2185,13 @@ rs6000_debug_reg_global (void)
for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
{
- enum machine_mode mode1 = print_tieable_modes[m1];
+ machine_mode mode1 = print_tieable_modes[m1];
bool first_time = true;
nl = (const char *)0;
for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
{
- enum machine_mode mode2 = print_tieable_modes[m2];
+ machine_mode mode2 = print_tieable_modes[m2];
if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
{
if (first_time)
@@ -2282,6 +2389,24 @@ rs6000_debug_reg_global (void)
if (rs6000_float_gprs)
fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
+ fprintf (stderr, DEBUG_FMT_S, "fprs",
+ (TARGET_FPRS ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "single_float",
+ (TARGET_SINGLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "double_float",
+ (TARGET_DOUBLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "soft_float",
+ (TARGET_SOFT_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_single",
+ (TARGET_E500_SINGLE ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_double",
+ (TARGET_E500_DOUBLE ? "true" : "false"));
+
if (TARGET_LINK_STACK)
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
@@ -2328,6 +2453,8 @@ rs6000_setup_reg_addr_masks (void)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
{
+ machine_mode m2 = (machine_mode)m;
+
/* SDmode is special in that we want to access it only via REG+REG
addressing on power7 and above, since we want to use the LFIWZX and
STFIWZX instructions to load it. */
@@ -2356,19 +2483,15 @@ rs6000_setup_reg_addr_masks (void)
/* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
addressing. Restrict addressing on SPE for 64-bit types
because of the SUBREG hackery used to address 64-bit floats in
- '32-bit' GPRs. To simplify secondary reload, don't allow
- update forms on scalar floating point types that can go in the
- upper registers. */
+ '32-bit' GPRs. */
if (TARGET_UPDATE
&& (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
- && GET_MODE_SIZE (m) <= 8
- && !VECTOR_MODE_P (m)
- && !COMPLEX_MODE_P (m)
+ && GET_MODE_SIZE (m2) <= 8
+ && !VECTOR_MODE_P (m2)
+ && !COMPLEX_MODE_P (m2)
&& !indexed_only_p
- && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
- && !(m == DFmode && TARGET_UPPER_REGS_DF)
- && !(m == SFmode && TARGET_UPPER_REGS_SF))
+ && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
{
addr_mask |= RELOAD_REG_PRE_INCDEC;
@@ -2401,6 +2524,12 @@ rs6000_setup_reg_addr_masks (void)
&& (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
addr_mask |= RELOAD_REG_OFFSET;
+ /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
+ addressing on 128-bit types. */
+ if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
+ && (addr_mask & RELOAD_REG_VALID) != 0)
+ addr_mask |= RELOAD_REG_AND_M16;
+
reg_addr[m].addr_mask[rc] = addr_mask;
any_addr_mask |= addr_mask;
}
@@ -2438,7 +2567,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
- rs6000_regno_regclass[CA_REGNO] = CA_REGS;
+ rs6000_regno_regclass[CA_REGNO] = NO_REGS;
rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
@@ -2567,13 +2696,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_align[V1TImode] = 128;
}
- /* DFmode, see if we want to use the VSX unit. */
+ /* DFmode, see if we want to use the VSX unit. Memory is handled
+ differently, so don't set rs6000_vector_mem. */
if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
{
rs6000_vector_unit[DFmode] = VECTOR_VSX;
- rs6000_vector_mem[DFmode]
- = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
- rs6000_vector_align[DFmode] = align64;
+ rs6000_vector_align[DFmode] = 64;
+ }
+
+ /* SFmode, see if we want to use the VSX unit. */
+ if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
+ {
+ rs6000_vector_unit[SFmode] = VECTOR_VSX;
+ rs6000_vector_align[SFmode] = 32;
}
/* Allow TImode in VSX register and set the VSX memory macros. */
@@ -2599,37 +2734,44 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
f - Register class to use with traditional SFmode instructions.
v - Altivec register.
wa - Any VSX register.
+ wc - Reserved to represent individual CR bits (used in LLVM).
wd - Preferred register class for V2DFmode.
wf - Preferred register class for V4SFmode.
wg - Float register for power6x move insns.
+ wh - FP register for direct move instructions.
+ wi - FP or VSX register to hold 64-bit integers for VSX insns.
+ wj - FP or VSX register to hold 64-bit integers for direct moves.
+ wk - FP or VSX register to hold 64-bit doubles for direct moves.
wl - Float register if we can do 32-bit signed int loads.
wm - VSX register for ISA 2.07 direct move operations.
+ wn - always NO_REGS.
wr - GPR if 64-bit mode is permitted.
ws - Register class to do ISA 2.06 DF operations.
+ wt - VSX register for TImode in VSX registers.
wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
- wt - VSX register for TImode in VSX registers.
ww - Register class to do SF conversions in with VSX operations.
wx - Float register if we can do 32-bit int stores.
wy - Register class to do ISA 2.07 SF operations.
wz - Float register if we can do 32-bit unsigned int loads. */
if (TARGET_HARD_FLOAT && TARGET_FPRS)
- rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
- rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
if (TARGET_VSX)
{
rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
+ rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
+ rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
if (TARGET_VSX_TIMODE)
- rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
- if (TARGET_UPPER_REGS_DF)
+ if (TARGET_UPPER_REGS_DF) /* DFmode */
{
rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
@@ -2643,19 +2785,26 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_ALTIVEC)
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
- if (TARGET_MFPGPR)
+ if (TARGET_MFPGPR) /* DFmode */
rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
if (TARGET_LFIWAX)
- rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
if (TARGET_DIRECT_MOVE)
- rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
+ = rs6000_constraints[RS6000_CONSTRAINT_wi];
+ rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
+ = rs6000_constraints[RS6000_CONSTRAINT_ws];
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+ }
if (TARGET_POWERPC64)
rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
- if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
+ if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
{
rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
@@ -2670,10 +2819,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
if (TARGET_STFIWX)
- rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
if (TARGET_LFIWZX)
- rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
/* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
@@ -2694,55 +2843,42 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
- if (TARGET_VSX && TARGET_UPPER_REGS_DF)
- {
- reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
- reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
- reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
- reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
- }
- if (TARGET_P8_VECTOR)
- {
- reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
- reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
- reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
- reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
- }
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
+
if (TARGET_VSX_TIMODE)
{
reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
}
+
if (TARGET_DIRECT_MOVE)
{
- if (TARGET_POWERPC64)
- {
- reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
- reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
- reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
- reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
- reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
- reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
- reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
- reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
- reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
-
- reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
- reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
- reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
- reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
- reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
- reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
- reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
- reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
- reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
- }
- else
- {
- reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
- reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
- reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
- }
+ reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
+ reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
+ reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
+ reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
+ reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
+ reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
+ reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
+ reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
+ reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
+ reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
+ reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
+ reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
+ reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
+ reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
}
}
else
@@ -2761,38 +2897,46 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
- if (TARGET_VSX && TARGET_UPPER_REGS_DF)
- {
- reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
- reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
- reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
- reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
- }
- if (TARGET_P8_VECTOR)
- {
- reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
- reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
- reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
- reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
- }
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
+
if (TARGET_VSX_TIMODE)
{
reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
}
+
+ if (TARGET_DIRECT_MOVE)
+ {
+ reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
+ reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
+ reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
+ }
}
+
+ if (TARGET_UPPER_REGS_DF)
+ reg_addr[DFmode].scalar_in_vmx_p = true;
+
+ if (TARGET_UPPER_REGS_SF)
+ reg_addr[SFmode].scalar_in_vmx_p = true;
}
/* Precalculate HARD_REGNO_NREGS. */
for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
rs6000_hard_regno_nregs[m][r]
- = rs6000_hard_regno_nregs_internal (r, (enum machine_mode)m);
+ = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
/* Precalculate HARD_REGNO_MODE_OK. */
for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
- if (rs6000_hard_regno_mode_ok (r, (enum machine_mode)m))
+ if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
rs6000_hard_regno_mode_ok_p[m][r] = true;
/* Precalculate CLASS_MAX_NREGS sizes. */
@@ -2814,6 +2958,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
{
+ machine_mode m2 = (machine_mode)m;
int reg_size2 = reg_size;
/* TFmode/TDmode always takes 2 registers, even in VSX. */
@@ -2822,7 +2967,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
reg_size2 = UNITS_PER_FP_WORD;
rs6000_class_max_nregs[m][c]
- = (GET_MODE_SIZE (m) + reg_size2 - 1) / reg_size2;
+ = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
}
}
@@ -3016,7 +3161,23 @@ rs6000_builtin_mask_calculate (void)
| ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
| ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
| ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
- | ((TARGET_HTM) ? RS6000_BTM_HTM : 0));
+ | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
+ | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
+ | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
+ | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
+}
+
+/* Implement TARGET_MD_ASM_CLOBBERS. All asm statements are considered
+ to clobber the XER[CA] bit because clobbering that bit without telling
+ the compiler worked just fine with versions of GCC before GCC 5, and
+ breaking a lot of older code in ways that are hard to track down is
+ not such a great idea. */
+
+static tree
+rs6000_md_asm_clobbers (tree, tree, tree clobbers)
+{
+ tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
+ return tree_cons (NULL_TREE, s, clobbers);
}
/* Override command line options. Mostly we process the processor type and
@@ -3373,6 +3534,61 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
}
+ if (TARGET_DFP && !TARGET_HARD_FLOAT)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
+ error ("-mhard-dfp requires -mhard-float");
+ rs6000_isa_flags &= ~OPTION_MASK_DFP;
+ }
+
+ /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
+ -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
+ the individual option. */
+ if (TARGET_UPPER_REGS > 0)
+ {
+ if (TARGET_VSX
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
+ {
+ rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
+ rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
+ }
+ if (TARGET_P8_VECTOR
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
+ {
+ rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
+ rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
+ }
+ }
+ else if (TARGET_UPPER_REGS == 0)
+ {
+ if (TARGET_VSX
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
+ rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
+ }
+ if (TARGET_P8_VECTOR
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
+ {
+ rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
+ rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
+ }
+ }
+
+ if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
+ error ("-mupper-regs-df requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
+ }
+
+ if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
+ error ("-mupper-regs-sf requires -mpower8-vector");
+ rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
+ }
+
/* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
silently turn off quad memory mode. */
if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
@@ -4016,6 +4232,15 @@ static void
rs6000_option_override (void)
{
(void) rs6000_option_override_internal (true);
+
+ /* Register machine-specific passes. This needs to be done at start-up.
+ It's convenient to do it here (like i386 does). */
+ opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
+
+ struct register_pass_info analyze_swaps_info
+ = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
+
+ register_pass (&analyze_swaps_info);
}
@@ -4057,7 +4282,7 @@ rs6000_loop_align (rtx label)
/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
static int
-rs6000_loop_align_max_skip (rtx label)
+rs6000_loop_align_max_skip (rtx_insn *label)
{
return (1 << rs6000_loop_align (label)) - 1;
}
@@ -4095,7 +4320,7 @@ rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_pac
/* Return true if the vector misalignment factor is supported by the
target. */
static bool
-rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
+rs6000_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
bool is_packed)
@@ -4253,8 +4478,8 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* Implement targetm.vectorize.preferred_simd_mode. */
-static enum machine_mode
-rs6000_preferred_simd_mode (enum machine_mode mode)
+static machine_mode
+rs6000_preferred_simd_mode (machine_mode mode)
{
if (TARGET_VSX)
switch (mode)
@@ -4426,7 +4651,7 @@ rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
tree fntype, new_fndecl, bdecl = NULL_TREE;
int n_args = 1;
const char *bname;
- enum machine_mode el_mode, in_mode;
+ machine_mode el_mode, in_mode;
int n, in_n;
/* Libmass is suitable for unsafe math only as it does not correctly support
@@ -4566,7 +4791,7 @@ static tree
rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
tree type_in)
{
- enum machine_mode in_mode, out_mode;
+ machine_mode in_mode, out_mode;
int in_n, out_n;
if (TARGET_DEBUG_BUILTIN)
@@ -4852,6 +5077,28 @@ rs6000_file_start (void)
putc ('\n', file);
}
+#ifdef USING_ELFOS_H
+ if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
+ || !global_options_set.x_rs6000_cpu_index)
+ {
+ fputs ("\t.machine ", asm_out_file);
+ if ((TARGET_DEFAULT & OPTION_MASK_DIRECT_MOVE) != 0)
+ fputs ("power8\n", asm_out_file);
+ else if ((TARGET_DEFAULT & OPTION_MASK_POPCNTD) != 0)
+ fputs ("power7\n", asm_out_file);
+ else if ((TARGET_DEFAULT & OPTION_MASK_CMPB) != 0)
+ fputs ("power6\n", asm_out_file);
+ else if ((TARGET_DEFAULT & OPTION_MASK_POPCNTB) != 0)
+ fputs ("power5\n", asm_out_file);
+ else if ((TARGET_DEFAULT & OPTION_MASK_MFCRF) != 0)
+ fputs ("power4\n", asm_out_file);
+ else if ((TARGET_DEFAULT & OPTION_MASK_POWERPC64) != 0)
+ fputs ("ppc64\n", asm_out_file);
+ else
+ fputs ("ppc\n", asm_out_file);
+ }
+#endif
+
if (DEFAULT_ABI == ABI_ELFv2)
fprintf (file, "\t.abiversion 2\n");
@@ -4893,7 +5140,7 @@ int
num_insns_constant_wide (HOST_WIDE_INT value)
{
/* signed constant loadable with addi */
- if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
+ if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
return 1;
/* constant loadable with addis */
@@ -4925,7 +5172,7 @@ num_insns_constant_wide (HOST_WIDE_INT value)
}
int
-num_insns_constant (rtx op, enum machine_mode mode)
+num_insns_constant (rtx op, machine_mode mode)
{
HOST_WIDE_INT low, high;
@@ -4938,6 +5185,15 @@ num_insns_constant (rtx op, enum machine_mode mode)
else
return num_insns_constant_wide (INTVAL (op));
+ case CONST_WIDE_INT:
+ {
+ int i;
+ int ins = CONST_WIDE_INT_NUNITS (op) - 1;
+ for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
+ ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
+ return ins;
+ }
+
case CONST_DOUBLE:
if (mode == SFmode || mode == SDmode)
{
@@ -5019,8 +5275,8 @@ const_vector_elt_as_int (rtx op, unsigned int elt)
static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
- enum machine_mode mode = GET_MODE (op);
- enum machine_mode inner = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (op);
+ machine_mode inner = GET_MODE_INNER (mode);
unsigned i;
unsigned nunits;
@@ -5096,7 +5352,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
with a vspltisb, vspltish or vspltisw. */
bool
-easy_altivec_constant (rtx op, enum machine_mode mode)
+easy_altivec_constant (rtx op, machine_mode mode)
{
unsigned step, copies;
@@ -5112,8 +5368,6 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
else if (mode == V2DImode)
{
- /* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
- easy. */
if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
|| GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
return false;
@@ -5166,7 +5420,7 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
rtx
gen_easy_altivec_constant (rtx op)
{
- enum machine_mode mode = GET_MODE (op);
+ machine_mode mode = GET_MODE (op);
int nunits = GET_MODE_NUNITS (mode);
rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
unsigned step = nunits / 4;
@@ -5201,7 +5455,7 @@ const char *
output_vec_const_move (rtx *operands)
{
int cst, cst2;
- enum machine_mode mode;
+ machine_mode mode;
rtx dest, vec;
dest = operands[0];
@@ -5260,8 +5514,10 @@ output_vec_const_move (rtx *operands)
operands[2] = CONST_VECTOR_ELT (vec, 1);
if (cst == cst2)
return "li %0,%1\n\tevmergelo %0,%0,%0";
- else
+ else if (WORDS_BIG_ENDIAN)
return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
+ else
+ return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
}
/* Initialize TARGET of vector PAIRED to VALS. */
@@ -5269,7 +5525,7 @@ output_vec_const_move (rtx *operands)
void
paired_expand_vector_init (rtx target, rtx vals)
{
- enum machine_mode mode = GET_MODE (target);
+ machine_mode mode = GET_MODE (target);
int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0;
rtx x, new_rtx, tmp, constant_op, op1, op2;
@@ -5278,9 +5534,7 @@ paired_expand_vector_init (rtx target, rtx vals)
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!(CONST_INT_P (x)
- || GET_CODE (x) == CONST_DOUBLE
- || GET_CODE (x) == CONST_FIXED))
+ if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
++n_var;
}
if (n_var == 0)
@@ -5421,8 +5675,8 @@ paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
void
rs6000_expand_vector_init (rtx target, rtx vals)
{
- enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0, one_var = -1;
bool all_same = true, all_const_zero = true;
@@ -5432,9 +5686,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!(CONST_INT_P (x)
- || GET_CODE (x) == CONST_DOUBLE
- || GET_CODE (x) == CONST_FIXED))
+ if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
++n_var, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;
@@ -5584,8 +5836,8 @@ rs6000_expand_vector_init (rtx target, rtx vals)
void
rs6000_expand_vector_set (rtx target, rtx val, int elt)
{
- enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (target);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
rtx reg = gen_reg_rtx (mode);
rtx mask, mem, x;
int width = GET_MODE_SIZE (inner_mode);
@@ -5635,11 +5887,15 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
UNSPEC_VPERM);
else
{
- /* Invert selector. */
+ /* Invert selector. We prefer to generate VNAND on P8 so
+ that future fusion opportunities can kick in, but must
+ generate VNOR elsewhere. */
rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
- rtx andx = gen_rtx_AND (V16QImode, notx, notx);
+ rtx iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
rtx tmp = gen_reg_rtx (V16QImode);
- emit_move_insn (tmp, andx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
/* Permute with operands reversed and adjusted selector. */
x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
@@ -5654,8 +5910,8 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
void
rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
{
- enum machine_mode mode = GET_MODE (vec);
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode mode = GET_MODE (vec);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
rtx mem;
if (VECTOR_MEM_VSX_P (mode))
@@ -5757,7 +6013,7 @@ build_mask64_2_operands (rtx in, rtx *out)
/* Return TRUE if OP is an invalid SUBREG operation on the e500. */
bool
-invalid_e500_subreg (rtx op, enum machine_mode mode)
+invalid_e500_subreg (rtx op, machine_mode mode)
{
if (TARGET_E500_DOUBLE)
{
@@ -5838,6 +6094,32 @@ rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
return align;
}
+/* Previous GCC releases forced all vector types to have 16-byte alignment. */
+
+bool
+rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
+{
+ if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
+ {
+ if (computed != 128)
+ {
+ static bool warned;
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the layout of aggregates containing vectors with"
+ " %d-byte alignment has changed in GCC 5",
+ computed / BITS_PER_UNIT);
+ }
+ }
+ /* In current GCC there is no special case. */
+ return false;
+ }
+
+ return false;
+}
+
/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
@@ -5903,7 +6185,7 @@ darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED)
+ machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
rtx sym_ref;
@@ -6071,7 +6353,7 @@ address_offset (rtx op)
of the address calculation. */
bool
-mem_operand_gpr (rtx op, enum machine_mode mode)
+mem_operand_gpr (rtx op, machine_mode mode)
{
unsigned HOST_WIDE_INT offset;
int extra;
@@ -6086,7 +6368,8 @@ mem_operand_gpr (rtx op, enum machine_mode mode)
return false;
extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
- gcc_assert (extra >= 0);
+ if (extra < 0)
+ extra = 0;
if (GET_CODE (addr) == LO_SUM)
/* For lo_sum addresses, we must allow any offset except one that
@@ -6099,7 +6382,7 @@ mem_operand_gpr (rtx op, enum machine_mode mode)
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
static bool
-reg_offset_addressing_ok_p (enum machine_mode mode)
+reg_offset_addressing_ok_p (machine_mode mode)
{
switch (mode)
{
@@ -6167,7 +6450,7 @@ virtual_stack_registers_memory_p (rtx op)
static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
- enum machine_mode mode)
+ machine_mode mode)
{
tree decl, type;
unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
@@ -6199,7 +6482,7 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
else if (CONSTANT_POOL_ADDRESS_P (op))
{
/* It would be nice to have get_pool_align().. */
- enum machine_mode cmode = get_pool_mode (op);
+ machine_mode cmode = get_pool_mode (op);
dalign = GET_MODE_ALIGNMENT (cmode);
}
@@ -6311,7 +6594,7 @@ toc_relative_expr_p (const_rtx op, bool strict)
if X is a toc-relative address known to be offsettable within MODE. */
bool
-legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
+legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
bool strict)
{
return (toc_relative_expr_p (x, strict)
@@ -6323,7 +6606,7 @@ legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
}
static bool
-legitimate_small_data_p (enum machine_mode mode, rtx x)
+legitimate_small_data_p (machine_mode mode, rtx x)
{
return (DEFAULT_ABI == ABI_V4
&& !flag_pic && !TARGET_TOC
@@ -6335,7 +6618,7 @@ legitimate_small_data_p (enum machine_mode mode, rtx x)
#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
bool
-rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
+rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
bool strict, bool worst_case)
{
unsigned HOST_WIDE_INT offset;
@@ -6442,7 +6725,7 @@ legitimate_indexed_address_p (rtx x, int strict)
}
bool
-avoiding_indexed_address_p (enum machine_mode mode)
+avoiding_indexed_address_p (machine_mode mode)
{
/* Avoid indexed addressing for modes that have non-indexed
load/store instruction forms. */
@@ -6456,7 +6739,7 @@ legitimate_indirect_address_p (rtx x, int strict)
}
bool
-macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
+macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
if (!TARGET_MACHO || !flag_pic
|| mode != SImode || GET_CODE (x) != MEM)
@@ -6475,7 +6758,7 @@ macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
}
static bool
-legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
+legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
if (GET_CODE (x) != LO_SUM)
return false;
@@ -6544,7 +6827,7 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
- enum machine_mode mode)
+ machine_mode mode)
{
unsigned int extra;
@@ -6668,6 +6951,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
&& TARGET_NO_TOC
&& ! flag_pic
&& GET_CODE (x) != CONST_INT
+ && GET_CODE (x) != CONST_WIDE_INT
&& GET_CODE (x) != CONST_DOUBLE
&& CONSTANT_P (x)
&& GET_MODE_NUNITS (mode) == 1
@@ -6694,10 +6978,10 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
/* Debug version of rs6000_legitimize_address. */
static rtx
-rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
rtx ret;
- rtx insns;
+ rtx_insn *insns;
start_sequence ();
ret = rs6000_legitimize_address (x, oldx, mode);
@@ -6764,6 +7048,16 @@ rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
fputs ("@dtprel+0x8000", file);
}
+/* Return true if X is a symbol that refers to real (rather than emulated)
+ TLS. */
+
+static bool
+rs6000_real_tls_symbol_ref_p (rtx x)
+{
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
+}
+
/* In the name of slightly smaller debug output, and to cater to
general assembler lossage, recognize various UNSPEC sequences
and turn them back into a direct symbol reference. */
@@ -6795,24 +7089,6 @@ rs6000_delegitimize_address (rtx orig_x)
if (GET_CODE (y) == UNSPEC
&& XINT (y, 1) == UNSPEC_TOCREL)
{
-#ifdef ENABLE_CHECKING
- if (REG_P (XVECEXP (y, 0, 1))
- && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
- {
- /* All good. */
- }
- else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
- {
- /* Weirdness alert. df_note_compute can replace r2 with a
- debug_expr when this unspec is in a debug_insn.
- Seen in gcc.dg/pr51957-1.c */
- }
- else
- {
- debug_rtx (orig_x);
- abort ();
- }
-#endif
y = XVECEXP (y, 0, 0);
#ifdef HAVE_AS_TLS
@@ -6821,7 +7097,7 @@ rs6000_delegitimize_address (rtx orig_x)
if (TARGET_XCOFF
&& GET_CODE (y) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (y)
- && SYMBOL_REF_TLS_MODEL (get_pool_constant (y)) >= TLS_MODEL_REAL)
+ && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
return orig_x;
#endif
@@ -6857,7 +7133,7 @@ rs6000_const_not_ok_for_debug_p (rtx x)
&& CONSTANT_POOL_ADDRESS_P (x))
{
rtx c = get_pool_constant (x);
- enum machine_mode cmode = get_pool_mode (x);
+ machine_mode cmode = get_pool_mode (x);
if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
return true;
}
@@ -7174,21 +7450,10 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
return dest;
}
-/* Return 1 if X contains a thread-local symbol. */
-
-static bool
-rs6000_tls_referenced_p (rtx x)
-{
- if (! TARGET_HAVE_TLS)
- return false;
-
- return for_each_rtx (&x, &rs6000_tls_symbol_ref_1, 0);
-}
-
/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
static bool
-rs6000_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
+rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
if (GET_CODE (x) == HIGH
&& GET_CODE (XEXP (x, 0)) == UNSPEC)
@@ -7202,16 +7467,7 @@ rs6000_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
return true;
/* Do not place an ELF TLS symbol in the constant pool. */
- return TARGET_ELF && rs6000_tls_referenced_p (x);
-}
-
-/* Return 1 if *X is a thread-local symbol. This is the same as
- rs6000_tls_symbol_ref except for the type of the unused argument. */
-
-static int
-rs6000_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
-{
- return RS6000_SYMBOL_REF_TLS_P (*x);
+ return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
@@ -7242,7 +7498,7 @@ use_toc_relative_ref (rtx sym)
The Darwin code is inside #if TARGET_MACHO because only then are the
machopic_* functions defined. */
static rtx
-rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
+rs6000_legitimize_reload_address (rtx x, machine_mode mode,
int opnum, int type,
int ind_levels ATTRIBUTE_UNUSED, int *win)
{
@@ -7375,7 +7631,11 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
naturally aligned. Since we say the address is good here, we
can't disable offsets from LO_SUMs in mem_operand_gpr.
FIXME: Allow offset from lo_sum for other modes too, when
- mem is sufficiently aligned. */
+ mem is sufficiently aligned.
+
+ Also disallow this if the type can go in VMX/Altivec registers, since
+ those registers do not have d-form (reg+offset) address modes. */
+ && !reg_addr[mode].scalar_in_vmx_p
&& mode != TFmode
&& mode != TDmode
&& (mode != TImode || !TARGET_VSX_TIMODE)
@@ -7442,7 +7702,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
/* Debug version of rs6000_legitimize_reload_address. */
static rtx
-rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
+rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
int opnum, int type,
int ind_levels, int *win)
{
@@ -7485,7 +7745,7 @@ rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
because adjacent memory cells are accessed by adding word-sized offsets
during assembly output. */
static bool
-rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
+rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
bool reg_offset_p = reg_offset_addressing_ok_p (mode);
@@ -7559,7 +7819,7 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
/* Debug version of rs6000_legitimate_address_p. */
static bool
-rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
+rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
bool reg_ok_strict)
{
bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
@@ -7686,7 +7946,7 @@ rs6000_find_base_term (rtx op)
in 32-bit mode, that the recog predicate rejects. */
static bool
-rs6000_offsettable_memref_p (rtx op, enum machine_mode reg_mode)
+rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
{
bool worst_case;
@@ -7799,53 +8059,51 @@ rs6000_conditional_register_usage (void)
}
-/* Try to output insns to set TARGET equal to the constant C if it can
- be done in less than N insns. Do all computations in MODE.
- Returns the place where the output has been placed if it can be
- done and the insns have been emitted. If it would take more than N
- insns, zero is returned and no insns and emitted. */
+/* Output insns to set DEST equal to the constant SOURCE as a series of
+ lis, ori and shl instructions and return TRUE. */
-rtx
-rs6000_emit_set_const (rtx dest, enum machine_mode mode,
- rtx source, int n ATTRIBUTE_UNUSED)
+bool
+rs6000_emit_set_const (rtx dest, rtx source)
{
- rtx result, insn, set;
- HOST_WIDE_INT c0, c1;
+ machine_mode mode = GET_MODE (dest);
+ rtx temp, set;
+ rtx_insn *insn;
+ HOST_WIDE_INT c;
+ gcc_checking_assert (CONST_INT_P (source));
+ c = INTVAL (source);
switch (mode)
{
- case QImode:
+ case QImode:
case HImode:
- if (dest == NULL)
- dest = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, dest, source));
- return dest;
+ return true;
case SImode:
- result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
- GEN_INT (INTVAL (source)
- & (~ (HOST_WIDE_INT) 0xffff))));
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
+ GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
emit_insn (gen_rtx_SET (VOIDmode, dest,
- gen_rtx_IOR (SImode, copy_rtx (result),
- GEN_INT (INTVAL (source) & 0xffff))));
- result = dest;
+ gen_rtx_IOR (SImode, copy_rtx (temp),
+ GEN_INT (c & 0xffff))));
break;
case DImode:
- switch (GET_CODE (source))
+ if (!TARGET_POWERPC64)
{
- case CONST_INT:
- c0 = INTVAL (source);
- c1 = -(c0 < 0);
- break;
+ rtx hi, lo;
- default:
- gcc_unreachable ();
+ hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
+ DImode);
+ lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
+ DImode);
+ emit_move_insn (hi, GEN_INT (c >> 32));
+ c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
+ emit_move_insn (lo, GEN_INT (c));
}
-
- result = rs6000_emit_set_long_const (dest, c0, c1);
+ else
+ rs6000_emit_set_long_const (dest, c);
break;
default:
@@ -7855,107 +8113,103 @@ rs6000_emit_set_const (rtx dest, enum machine_mode mode,
insn = get_last_insn ();
set = single_set (insn);
if (! CONSTANT_P (SET_SRC (set)))
- set_unique_reg_note (insn, REG_EQUAL, source);
+ set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
- return result;
+ return true;
}
-/* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
- fall back to a straight forward decomposition. We do this to avoid
- exponential run times encountered when looking for longer sequences
- with rs6000_emit_set_const. */
-static rtx
-rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
-{
- if (!TARGET_POWERPC64)
- {
- rtx operand1, operand2;
+/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
+ Output insns to set DEST equal to the constant C as a series of
+ lis, ori and shl instructions. */
- operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
- DImode);
- operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
- DImode);
- emit_move_insn (operand1, GEN_INT (c1));
- emit_move_insn (operand2, GEN_INT (c2));
+static void
+rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
+{
+ rtx temp;
+ HOST_WIDE_INT ud1, ud2, ud3, ud4;
+
+ ud1 = c & 0xffff;
+ c = c >> 16;
+ ud2 = c & 0xffff;
+ c = c >> 16;
+ ud3 = c & 0xffff;
+ c = c >> 16;
+ ud4 = c & 0xffff;
+
+ if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
+ || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
+ emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
+
+ else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
+ || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
+ {
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+ emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+ GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
+ if (ud1 != 0)
+ emit_move_insn (dest,
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud1)));
+ }
+ else if (ud3 == 0 && ud4 == 0)
+ {
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+ gcc_assert (ud2 & 0x8000);
+ emit_move_insn (copy_rtx (temp),
+ GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
+ if (ud1 != 0)
+ emit_move_insn (copy_rtx (temp),
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud1)));
+ emit_move_insn (dest,
+ gen_rtx_ZERO_EXTEND (DImode,
+ gen_lowpart (SImode,
+ copy_rtx (temp))));
+ }
+ else if ((ud4 == 0xffff && (ud3 & 0x8000))
+ || (ud4 == 0 && ! (ud3 & 0x8000)))
+ {
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+ emit_move_insn (copy_rtx (temp),
+ GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
+ if (ud2 != 0)
+ emit_move_insn (copy_rtx (temp),
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud2)));
+ emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+ gen_rtx_ASHIFT (DImode, copy_rtx (temp),
+ GEN_INT (16)));
+ if (ud1 != 0)
+ emit_move_insn (dest,
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud1)));
}
else
{
- HOST_WIDE_INT ud1, ud2, ud3, ud4;
-
- ud1 = c1 & 0xffff;
- ud2 = (c1 & 0xffff0000) >> 16;
- c2 = c1 >> 32;
- ud3 = c2 & 0xffff;
- ud4 = (c2 & 0xffff0000) >> 16;
-
- if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
- || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
- emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
-
- else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
- || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
- {
- emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
- - 0x80000000));
- if (ud1 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud1)));
- }
- else if (ud3 == 0 && ud4 == 0)
- {
- gcc_assert (ud2 & 0x8000);
- emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
- - 0x80000000));
- if (ud1 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud1)));
- emit_move_insn (copy_rtx (dest),
- gen_rtx_ZERO_EXTEND (DImode,
- gen_lowpart (SImode,
- copy_rtx (dest))));
- }
- else if ((ud4 == 0xffff && (ud3 & 0x8000))
- || (ud4 == 0 && ! (ud3 & 0x8000)))
- {
- emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
- - 0x80000000));
- if (ud2 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud2)));
- emit_move_insn (copy_rtx (dest),
- gen_rtx_ASHIFT (DImode, copy_rtx (dest),
- GEN_INT (16)));
- if (ud1 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud1)));
- }
- else
- {
- emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
- - 0x80000000));
- if (ud3 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud3)));
-
- emit_move_insn (copy_rtx (dest),
- gen_rtx_ASHIFT (DImode, copy_rtx (dest),
- GEN_INT (32)));
- if (ud2 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud2 << 16)));
- if (ud1 != 0)
- emit_move_insn (copy_rtx (dest),
- gen_rtx_IOR (DImode, copy_rtx (dest),
- GEN_INT (ud1)));
- }
+ temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
+
+ emit_move_insn (copy_rtx (temp),
+ GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
+ if (ud3 != 0)
+ emit_move_insn (copy_rtx (temp),
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud3)));
+
+ emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
+ gen_rtx_ASHIFT (DImode, copy_rtx (temp),
+ GEN_INT (32)));
+ if (ud2 != 0)
+ emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud2 << 16)));
+ if (ud1 != 0)
+ emit_move_insn (dest,
+ gen_rtx_IOR (DImode, copy_rtx (temp),
+ GEN_INT (ud1)));
}
- return dest;
}
/* Helper for the following. Get rid of [r+r] memory refs
@@ -7987,7 +8241,7 @@ rs6000_eliminate_indexed_memrefs (rtx operands[2])
/* Generate a vector of constants to permute MODE for a little-endian
storage operation by swapping the two halves of a vector. */
static rtvec
-rs6000_const_vec (enum machine_mode mode)
+rs6000_const_vec (machine_mode mode)
{
int i, subparts;
rtvec v;
@@ -8028,7 +8282,7 @@ rs6000_const_vec (enum machine_mode mode)
/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
for a VSX load or store operation. */
rtx
-rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
+rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
return gen_rtx_VEC_SELECT (mode, source, par);
@@ -8038,7 +8292,7 @@ rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
register DEST in mode MODE. The load is done with two permuting
insn's that represent an lxvd2x and xxpermdi. */
void
-rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
+rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
rtx tmp, permute_mem, permute_reg;
@@ -8062,7 +8316,7 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
register SOURCE in mode MODE. The store is done with two permuting
insn's that represent an xxpermdi and an stxvd2x. */
void
-rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
+rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
rtx tmp, permute_src, permute_tmp;
@@ -8089,7 +8343,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
handled with a split. The expand-time RTL generation allows
us to optimize away redundant pairs of register-permutes. */
void
-rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
+rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
{
gcc_assert (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (mode)
@@ -8111,7 +8365,7 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
/* Emit a move from SOURCE to DEST in mode MODE. */
void
-rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
+rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
{
rtx operands[2];
operands[0] = dest;
@@ -8132,21 +8386,12 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
}
/* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
- if (GET_CODE (operands[1]) == CONST_DOUBLE
- && ! FLOAT_MODE_P (mode)
+ if (CONST_WIDE_INT_P (operands[1])
&& GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
{
- /* FIXME. This should never happen. */
- /* Since it seems that it does, do the safe thing and convert
- to a CONST_INT. */
- operands[1] = gen_int_mode (CONST_DOUBLE_LOW (operands[1]), mode);
+ /* This should be fixed with the introduction of CONST_WIDE_INT. */
+ gcc_unreachable ();
}
- gcc_assert (GET_CODE (operands[1]) != CONST_DOUBLE
- || FLOAT_MODE_P (mode)
- || ((CONST_DOUBLE_HIGH (operands[1]) != 0
- || CONST_DOUBLE_LOW (operands[1]) < 0)
- && (CONST_DOUBLE_HIGH (operands[1]) != -1
- || CONST_DOUBLE_LOW (operands[1]) >= 0)));
/* Check if GCC is setting up a block move that will end up using FP
registers as temporaries. We must make sure this is acceptable. */
@@ -8176,7 +8421,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
/* Recognize the case where operand[1] is a reference to thread-local
data and load its address to a register. */
- if (rs6000_tls_referenced_p (operands[1]))
+ if (tls_referenced_p (operands[1]))
{
enum tls_model model;
rtx tmp = operands[1];
@@ -8207,9 +8452,11 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
|| ! nonimmediate_operand (operands[0], mode)))
goto emit_set;
- /* 128-bit constant floating-point values on Darwin should really be
- loaded as two parts. */
+ /* 128-bit constant floating-point values on Darwin should really be loaded
+ as two parts. However, this premature splitting is a problem when DFmode
+ values can go into Altivec registers. */
if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
+ && !reg_addr[DFmode].scalar_in_vmx_p
&& mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
{
rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
@@ -8228,6 +8475,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
+ /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
+ p1:SD) if p1 is not of floating point class and p0 is spilled as
+ we can have no analogous movsd_store for this. */
+ if (lra_in_progress && mode == DDmode
+ && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+ && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
+ && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
+ {
+ enum reg_class cl;
+ int regno = REGNO (SUBREG_REG (operands[1]));
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
+ }
+ if (regno >= 0 && ! FP_REGNO_P (regno))
+ {
+ mode = SDmode;
+ operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
+ operands[1] = SUBREG_REG (operands[1]);
+ }
+ }
if (lra_in_progress
&& mode == SDmode
&& REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
@@ -8258,6 +8529,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
gcc_unreachable();
return;
}
+ /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
+ p:DD)) if p0 is not of floating point class and p1 is spilled as
+ we can have no analogous movsd_load for this. */
+ if (lra_in_progress && mode == DDmode
+ && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
+ && GET_MODE (SUBREG_REG (operands[0])) == SDmode
+ && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+ {
+ enum reg_class cl;
+ int regno = REGNO (SUBREG_REG (operands[0]));
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
+ }
+ if (regno >= 0 && ! FP_REGNO_P (regno))
+ {
+ mode = SDmode;
+ operands[0] = SUBREG_REG (operands[0]);
+ operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
+ }
+ }
if (lra_in_progress
&& mode == SDmode
&& (REG_P (operands[0])
@@ -8571,7 +8866,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
fit into 1, whereas DI still needs two. */
static bool
-rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
+rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
{
return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
|| (TARGET_E500_DOUBLE && mode == DFmode));
@@ -8598,9 +8893,9 @@ rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
sub-tree. */
static int
-rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
+rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
{
- enum machine_mode mode;
+ machine_mode mode;
HOST_WIDE_INT size;
switch (TREE_CODE (type))
@@ -8662,8 +8957,10 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
int count;
tree index = TYPE_DOMAIN (type);
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
@@ -8680,9 +8977,7 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
- tree_to_uhwi (TYPE_MIN_VALUE (index)));
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -8694,8 +8989,10 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
int sub_count;
tree field;
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
@@ -8710,9 +9007,7 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
}
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -8726,8 +9021,10 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
int sub_count;
tree field;
- /* Can't handle incomplete types. */
- if (!COMPLETE_TYPE_P (type))
+ /* Can't handle incomplete types nor sizes that are not
+ fixed. */
+ if (!COMPLETE_TYPE_P (type)
+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
return -1;
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
@@ -8742,9 +9039,7 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
}
/* There must be no padding. */
- if (!tree_fits_uhwi_p (TYPE_SIZE (type))
- || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
- != count * GET_MODE_BITSIZE (*modep)))
+ if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
return -1;
return count;
@@ -8765,8 +9060,8 @@ rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
static bool
-rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
- enum machine_mode *elt_mode,
+rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
+ machine_mode *elt_mode,
int *n_elts)
{
/* Note that we do not accept complex types at the top level as
@@ -8775,7 +9070,7 @@ rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
can be elements of homogeneous aggregates, however. */
if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
{
- enum machine_mode field_mode = VOIDmode;
+ machine_mode field_mode = VOIDmode;
int field_count = rs6000_aggregate_candidate (type, &field_mode);
if (field_count > 0)
@@ -8916,7 +9211,7 @@ rs6000_return_in_msb (const_tree valtype)
static bool
call_ABI_of_interest (tree fndecl)
{
- if (cgraph_state == CGRAPH_STATE_EXPANSION)
+ if (symtab->state == EXPANSION)
{
struct cgraph_node *c_node;
@@ -8929,9 +9224,9 @@ call_ABI_of_interest (tree fndecl)
return true;
/* Interesting functions that we are emitting in this object file. */
- c_node = cgraph_get_node (fndecl);
- c_node = cgraph_function_or_thunk_node (c_node, NULL);
- return !cgraph_only_called_directly_p (c_node);
+ c_node = cgraph_node::get (fndecl);
+ c_node = c_node->ultimate_alias_target ();
+ return !c_node->only_called_directly_p ();
}
return false;
}
@@ -8949,7 +9244,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
rtx libname ATTRIBUTE_UNUSED, int incoming,
int libcall, int n_named_args,
tree fndecl ATTRIBUTE_UNUSED,
- enum machine_mode return_mode ATTRIBUTE_UNUSED)
+ machine_mode return_mode ATTRIBUTE_UNUSED)
{
static CUMULATIVE_ARGS zero_cumulative;
@@ -9040,10 +9335,33 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
}
}
+/* The mode the ABI uses for a word. This is not the same as word_mode
+ for -m32 -mpowerpc64. This is used to implement various target hooks. */
+
+static machine_mode
+rs6000_abi_word_mode (void)
+{
+ return TARGET_32BIT ? SImode : DImode;
+}
+
+/* On rs6000, function arguments are promoted, as are function return
+ values. */
+
+static machine_mode
+rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+ machine_mode mode,
+ int *punsignedp ATTRIBUTE_UNUSED,
+ const_tree, int)
+{
+ PROMOTE_MODE (mode, *punsignedp, type);
+
+ return mode;
+}
+
/* Return true if TYPE must be passed on the stack and not in registers. */
static bool
-rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
+rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
{
if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
return must_pass_in_stack_var_size (mode, type);
@@ -9061,7 +9379,7 @@ rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
argument slot. */
enum direction
-function_arg_padding (enum machine_mode mode, const_tree type)
+function_arg_padding (machine_mode mode, const_tree type)
{
#ifndef AGGREGATE_PADDING_FIXED
#define AGGREGATE_PADDING_FIXED 0
@@ -9124,9 +9442,9 @@ function_arg_padding (enum machine_mode mode, const_tree type)
Quadword align large synthetic vector types. */
static unsigned int
-rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
+rs6000_function_arg_boundary (machine_mode mode, const_tree type)
{
- enum machine_mode elt_mode;
+ machine_mode elt_mode;
int n_elts;
rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
@@ -9146,14 +9464,48 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) >= 16))
return 128;
- else if (((TARGET_MACHO && rs6000_darwin64_abi)
- || DEFAULT_ABI == ABI_ELFv2
- || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
- && mode == BLKmode
- && type && TYPE_ALIGN (type) > 64)
+
+ /* Aggregate types that need > 8 byte alignment are quadword-aligned
+ in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
+ -mcompat-align-parm is used. */
+ if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
+ || DEFAULT_ABI == ABI_ELFv2)
+ && type && TYPE_ALIGN (type) > 64)
+ {
+ /* "Aggregate" means any AGGREGATE_TYPE except for single-element
+ or homogeneous float/vector aggregates here. We already handled
+ vector aggregates above, but still need to check for float here. */
+ bool aggregate_p = (AGGREGATE_TYPE_P (type)
+ && !SCALAR_FLOAT_MODE_P (elt_mode));
+
+ /* We used to check for BLKmode instead of the above aggregate type
+ check. Warn when this results in any difference to the ABI. */
+ if (aggregate_p != (mode == BLKmode))
+ {
+ static bool warned;
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the ABI of passing aggregates with %d-byte alignment"
+ " has changed in GCC 5",
+ (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
+ }
+ }
+
+ if (aggregate_p)
+ return 128;
+ }
+
+ /* Similar for the Darwin64 ABI. Note that for historical reasons we
+ implement the "aggregate type" check as a BLKmode check here; this
+ means certain aggregate types are in fact not aligned. */
+ if (TARGET_MACHO && rs6000_darwin64_abi
+ && mode == BLKmode
+ && type && TYPE_ALIGN (type) > 64)
return 128;
- else
- return PARM_BOUNDARY;
+
+ return PARM_BOUNDARY;
}
/* The offset in words to the start of the parameter save area. */
@@ -9170,7 +9522,7 @@ rs6000_parm_offset (void)
the parameter area. NWORDS of the parameter area are already used. */
static unsigned int
-rs6000_parm_start (enum machine_mode mode, const_tree type,
+rs6000_parm_start (machine_mode mode, const_tree type,
unsigned int nwords)
{
unsigned int align;
@@ -9182,7 +9534,7 @@ rs6000_parm_start (enum machine_mode mode, const_tree type,
/* Compute the size (in words) of a function argument. */
static unsigned long
-rs6000_arg_size (enum machine_mode mode, const_tree type)
+rs6000_arg_size (machine_mode mode, const_tree type)
{
unsigned long size;
@@ -9205,7 +9557,7 @@ rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
{
unsigned int startbit, endbit;
int intregs, intoffset;
- enum machine_mode mode;
+ machine_mode mode;
/* Handle the situations where a float is taking up the first half
of the GPR, and the other half is empty (typically due to
@@ -9270,7 +9622,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
{
HOST_WIDE_INT bitpos = startbitpos;
tree ftype = TREE_TYPE (f);
- enum machine_mode mode;
+ machine_mode mode;
if (ftype == error_mark_node)
continue;
mode = TYPE_MODE (ftype);
@@ -9339,7 +9691,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
bit ABI. These are record types where the mode is BLK or the structure is
8 bytes in size. */
static int
-rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
+rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
{
return rs6000_darwin64_abi
&& ((mode == BLKmode
@@ -9358,10 +9710,10 @@ rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
itself. */
static void
-rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
const_tree type, bool named, int depth)
{
- enum machine_mode elt_mode;
+ machine_mode elt_mode;
int n_elts;
rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
@@ -9571,7 +9923,7 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
}
static void
-rs6000_function_arg_advance (cumulative_args_t cum, enum machine_mode mode,
+rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
const_tree type, bool named)
{
rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
@@ -9579,7 +9931,7 @@ rs6000_function_arg_advance (cumulative_args_t cum, enum machine_mode mode,
}
static rtx
-spe_build_register_parallel (enum machine_mode mode, int gregno)
+spe_build_register_parallel (machine_mode mode, int gregno)
{
rtx r1, r3, r5, r7;
@@ -9616,7 +9968,7 @@ spe_build_register_parallel (enum machine_mode mode, int gregno)
/* Determine where to put a SIMD argument on the SPE. */
static rtx
-rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
const_tree type)
{
int gregno = cum->sysv_gregno;
@@ -9649,7 +10001,7 @@ rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
{
rtx r1, r2;
- enum machine_mode m = SImode;
+ machine_mode m = SImode;
r1 = gen_rtx_REG (m, gregno);
r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
@@ -9676,7 +10028,7 @@ static void
rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
HOST_WIDE_INT bitpos, rtx rvec[], int *k)
{
- enum machine_mode mode;
+ machine_mode mode;
unsigned int regno;
unsigned int startbit, endbit;
int this_regno, intregs, intoffset;
@@ -9752,7 +10104,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
{
HOST_WIDE_INT bitpos = startbitpos;
tree ftype = TREE_TYPE (f);
- enum machine_mode mode;
+ machine_mode mode;
if (ftype == error_mark_node)
continue;
mode = TYPE_MODE (ftype);
@@ -9866,7 +10218,7 @@ rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
/* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
static rtx
-rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
+rs6000_mixed_function_arg (machine_mode mode, const_tree type,
int align_words)
{
int n_units;
@@ -9916,7 +10268,7 @@ rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
to the GPRs and/or memory. Return the number of elements used. */
static int
-rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
+rs6000_psave_function_arg (machine_mode mode, const_tree type,
int align_words, rtx *rvec)
{
int k = 0;
@@ -9931,7 +10283,7 @@ rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
{
/* If this is partially on the stack, then we only
include the portion actually in registers here. */
- enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
+ machine_mode rmode = TARGET_32BIT ? SImode : DImode;
int i = 0;
if (align_words + n_words > GP_ARG_NUM_REG)
@@ -9971,7 +10323,7 @@ rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
Construct the final function_arg return value from it. */
static rtx
-rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
+rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
{
gcc_assert (k >= 1);
@@ -10017,12 +10369,12 @@ rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
itself. */
static rtx
-rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
+rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
const_tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
enum rs6000_abi abi = DEFAULT_ABI;
- enum machine_mode elt_mode;
+ machine_mode elt_mode;
int n_elts;
/* Return a marker to indicate whether CR1 needs to set or clear the
@@ -10096,7 +10448,7 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* Vector parameters to varargs functions under AIX or Darwin
get passed in memory and possibly also in GPRs. */
int align, align_words, n_words;
- enum machine_mode part_mode;
+ machine_mode part_mode;
/* Vector parameters must be 16-byte aligned. In 32-bit
mode this means we need to take into account the offset
@@ -10191,6 +10543,7 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
rtx r, off;
int i, k = 0;
unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
+ int fpr_words;
/* Do we also need to pass this argument in the parameter
save area? */
@@ -10207,7 +10560,7 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* Check if the argument is split over registers and memory.
This can only ever happen for long double or _Decimal128;
complex types are handled via split_complex_arg. */
- enum machine_mode fmode = elt_mode;
+ machine_mode fmode = elt_mode;
if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
{
gcc_assert (fmode == TFmode || fmode == TDmode);
@@ -10219,6 +10572,47 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
}
+ /* If there were not enough FPRs to hold the argument, the rest
+ usually goes into memory. However, if the current position
+ is still within the register parameter area, a portion may
+ actually have to go into GPRs.
+
+ Note that it may happen that the portion of the argument
+ passed in the first "half" of the first GPR was already
+ passed in the last FPR as well.
+
+ For unnamed arguments, we already set up GPRs to cover the
+ whole argument in rs6000_psave_function_arg, so there is
+ nothing further to do at this point. */
+ fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
+ if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
+ && cum->nargs_prototype > 0)
+ {
+ static bool warned;
+
+ machine_mode rmode = TARGET_32BIT ? SImode : DImode;
+ int n_words = rs6000_arg_size (mode, type);
+
+ align_words += fpr_words;
+ n_words -= fpr_words;
+
+ do
+ {
+ r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
+ off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "the ABI of passing homogeneous float aggregates"
+ " has changed in GCC 5");
+ }
+ }
+
return rs6000_finish_function_arg (mode, rvec, k);
}
else if (align_words < GP_ARG_NUM_REG)
@@ -10240,14 +10634,14 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
returns the number of bytes used by the first element of the PARALLEL. */
static int
-rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
+rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
bool passed_in_gprs = true;
int ret = 0;
int align_words;
- enum machine_mode elt_mode;
+ machine_mode elt_mode;
int n_elts;
rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
@@ -10294,8 +10688,23 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
/* Otherwise, we pass in FPRs only. Check for partial copies. */
passed_in_gprs = false;
if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
- ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
- * MIN (8, GET_MODE_SIZE (elt_mode)));
+ {
+ /* Compute number of bytes / words passed in FPRs. If there
+ is still space available in the register parameter area
+ *after* that amount, a part of the argument will be passed
+ in GPRs. In that case, the total amount passed in any
+ registers is equal to the amount that would have been passed
+ in GPRs if everything were passed there, so we fall back to
+ the GPR code below to compute the appropriate value. */
+ int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
+ * MIN (8, GET_MODE_SIZE (elt_mode)));
+ int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
+
+ if (align_words + fpr_words < GP_ARG_NUM_REG)
+ passed_in_gprs = true;
+ else
+ ret = fpr;
+ }
}
if (passed_in_gprs
@@ -10325,7 +10734,7 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
static bool
rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
- enum machine_mode mode, const_tree type,
+ machine_mode mode, const_tree type,
bool named ATTRIBUTE_UNUSED)
{
if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
@@ -10387,7 +10796,7 @@ rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
static bool
rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
{
- enum machine_mode mode;
+ machine_mode mode;
int unsignedp;
rtx entry_parm;
@@ -10445,35 +10854,65 @@ rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
list, or passes any parameter in memory. */
static bool
-rs6000_function_parms_need_stack (tree fun)
+rs6000_function_parms_need_stack (tree fun, bool incoming)
{
- function_args_iterator args_iter;
- tree arg_type;
+ tree fntype, result;
CUMULATIVE_ARGS args_so_far_v;
cumulative_args_t args_so_far;
if (!fun)
/* Must be a libcall, all of which only use reg parms. */
return false;
+
+ fntype = fun;
if (!TYPE_P (fun))
- fun = TREE_TYPE (fun);
+ fntype = TREE_TYPE (fun);
/* Varargs functions need the parameter save area. */
- if (!prototype_p (fun) || stdarg_p (fun))
+ if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
return true;
- INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
args_so_far = pack_cumulative_args (&args_so_far_v);
- if (aggregate_value_p (TREE_TYPE (fun), fun))
+ /* When incoming, we will have been passed the function decl.
+ It is necessary to use the decl to handle K&R style functions,
+ where TYPE_ARG_TYPES may not be available. */
+ if (incoming)
{
- tree type = build_pointer_type (TREE_TYPE (fun));
- rs6000_parm_needs_stack (args_so_far, type);
+ gcc_assert (DECL_P (fun));
+ result = DECL_RESULT (fun);
}
+ else
+ result = TREE_TYPE (fntype);
- FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
- if (rs6000_parm_needs_stack (args_so_far, arg_type))
- return true;
+ if (result && aggregate_value_p (result, fntype))
+ {
+ if (!TYPE_P (result))
+ result = TREE_TYPE (result);
+ result = build_pointer_type (result);
+ rs6000_parm_needs_stack (args_so_far, result);
+ }
+
+ if (incoming)
+ {
+ tree parm;
+
+ for (parm = DECL_ARGUMENTS (fun);
+ parm && parm != void_list_node;
+ parm = TREE_CHAIN (parm))
+ if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
+ return true;
+ }
+ else
+ {
+ function_args_iterator args_iter;
+ tree arg_type;
+
+ FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
+ return true;
+ }
return false;
}
@@ -10485,7 +10924,7 @@ rs6000_function_parms_need_stack (tree fun)
all parameters in registers. */
int
-rs6000_reg_parm_stack_space (tree fun)
+rs6000_reg_parm_stack_space (tree fun, bool incoming)
{
int reg_parm_stack_space;
@@ -10503,7 +10942,7 @@ rs6000_reg_parm_stack_space (tree fun)
case ABI_ELFv2:
/* ??? Recomputing this every time is a bit expensive. Is there
a place to cache this information? */
- if (rs6000_function_parms_need_stack (fun))
+ if (rs6000_function_parms_need_stack (fun, incoming))
reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
else
reg_parm_stack_space = 0;
@@ -10517,7 +10956,7 @@ static void
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
{
int i;
- enum machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
+ machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
if (nregs == 0)
return;
@@ -10557,7 +10996,7 @@ rs6000_move_block_from_reg (int regno, rtx x, int nregs)
stack and set PRETEND_SIZE to the length of the registers pushed. */
static void
-setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
+setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
tree type, int *pretend_size ATTRIBUTE_UNUSED,
int no_rtl)
{
@@ -10838,7 +11277,7 @@ rs6000_va_start (tree valist, rtx nextarg)
/* Find the overflow area. */
t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
if (words != 0)
- t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
+ t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
@@ -10930,7 +11369,7 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
{
tree elem_type = TREE_TYPE (type);
- enum machine_mode elem_mode = TYPE_MODE (elem_type);
+ machine_mode elem_mode = TYPE_MODE (elem_type);
int elem_size = GET_MODE_SIZE (elem_mode);
if (elem_size < UNITS_PER_WORD)
@@ -11054,7 +11493,7 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
/* _Decimal32 varargs are located in the second word of the 64-bit
FP register for 32-bit binaries. */
- if (!TARGET_POWERPC64
+ if (TARGET_32BIT
&& TARGET_HARD_FLOAT && TARGET_FPRS
&& TYPE_MODE (type) == SDmode)
t = fold_build_pointer_plus_hwi (t, size);
@@ -11530,7 +11969,7 @@ static rtx
rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
{
rtx pat;
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
if (icode == CODE_FOR_nothing)
/* Builtin not supported on this processor. */
@@ -11558,8 +11997,8 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[icode].operand[1].mode;
+ machine_mode mode0 = insn_data[icode].operand[0].mode;
+ machine_mode mode1 = insn_data[icode].operand[1].mode;
if (icode == CODE_FOR_nothing)
/* Builtin not supported on this processor. */
@@ -11598,8 +12037,8 @@ rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
rtx pat;
tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx op0 = expand_normal (arg0);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
if (icode == CODE_FOR_nothing)
/* Builtin not supported on this processor. */
@@ -11647,8 +12086,8 @@ altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
rtx pat, scratch1, scratch2;
tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx op0 = expand_normal (arg0);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
/* If we have invalid arguments, bail out before generating bad rtl. */
if (arg0 == error_mark_node)
@@ -11681,9 +12120,9 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
if (icode == CODE_FOR_nothing)
/* Builtin not supported on this processor. */
@@ -11755,9 +12194,9 @@ altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
tree arg1 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
- enum machine_mode tmode = SImode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode tmode = SImode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
int cr6_form_int;
if (TREE_CODE (cr6_form) != INTEGER_CST)
@@ -11827,9 +12266,9 @@ paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
rtx pat, addr;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = Pmode;
- enum machine_mode mode1 = Pmode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = Pmode;
+ machine_mode mode1 = Pmode;
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
@@ -11870,7 +12309,7 @@ paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
/* Return a constant vector for use as a little-endian permute control vector
to reverse the order of elements of the given vector mode. */
static rtx
-swap_selector_for_mode (enum machine_mode mode)
+swap_selector_for_mode (machine_mode mode)
{
/* These are little endian vectors, so their elements are reversed
from what you would normally expect for a permute control vector. */
@@ -11911,7 +12350,7 @@ swap_selector_for_mode (enum machine_mode mode)
with -maltivec=be specified. Issue the load followed by an element-reversing
permute. */
void
-altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
rtx tmp = gen_reg_rtx (mode);
rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
@@ -11929,7 +12368,7 @@ altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec
with -maltivec=be specified. Issue the store preceded by an element-reversing
permute. */
void
-altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
rtx tmp = gen_reg_rtx (mode);
rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
@@ -11947,9 +12386,9 @@ altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspe
/* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
specified. Issue the store preceded by an element-reversing permute. */
void
-altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
- enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
rtx tmp = gen_reg_rtx (mode);
rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
rtx sel = swap_selector_for_mode (mode);
@@ -11967,9 +12406,9 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
rtx pat, addr;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = Pmode;
- enum machine_mode mode1 = Pmode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = Pmode;
+ machine_mode mode1 = Pmode;
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
@@ -12017,9 +12456,9 @@ spe_expand_stv_builtin (enum insn_code icode, tree exp)
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
rtx pat;
- enum machine_mode mode0 = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[icode].operand[1].mode;
- enum machine_mode mode2 = insn_data[icode].operand[2].mode;
+ machine_mode mode0 = insn_data[icode].operand[0].mode;
+ machine_mode mode1 = insn_data[icode].operand[1].mode;
+ machine_mode mode2 = insn_data[icode].operand[2].mode;
/* Invalid arguments. Bail before doing anything stoopid! */
if (arg0 == error_mark_node
@@ -12050,9 +12489,9 @@ paired_expand_stv_builtin (enum insn_code icode, tree exp)
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
rtx pat, addr;
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode1 = Pmode;
- enum machine_mode mode2 = Pmode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode1 = Pmode;
+ machine_mode mode2 = Pmode;
/* Invalid arguments. Bail before doing anything stoopid! */
if (arg0 == error_mark_node
@@ -12091,10 +12530,10 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
rtx pat, addr;
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode smode = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = Pmode;
- enum machine_mode mode2 = Pmode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode smode = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = Pmode;
+ machine_mode mode2 = Pmode;
/* Invalid arguments. Bail before doing anything stoopid! */
if (arg0 == error_mark_node
@@ -12199,7 +12638,7 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp)
if (nonvoid)
{
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
if (!target
|| GET_MODE (target) != tmode
|| !(*insn_data[icode].operand[0].predicate) (target, tmode))
@@ -12321,10 +12760,10 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
- enum machine_mode mode2 = insn_data[icode].operand[3].mode;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode mode2 = insn_data[icode].operand[3].mode;
if (icode == CODE_FOR_nothing)
/* Builtin not supported on this processor. */
@@ -12376,7 +12815,15 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
}
}
else if (icode == CODE_FOR_vsx_set_v2df
- || icode == CODE_FOR_vsx_set_v2di)
+ || icode == CODE_FOR_vsx_set_v2di
+ || icode == CODE_FOR_bcdadd
+ || icode == CODE_FOR_bcdadd_lt
+ || icode == CODE_FOR_bcdadd_eq
+ || icode == CODE_FOR_bcdadd_gt
+ || icode == CODE_FOR_bcdsub
+ || icode == CODE_FOR_bcdsub_lt
+ || icode == CODE_FOR_bcdsub_eq
+ || icode == CODE_FOR_bcdsub_gt)
{
/* Only allow 1-bit unsigned literals. */
STRIP_NOPS (arg2);
@@ -12387,22 +12834,58 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
return const0_rtx;
}
}
+ else if (icode == CODE_FOR_dfp_ddedpd_dd
+ || icode == CODE_FOR_dfp_ddedpd_td)
+ {
+ /* Only allow 2-bit unsigned literals where the value is 0 or 2 (the SP field of ddedpd). */
+ STRIP_NOPS (arg0);
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x3)
+ {
+ error ("argument 1 must be 0 or 2");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_dfp_denbcd_dd
+ || icode == CODE_FOR_dfp_denbcd_td)
+ {
+ /* Only allow 1-bit unsigned literals. */
+ STRIP_NOPS (arg0);
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x1)
+ {
+ error ("argument 1 must be a 1-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_dfp_dscli_dd
+ || icode == CODE_FOR_dfp_dscli_td
+ || icode == CODE_FOR_dfp_dscri_dd
+ || icode == CODE_FOR_dfp_dscri_td)
+ {
+ /* Only allow 6-bit unsigned literals. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg1) & ~0x3f)
+ {
+ error ("argument 2 must be a 6-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
else if (icode == CODE_FOR_crypto_vshasigmaw
|| icode == CODE_FOR_crypto_vshasigmad)
{
/* Check whether the 2nd and 3rd arguments are integer constants and in
range and prepare arguments. */
STRIP_NOPS (arg1);
- if (TREE_CODE (arg1) != INTEGER_CST
- || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
+ if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
{
error ("argument 2 must be 0 or 1");
return const0_rtx;
}
STRIP_NOPS (arg2);
- if (TREE_CODE (arg2) != INTEGER_CST
- || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
+ if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
{
error ("argument 3 must be in the range 0..15");
return const0_rtx;
@@ -12439,7 +12922,7 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0;
- enum machine_mode tmode, mode0;
+ machine_mode tmode, mode0;
rtx pat, op0;
enum insn_code icode;
@@ -12500,7 +12983,7 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0, arg1;
- enum machine_mode mode0, mode1;
+ machine_mode mode0, mode1;
rtx pat, op0, op1;
enum insn_code icode;
@@ -12559,7 +13042,7 @@ altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
tree arg0, arg1, arg2;
- enum machine_mode mode0, mode1;
+ machine_mode mode0, mode1;
rtx pat, op0, op1, op2;
const struct builtin_description *d;
size_t i;
@@ -12614,8 +13097,8 @@ altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
static rtx
altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
- enum machine_mode tmode = TYPE_MODE (type);
- enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ machine_mode tmode = TYPE_MODE (type);
+ machine_mode inner_mode = GET_MODE_INNER (tmode);
int i, n_elt = GET_MODE_NUNITS (tmode);
gcc_assert (VECTOR_MODE_P (tmode));
@@ -12669,7 +13152,7 @@ get_element_number (tree vec_type, tree arg)
static rtx
altivec_expand_vec_set_builtin (tree exp)
{
- enum machine_mode tmode, mode1;
+ machine_mode tmode, mode1;
tree arg0, arg1, arg2;
int elt;
rtx op0, op1;
@@ -12701,7 +13184,7 @@ altivec_expand_vec_set_builtin (tree exp)
static rtx
altivec_expand_vec_ext_builtin (tree exp, rtx target)
{
- enum machine_mode tmode, mode0;
+ machine_mode tmode, mode0;
tree arg0, arg1;
int elt;
rtx op0;
@@ -12737,7 +13220,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg0;
rtx op0, pat;
- enum machine_mode tmode, mode0;
+ machine_mode tmode, mode0;
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
@@ -13100,7 +13583,7 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp)
tree arg1, arg0;
enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
enum insn_code icode;
- enum machine_mode tmode, mode0;
+ machine_mode tmode, mode0;
rtx pat, op0;
const struct builtin_description *d;
size_t i;
@@ -13234,8 +13717,8 @@ paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
tree arg1 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
int form_int;
enum rtx_code code;
@@ -13306,8 +13789,8 @@ spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
tree arg1 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
int form_int;
enum rtx_code code;
@@ -13417,8 +13900,8 @@ spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
rtx op3 = expand_normal (arg3);
- enum machine_mode mode0 = insn_data[icode].operand[1].mode;
- enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
gcc_assert (mode0 == mode1);
@@ -13478,6 +13961,20 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
error ("Builtin function %s requires the -mpaired option", name);
else if ((fnmask & RS6000_BTM_SPE) != 0)
error ("Builtin function %s requires the -mspe option", name);
+ else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+ == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+ error ("Builtin function %s requires the -mhard-dfp and"
+ " -mpower8-vector options", name);
+ else if ((fnmask & RS6000_BTM_DFP) != 0)
+ error ("Builtin function %s requires the -mhard-dfp option", name);
+ else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
+ error ("Builtin function %s requires the -mpower8-vector option", name);
+ else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
+ == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
+ error ("Builtin function %s requires the -mhard-float and"
+ " -mlong-double-128 options", name);
+ else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
+ error ("Builtin function %s requires the -mhard-float option", name);
else
error ("Builtin function %s is not supported with the current options",
name);
@@ -13491,7 +13988,7 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
static rtx
rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
@@ -13582,10 +14079,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
- : (int) CODE_FOR_altivec_lvsl);
- enum machine_mode tmode = insn_data[icode].operand[0].mode;
- enum machine_mode mode = insn_data[icode].operand[1].mode;
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
rtx op, addr, pat;
@@ -13611,7 +14108,6 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
- /*pat = gen_altivec_lvsr (target, op);*/
pat = GEN_FCN (icode) (target, op);
if (!pat)
return 0;
@@ -13666,7 +14162,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return ret;
}
- gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
+ unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY);
/* Handle simple unary operations. */
d = bdesc_1arg;
@@ -13694,7 +14193,7 @@ rs6000_init_builtins (void)
{
tree tdecl;
tree ftype;
- enum machine_mode mode;
+ machine_mode mode;
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
@@ -13757,6 +14256,9 @@ rs6000_init_builtins (void)
uintTI_type_internal_node = unsigned_intTI_type_node;
float_type_internal_node = float_type_node;
double_type_internal_node = double_type_node;
+ long_double_type_internal_node = long_double_type_node;
+ dfloat64_type_internal_node = dfloat64_type_node;
+ dfloat128_type_internal_node = dfloat128_type_node;
void_type_internal_node = void_type_node;
/* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -13771,6 +14273,9 @@ rs6000_init_builtins (void)
builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
builtin_mode_to_type[SFmode][0] = float_type_node;
builtin_mode_to_type[DFmode][0] = double_type_node;
+ builtin_mode_to_type[TFmode][0] = long_double_type_node;
+ builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
+ builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
@@ -14502,7 +15007,7 @@ altivec_init_builtins (void)
d = bdesc_altivec_preds;
for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
{
- enum machine_mode mode1;
+ machine_mode mode1;
tree type;
if (rs6000_overloaded_builtin_p (d->code))
@@ -14544,7 +15049,7 @@ altivec_init_builtins (void)
d = bdesc_abs;
for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
{
- enum machine_mode mode0;
+ machine_mode mode0;
tree type;
mode0 = insn_data[d->icode].operand[0].mode;
@@ -14789,13 +15294,11 @@ htm_init_builtins (void)
/* Hash function for builtin functions with up to 3 arguments and a return
type. */
-static unsigned
-builtin_hash_function (const void *hash_entry)
+hashval_t
+builtin_hasher::hash (builtin_hash_struct *bh)
{
unsigned ret = 0;
int i;
- const struct builtin_hash_struct *bh =
- (const struct builtin_hash_struct *) hash_entry;
for (i = 0; i < 4; i++)
{
@@ -14807,12 +15310,9 @@ builtin_hash_function (const void *hash_entry)
}
/* Compare builtin hash entries H1 and H2 for equivalence. */
-static int
-builtin_hash_eq (const void *h1, const void *h2)
+bool
+builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
{
- const struct builtin_hash_struct *p1 = (const struct builtin_hash_struct *) h1;
- const struct builtin_hash_struct *p2 = (const struct builtin_hash_struct *) h2;
-
return ((p1->mode[0] == p2->mode[0])
&& (p1->mode[1] == p2->mode[1])
&& (p1->mode[2] == p2->mode[2])
@@ -14827,13 +15327,12 @@ builtin_hash_eq (const void *h1, const void *h2)
arguments. Functions with fewer than 3 arguments use VOIDmode as the type
of the argument. */
static tree
-builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
- enum machine_mode mode_arg1, enum machine_mode mode_arg2,
+builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
+ machine_mode mode_arg1, machine_mode mode_arg2,
enum rs6000_builtins builtin, const char *name)
{
struct builtin_hash_struct h;
struct builtin_hash_struct *h2;
- void **found;
int num_args = 3;
int i;
tree ret_type = NULL_TREE;
@@ -14841,8 +15340,7 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
/* Create builtin_hash_table. */
if (builtin_hash_table == NULL)
- builtin_hash_table = htab_create_ggc (1500, builtin_hash_function,
- builtin_hash_eq, NULL);
+ builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
h.type = NULL_TREE;
h.mode[0] = mode_ret;
@@ -14863,6 +15361,8 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
/* unsigned 1 argument functions. */
case CRYPTO_BUILTIN_VSBOX:
case P8V_BUILTIN_VGBBD:
+ case MISC_BUILTIN_CDTBCD:
+ case MISC_BUILTIN_CBCDTD:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
break;
@@ -14881,6 +15381,11 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case CRYPTO_BUILTIN_VPMSUMW:
case CRYPTO_BUILTIN_VPMSUMD:
case CRYPTO_BUILTIN_VPMSUM:
+ case MISC_BUILTIN_ADDG6S:
+ case MISC_BUILTIN_DIVWEU:
+ case MISC_BUILTIN_DIVWEUO:
+ case MISC_BUILTIN_DIVDEU:
+ case MISC_BUILTIN_DIVDEUO:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -14942,9 +15447,18 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
/* signed args, unsigned return. */
case VSX_BUILTIN_XVCVDPUXDS_UNS:
case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
+ case MISC_BUILTIN_UNPACK_TD:
+ case MISC_BUILTIN_UNPACK_V1TI:
h.uns_p[0] = 1;
break;
+ /* unsigned arguments for 128-bit pack instructions. */
+ case MISC_BUILTIN_PACK_TD:
+ case MISC_BUILTIN_PACK_V1TI:
+ h.uns_p[1] = 1;
+ h.uns_p[2] = 1;
+ break;
+
default:
break;
}
@@ -14982,18 +15496,18 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
GET_MODE_NAME (m));
}
- found = htab_find_slot (builtin_hash_table, &h, INSERT);
+ builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
if (*found == NULL)
{
- h2 = ggc_alloc_builtin_hash_struct ();
+ h2 = ggc_alloc<builtin_hash_struct> ();
*h2 = h;
- *found = (void *)h2;
+ *found = h2;
h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
arg_type[2], NULL_TREE);
}
- return ((struct builtin_hash_struct *)(*found))->type;
+ return (*found)->type;
}
static void
@@ -15084,7 +15598,7 @@ rs6000_common_init_builtins (void)
d = bdesc_2arg;
for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
{
- enum machine_mode mode0, mode1, mode2;
+ machine_mode mode0, mode1, mode2;
tree type;
HOST_WIDE_INT mask = d->mask;
@@ -15162,7 +15676,7 @@ rs6000_common_init_builtins (void)
d = bdesc_1arg;
for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
{
- enum machine_mode mode0, mode1;
+ machine_mode mode0, mode1;
tree type;
HOST_WIDE_INT mask = d->mask;
@@ -15335,7 +15849,7 @@ expand_block_clear (rtx operands[])
load zero and three to do clearing. */
if (TARGET_ALTIVEC && align >= 128)
clear_step = 16;
- else if (TARGET_POWERPC64 && align >= 32)
+ else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
clear_step = 8;
else if (TARGET_SPE && align >= 64)
clear_step = 8;
@@ -15349,7 +15863,7 @@ expand_block_clear (rtx operands[])
for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
{
- enum machine_mode mode = BLKmode;
+ machine_mode mode = BLKmode;
rtx dest;
if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
@@ -15363,12 +15877,27 @@ expand_block_clear (rtx operands[])
mode = V2SImode;
}
else if (bytes >= 8 && TARGET_POWERPC64
- /* 64-bit loads and stores require word-aligned
- displacements. */
- && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ && (align >= 64 || !STRICT_ALIGNMENT))
{
clear_bytes = 8;
mode = DImode;
+ if (offset == 0 && align < 64)
+ {
+ rtx addr;
+
+ /* If the address form is reg+offset with offset not a
+ multiple of four, reload into reg indirect form here
+ rather than waiting for reload. This way we get one
+ reload, not one per store. */
+ addr = XEXP (orig_dest, 0);
+ if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && (INTVAL (XEXP (addr, 1)) & 3) != 0)
+ {
+ addr = copy_addr_to_reg (addr);
+ orig_dest = replace_equiv_address (orig_dest, addr);
+ }
+ }
}
else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
{ /* move 4 bytes */
@@ -15442,7 +15971,7 @@ expand_block_move (rtx operands[])
rtx (*movmemsi) (rtx, rtx, rtx, rtx);
rtx (*mov) (rtx, rtx);
} gen_func;
- enum machine_mode mode = BLKmode;
+ machine_mode mode = BLKmode;
rtx src, dest;
/* Altivec first, since it will be faster than a string move
@@ -15496,13 +16025,36 @@ expand_block_move (rtx operands[])
gen_func.movmemsi = gen_movmemsi_4reg;
}
else if (bytes >= 8 && TARGET_POWERPC64
- /* 64-bit loads and stores require word-aligned
- displacements. */
- && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ && (align >= 64 || !STRICT_ALIGNMENT))
{
move_bytes = 8;
mode = DImode;
gen_func.mov = gen_movdi;
+ if (offset == 0 && align < 64)
+ {
+ rtx addr;
+
+ /* If the address form is reg+offset with offset not a
+ multiple of four, reload into reg indirect form here
+ rather than waiting for reload. This way we get one
+ reload, not one per load and/or store. */
+ addr = XEXP (orig_dest, 0);
+ if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && (INTVAL (XEXP (addr, 1)) & 3) != 0)
+ {
+ addr = copy_addr_to_reg (addr);
+ orig_dest = replace_equiv_address (orig_dest, addr);
+ }
+ addr = XEXP (orig_src, 0);
+ if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT
+ && (INTVAL (XEXP (addr, 1)) & 3) != 0)
+ {
+ addr = copy_addr_to_reg (addr);
+ orig_src = replace_equiv_address (orig_src, addr);
+ }
+ }
}
else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
{ /* move up to 8 bytes at a time */
@@ -15598,8 +16150,7 @@ rs6000_output_load_multiple (rtx operands[3])
return "lwz %2,0(%1)";
for (i = 0; i < words; i++)
- if (refers_to_regno_p (REGNO (operands[2]) + i,
- REGNO (operands[2]) + i + 1, operands[1], 0))
+ if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
{
if (i == words-1)
{
@@ -15643,7 +16194,7 @@ rs6000_output_load_multiple (rtx operands[3])
never be generated. */
void
-validate_condition_mode (enum rtx_code code, enum machine_mode mode)
+validate_condition_mode (enum rtx_code code, machine_mode mode)
{
gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
|| GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
@@ -15710,13 +16261,13 @@ includes_rldic_lshift_p (rtx shiftop, rtx andop)
{
if (GET_CODE (andop) == CONST_INT)
{
- HOST_WIDE_INT c, lsb, shift_mask;
+ unsigned HOST_WIDE_INT c, lsb, shift_mask;
c = INTVAL (andop);
- if (c == 0 || c == ~0)
+ if (c == 0 || c == HOST_WIDE_INT_M1U)
return 0;
- shift_mask = ~0;
+ shift_mask = HOST_WIDE_INT_M1U;
shift_mask <<= INTVAL (shiftop);
/* Find the least significant one bit. */
@@ -15749,9 +16300,9 @@ includes_rldicr_lshift_p (rtx shiftop, rtx andop)
{
if (GET_CODE (andop) == CONST_INT)
{
- HOST_WIDE_INT c, lsb, shift_mask;
+ unsigned HOST_WIDE_INT c, lsb, shift_mask;
- shift_mask = ~0;
+ shift_mask = HOST_WIDE_INT_M1U;
shift_mask <<= INTVAL (shiftop);
c = INTVAL (andop);
@@ -15889,7 +16440,7 @@ mems_ok_for_quad_peep (rtx mem1, rtx mem2)
rtx
-rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
+rs6000_secondary_memory_needed_rtx (machine_mode mode)
{
static bool eliminated = false;
rtx ret;
@@ -15926,8 +16477,8 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
/* Return the mode to be used for memory when a secondary memory
location is needed. For SDmode values we need to use DDmode, in
all other cases we can use the same mode. */
-enum machine_mode
-rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+machine_mode
+rs6000_secondary_memory_needed_mode (machine_mode mode)
{
if (lra_in_progress && mode == SDmode)
return DDmode;
@@ -16001,13 +16552,296 @@ register_to_reg_type (rtx reg, bool *is_altivec)
return reg_class_to_reg_type[(int)rclass];
}
+/* Helper function to return the cost, in extra instructions, of materializing a TOC entry address; ADDR_MASK tells whether the reload register class supports reg+offset addressing. */
+
+static inline int
+rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
+{
+ int ret;
+
+ if (TARGET_CMODEL != CMODEL_SMALL)
+ ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
+
+ else
+ ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
+
+ return ret;
+}
+
+/* Helper function for rs6000_secondary_reload to determine whether the memory
+ address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
+ needs reloading. Return negative if the memory is not handled by the memory
+ helper functions and to try a different reload method, 0 if no additional
+ instructions are needed, and positive to give the extra cost for the
+ memory. */
+
+static int
+rs6000_secondary_reload_memory (rtx addr,
+ enum reg_class rclass,
+ machine_mode mode)
+{
+ int extra_cost = 0;
+ rtx reg, and_arg, plus_arg0, plus_arg1;
+ addr_mask_type addr_mask;
+ const char *type = NULL;
+ const char *fail_msg = NULL;
+
+ if (GPR_REG_CLASS_P (rclass))
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
+
+ else if (rclass == FLOAT_REGS)
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
+
+ else if (rclass == ALTIVEC_REGS)
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
+
+ /* For the combined VSX_REGS, turn off Altivec AND -16. */
+ else if (rclass == VSX_REGS)
+ addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
+ & ~RELOAD_REG_AND_M16);
+
+ else
+ {
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr,
+ "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+ "class is not GPR, FPR, VMX\n",
+ GET_MODE_NAME (mode), reg_class_names[rclass]);
+
+ return -1;
+ }
+
+ /* If the register isn't valid in this register class, just return now. */
+ if ((addr_mask & RELOAD_REG_VALID) == 0)
+ {
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr,
+ "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+ "not valid in class\n",
+ GET_MODE_NAME (mode), reg_class_names[rclass]);
+
+ return -1;
+ }
+
+ switch (GET_CODE (addr))
+ {
+ /* Does the register class support auto update forms for this mode? We
+ don't need a scratch register, since the powerpc only supports
+ PRE_INC, PRE_DEC, and PRE_MODIFY. */
+ case PRE_INC:
+ case PRE_DEC:
+ reg = XEXP (addr, 0);
+ if (!base_reg_operand (reg, GET_MODE (reg)))
+ {
+ fail_msg = "no base register #1";
+ extra_cost = -1;
+ }
+
+ else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
+ {
+ extra_cost = 1;
+ type = "update";
+ }
+ break;
+
+ case PRE_MODIFY:
+ reg = XEXP (addr, 0);
+ plus_arg1 = XEXP (addr, 1);
+ if (!base_reg_operand (reg, GET_MODE (reg))
+ || GET_CODE (plus_arg1) != PLUS
+ || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
+ {
+ fail_msg = "bad PRE_MODIFY";
+ extra_cost = -1;
+ }
+
+ else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
+ {
+ extra_cost = 1;
+ type = "update";
+ }
+ break;
+
+ /* Do we need to simulate AND -16 to clear the bottom address bits used
+ in VMX load/stores? Only allow the AND for vector sizes. */
+ case AND:
+ and_arg = XEXP (addr, 0);
+ if (GET_MODE_SIZE (mode) != 16
+ || GET_CODE (XEXP (addr, 1)) != CONST_INT
+ || INTVAL (XEXP (addr, 1)) != -16)
+ {
+ fail_msg = "bad Altivec AND #1";
+ extra_cost = -1;
+ }
+
+ else if (rclass != ALTIVEC_REGS)
+ {
+ if (legitimate_indirect_address_p (and_arg, false))
+ extra_cost = 1;
+
+ else if (legitimate_indexed_address_p (and_arg, false))
+ extra_cost = 2;
+
+ else
+ {
+ fail_msg = "bad Altivec AND #2";
+ extra_cost = -1;
+ }
+
+ type = "and";
+ }
+ break;
+
+ /* If this is an indirect address, make sure it is a base register. */
+ case REG:
+ case SUBREG:
+ if (!legitimate_indirect_address_p (addr, false))
+ {
+ extra_cost = 1;
+ type = "move";
+ }
+ break;
+
+ /* If this is an indexed address, make sure the register class can handle
+ indexed addresses for this mode. */
+ case PLUS:
+ plus_arg0 = XEXP (addr, 0);
+ plus_arg1 = XEXP (addr, 1);
+
+ /* (plus (plus (reg) (constant)) (constant)) is generated during
+ push_reload processing, so handle it now. */
+ if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
+ {
+ if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+ {
+ extra_cost = 1;
+ type = "offset";
+ }
+ }
+
+ /* (plus (plus (reg) (constant)) (reg)) is also generated during
+ push_reload processing, so handle it now. */
+ else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
+ {
+ if ((addr_mask & RELOAD_REG_INDEXED) == 0)
+ {
+ extra_cost = 1;
+ type = "indexed #2";
+ }
+ }
+
+ else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
+ {
+ fail_msg = "no base register #2";
+ extra_cost = -1;
+ }
+
+ else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
+ {
+ if ((addr_mask & RELOAD_REG_INDEXED) == 0
+ || !legitimate_indexed_address_p (addr, false))
+ {
+ extra_cost = 1;
+ type = "indexed";
+ }
+ }
+
+ /* Make sure the register class can handle offset addresses. */
+ else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
+ {
+ if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+ {
+ extra_cost = 1;
+ type = "offset";
+ }
+ }
+
+ else
+ {
+ fail_msg = "bad PLUS";
+ extra_cost = -1;
+ }
+
+ break;
+
+ case LO_SUM:
+ if (!legitimate_lo_sum_address_p (mode, addr, false))
+ {
+ fail_msg = "bad LO_SUM";
+ extra_cost = -1;
+ }
+
+ else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+ {
+ extra_cost = 1;
+ type = "lo_sum";
+ }
+ break;
+
+ /* Static addresses need to create a TOC entry. */
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ type = "address";
+ extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
+ break;
+
+ /* TOC references look like offsetable memory. */
+ case UNSPEC:
+ if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
+ {
+ fail_msg = "bad UNSPEC";
+ extra_cost = -1;
+ }
+
+ else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+ {
+ extra_cost = 1;
+ type = "toc reference";
+ }
+ break;
+
+ default:
+ {
+ fail_msg = "bad address";
+ extra_cost = -1;
+ }
+ }
+
+ if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
+ {
+ if (extra_cost < 0)
+ fprintf (stderr,
+ "rs6000_secondary_reload_memory error: mode = %s, "
+ "class = %s, addr_mask = '%s', %s\n",
+ GET_MODE_NAME (mode),
+ reg_class_names[rclass],
+ rs6000_debug_addr_mask (addr_mask, false),
+ (fail_msg != NULL) ? fail_msg : "<bad address>");
+
+ else
+ fprintf (stderr,
+ "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+ "addr_mask = '%s', extra cost = %d, %s\n",
+ GET_MODE_NAME (mode),
+ reg_class_names[rclass],
+ rs6000_debug_addr_mask (addr_mask, false),
+ extra_cost,
+ (type) ? type : "<none>");
+
+ debug_rtx (addr);
+ }
+
+ return extra_cost;
+}
+
/* Helper function for rs6000_secondary_reload to return true if a move to a
different register classe is really a simple move. */
static bool
rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
enum rs6000_reg_type from_type,
- enum machine_mode mode)
+ machine_mode mode)
{
int size;
@@ -16045,7 +16879,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
enum rs6000_reg_type from_type,
- enum machine_mode mode,
+ machine_mode mode,
secondary_reload_info *sri,
bool altivec_p)
{
@@ -16150,7 +16984,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
enum rs6000_reg_type from_type,
- enum machine_mode mode,
+ machine_mode mode,
secondary_reload_info *sri,
bool altivec_p)
{
@@ -16200,15 +17034,22 @@ static reg_class_t
rs6000_secondary_reload (bool in_p,
rtx x,
reg_class_t rclass_i,
- enum machine_mode mode,
+ machine_mode mode,
secondary_reload_info *sri)
{
enum reg_class rclass = (enum reg_class) rclass_i;
reg_class_t ret = ALL_REGS;
enum insn_code icode;
bool default_p = false;
+ bool done_p = false;
+
+ /* Allow subreg of memory before/during reload. */
+ bool memory_p = (MEM_P (x)
+ || (!reload_completed && GET_CODE (x) == SUBREG
+ && MEM_P (SUBREG_REG (x))));
sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
icode = ((in_p)
? reg_addr[mode].reload_load
: reg_addr[mode].reload_store);
@@ -16232,121 +17073,54 @@ rs6000_secondary_reload (bool in_p,
{
icode = (enum insn_code)sri->icode;
default_p = false;
+ done_p = true;
ret = NO_REGS;
}
}
- /* Handle vector moves with reload helper functions. */
- if (ret == ALL_REGS && icode != CODE_FOR_nothing)
+ /* Make sure 0.0 is not reloaded or forced into memory. */
+ if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
{
ret = NO_REGS;
- sri->icode = CODE_FOR_nothing;
- sri->extra_cost = 0;
+ default_p = false;
+ done_p = true;
+ }
- if (GET_CODE (x) == MEM)
- {
- rtx addr = XEXP (x, 0);
+ /* If this is a scalar floating point value and we want to load it into the
+ traditional Altivec registers, do it via a move via a traditional floating
+ point register. Also make sure that non-zero constants use a FPR. */
+ if (!done_p && reg_addr[mode].scalar_in_vmx_p
+ && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
+ && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
+ {
+ ret = FLOAT_REGS;
+ default_p = false;
+ done_p = true;
+ }
- /* Loads to and stores from gprs can do reg+offset, and wouldn't need
- an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). Special
- case load/store quad. */
- if (rclass == GENERAL_REGS || rclass == BASE_REGS)
- {
- if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
- && GET_MODE_SIZE (mode) == 16
- && quad_memory_operand (x, mode))
- {
- sri->icode = icode;
- sri->extra_cost = 2;
- }
+ /* Handle reload of load/stores if we have reload helper functions. */
+ if (!done_p && icode != CODE_FOR_nothing && memory_p)
+ {
+ int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
+ mode);
- else if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true))
- {
- sri->icode = icode;
- /* account for splitting the loads, and converting the
- address from reg+reg to reg. */
- sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
- + ((GET_CODE (addr) == AND) ? 1 : 0));
- }
- }
- /* Allow scalar loads to/from the traditional floating point
- registers, even if VSX memory is set. */
- else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
- && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
- && (legitimate_indirect_address_p (addr, false)
- || legitimate_indirect_address_p (addr, false)
- || rs6000_legitimate_offset_address_p (mode, addr,
- false, true)))
-
- ;
- /* Loads to and stores from vector registers can only do reg+reg
- addressing. Altivec registers can also do (reg+reg)&(-16). Allow
- scalar modes loading up the traditional floating point registers
- to use offset addresses. */
- else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
- || rclass == FLOAT_REGS || rclass == NO_REGS)
- {
- if (!VECTOR_MEM_ALTIVEC_P (mode)
- && GET_CODE (addr) == AND
- && GET_CODE (XEXP (addr, 1)) == CONST_INT
- && INTVAL (XEXP (addr, 1)) == -16
- && (legitimate_indirect_address_p (XEXP (addr, 0), false)
- || legitimate_indexed_address_p (XEXP (addr, 0), false)))
- {
- sri->icode = icode;
- sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
- ? 2 : 1);
- }
- else if (!legitimate_indirect_address_p (addr, false)
- && (rclass == NO_REGS
- || !legitimate_indexed_address_p (addr, false)))
- {
- sri->icode = icode;
- sri->extra_cost = 1;
- }
- else
- icode = CODE_FOR_nothing;
- }
- /* Any other loads, including to pseudo registers which haven't been
- assigned to a register yet, default to require a scratch
- register. */
- else
- {
- sri->icode = icode;
- sri->extra_cost = 2;
- }
- }
- else if (REG_P (x))
+ if (extra_cost >= 0)
{
- int regno = true_regnum (x);
-
- icode = CODE_FOR_nothing;
- if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
- default_p = true;
- else
+ done_p = true;
+ ret = NO_REGS;
+ if (extra_cost > 0)
{
- enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
- enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
-
- /* If memory is needed, use default_secondary_reload to create the
- stack slot. */
- if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
- default_p = true;
- else
- ret = NO_REGS;
+ sri->extra_cost = extra_cost;
+ sri->icode = icode;
}
}
- else
- default_p = true;
}
- else if (TARGET_POWERPC64
- && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
- && MEM_P (x)
- && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
+
+ /* Handle unaligned loads and stores of integer registers. */
+ if (!done_p && TARGET_POWERPC64
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
+ && memory_p
+ && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
rtx addr = XEXP (x, 0);
rtx off = address_offset (addr);
@@ -16369,12 +17143,16 @@ rs6000_secondary_reload (bool in_p,
: (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
&& (offset & 3) != 0))
{
+ /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
if (in_p)
- sri->icode = CODE_FOR_reload_di_load;
+ sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
+ : CODE_FOR_reload_di_load);
else
- sri->icode = CODE_FOR_reload_di_store;
+ sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
+ : CODE_FOR_reload_di_store);
sri->extra_cost = 2;
ret = NO_REGS;
+ done_p = true;
}
else
default_p = true;
@@ -16382,10 +17160,11 @@ rs6000_secondary_reload (bool in_p,
else
default_p = true;
}
- else if (!TARGET_POWERPC64
- && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
- && MEM_P (x)
- && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+
+ if (!done_p && !TARGET_POWERPC64
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
+ && memory_p
+ && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
rtx addr = XEXP (x, 0);
rtx off = address_offset (addr);
@@ -16421,6 +17200,7 @@ rs6000_secondary_reload (bool in_p,
sri->icode = CODE_FOR_reload_si_store;
sri->extra_cost = 2;
ret = NO_REGS;
+ done_p = true;
}
else
default_p = true;
@@ -16428,7 +17208,8 @@ rs6000_secondary_reload (bool in_p,
else
default_p = true;
}
- else
+
+ if (!done_p)
default_p = true;
if (default_p)
@@ -16446,15 +17227,20 @@ rs6000_secondary_reload (bool in_p,
reg_class_names[rclass],
GET_MODE_NAME (mode));
+ if (reload_completed)
+ fputs (", after reload", stderr);
+
+ if (!done_p)
+ fputs (", done_p not set", stderr);
+
if (default_p)
- fprintf (stderr, ", default secondary reload");
+ fputs (", default secondary reload", stderr);
if (sri->icode != CODE_FOR_nothing)
- fprintf (stderr, ", reload func = %s, extra cost = %d\n",
+ fprintf (stderr, ", reload func = %s, extra cost = %d",
insn_data[sri->icode].name, sri->extra_cost);
- else
- fprintf (stderr, "\n");
+ fputs ("\n", stderr);
debug_rtx (x);
}
@@ -16483,6 +17269,9 @@ rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
}
+static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
+ ATTRIBUTE_NORETURN;
+
static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
bool store_p)
@@ -16491,209 +17280,148 @@ rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
gcc_unreachable ();
}
-/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
- to SP+reg addressing. */
+/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
+ reload helper functions. These were identified in
+ rs6000_secondary_reload_memory, and if reload decided to use the secondary
+ reload, it calls the insns:
+ reload_<RELOAD:mode>_<P:mptrsize>_store
+ reload_<RELOAD:mode>_<P:mptrsize>_load
+
+ which in turn calls this function, to do whatever is necessary to create
+ valid addresses. */
void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
int regno = true_regnum (reg);
- enum machine_mode mode = GET_MODE (reg);
- enum reg_class rclass;
+ machine_mode mode = GET_MODE (reg);
+ addr_mask_type addr_mask;
rtx addr;
- rtx and_op2 = NULL_RTX;
- rtx addr_op1;
- rtx addr_op2;
- rtx scratch_or_premodify = scratch;
- rtx and_rtx;
+ rtx new_addr;
+ rtx op_reg, op0, op1;
+ rtx and_op;
rtx cc_clobber;
+ rtvec rv;
- if (TARGET_DEBUG_ADDR)
- rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
+ || !base_reg_operand (scratch, GET_MODE (scratch)))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
- if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
+
+ else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
+ addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
+
+ else
rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- if (GET_CODE (mem) != MEM)
+ /* Make sure the mode is valid in this register class. */
+ if ((addr_mask & RELOAD_REG_VALID) == 0)
rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- rclass = REGNO_REG_CLASS (regno);
- addr = find_replacement (&XEXP (mem, 0));
+ if (TARGET_DEBUG_ADDR)
+ rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
- switch (rclass)
+ new_addr = addr = XEXP (mem, 0);
+ switch (GET_CODE (addr))
{
- /* GPRs can handle reg + small constant, all other addresses need to use
- the scratch register. */
- case GENERAL_REGS:
- case BASE_REGS:
- if (GET_CODE (addr) == AND)
+ /* Does the register class support auto update forms for this mode? If
+ not, do the update now. We don't need a scratch register, since the
+ powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
+ case PRE_INC:
+ case PRE_DEC:
+ op_reg = XEXP (addr, 0);
+ if (!base_reg_operand (op_reg, Pmode))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
{
- and_op2 = XEXP (addr, 1);
- addr = find_replacement (&XEXP (addr, 0));
+ emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
+ new_addr = op_reg;
}
+ break;
- if (GET_CODE (addr) == PRE_MODIFY)
- {
- scratch_or_premodify = find_replacement (&XEXP (addr, 0));
- if (!REG_P (scratch_or_premodify))
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+ case PRE_MODIFY:
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ if (!base_reg_operand (op0, Pmode)
+ || GET_CODE (op1) != PLUS
+ || !rtx_equal_p (op0, XEXP (op1, 0)))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- addr = find_replacement (&XEXP (addr, 1));
- if (GET_CODE (addr) != PLUS)
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+ if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+ new_addr = reg;
}
+ break;
- if (GET_CODE (addr) == PLUS
- && (and_op2 != NULL_RTX
- || !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true)))
+ /* Do we need to simulate AND -16 to clear the bottom address bits used
+ in VMX load/stores? */
+ case AND:
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ if ((addr_mask & RELOAD_REG_AND_M16) == 0)
{
- /* find_replacement already recurses into both operands of
- PLUS so we don't need to call it here. */
- addr_op1 = XEXP (addr, 0);
- addr_op2 = XEXP (addr, 1);
- if (!legitimate_indirect_address_p (addr_op1, false))
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+ if (REG_P (op0) || GET_CODE (op0) == SUBREG)
+ op_reg = op0;
- if (!REG_P (addr_op2)
- && (GET_CODE (addr_op2) != CONST_INT
- || !satisfies_constraint_I (addr_op2)))
+ else if (GET_CODE (op1) == PLUS)
{
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr,
- "\nMove plus addr to register %s, mode = %s: ",
- rs6000_reg_names[REGNO (scratch)],
- GET_MODE_NAME (mode));
- debug_rtx (addr_op2);
- }
- rs6000_emit_move (scratch, addr_op2, Pmode);
- addr_op2 = scratch;
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
+ op_reg = scratch;
}
- emit_insn (gen_rtx_SET (VOIDmode,
- scratch_or_premodify,
- gen_rtx_PLUS (Pmode,
- addr_op1,
- addr_op2)));
+ else
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- addr = scratch_or_premodify;
- scratch_or_premodify = scratch;
- }
- else if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true))
- {
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
- rs6000_reg_names[REGNO (scratch_or_premodify)],
- GET_MODE_NAME (mode));
- debug_rtx (addr);
- }
- rs6000_emit_move (scratch_or_premodify, addr, Pmode);
- addr = scratch_or_premodify;
- scratch_or_premodify = scratch;
+ and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
+ cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
+ rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
+ new_addr = scratch;
}
break;
- /* Float registers can do offset+reg addressing for scalar types. */
- case FLOAT_REGS:
- if (legitimate_indirect_address_p (addr, false) /* reg */
- || legitimate_indexed_address_p (addr, false) /* reg+reg */
- || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
- && and_op2 == NULL_RTX
- && scratch_or_premodify == scratch
- && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
- break;
-
- /* If this isn't a legacy floating point load/store, fall through to the
- VSX defaults. */
-
- /* VSX/Altivec registers can only handle reg+reg addressing. Move other
- addresses into a scratch register. */
- case VSX_REGS:
- case ALTIVEC_REGS:
-
- /* With float regs, we need to handle the AND ourselves, since we can't
- use the Altivec instruction with an implicit AND -16. Allow scalar
- loads to float registers to use reg+offset even if VSX. */
- if (GET_CODE (addr) == AND
- && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
- || GET_CODE (XEXP (addr, 1)) != CONST_INT
- || INTVAL (XEXP (addr, 1)) != -16
- || !VECTOR_MEM_ALTIVEC_P (mode)))
- {
- and_op2 = XEXP (addr, 1);
- addr = find_replacement (&XEXP (addr, 0));
- }
-
- /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
- as the address later. */
- if (GET_CODE (addr) == PRE_MODIFY
- && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
- && (rclass != FLOAT_REGS
- || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
- || and_op2 != NULL_RTX
- || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
- {
- scratch_or_premodify = find_replacement (&XEXP (addr, 0));
- if (!legitimate_indirect_address_p (scratch_or_premodify, false))
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
-
- addr = find_replacement (&XEXP (addr, 1));
- if (GET_CODE (addr) != PLUS)
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+ /* If this is an indirect address, make sure it is a base register. */
+ case REG:
+ case SUBREG:
+ if (!base_reg_operand (addr, GET_MODE (addr)))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ new_addr = scratch;
}
+ break;
- if (legitimate_indirect_address_p (addr, false) /* reg */
- || legitimate_indexed_address_p (addr, false) /* reg+reg */
- || (GET_CODE (addr) == AND /* Altivec memory */
- && rclass == ALTIVEC_REGS
- && GET_CODE (XEXP (addr, 1)) == CONST_INT
- && INTVAL (XEXP (addr, 1)) == -16
- && (legitimate_indirect_address_p (XEXP (addr, 0), false)
- || legitimate_indexed_address_p (XEXP (addr, 0), false))))
- ;
+ /* If this is an indexed address, make sure the register class can handle
+ indexed addresses for this mode. */
+ case PLUS:
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ if (!base_reg_operand (op0, Pmode))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- else if (GET_CODE (addr) == PLUS)
+ else if (int_reg_operand (op1, Pmode))
{
- addr_op1 = XEXP (addr, 0);
- addr_op2 = XEXP (addr, 1);
- if (!REG_P (addr_op1))
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
-
- if (TARGET_DEBUG_ADDR)
+ if ((addr_mask & RELOAD_REG_INDEXED) == 0)
{
- fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
- rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
- debug_rtx (addr_op2);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ new_addr = scratch;
}
- rs6000_emit_move (scratch, addr_op2, Pmode);
- emit_insn (gen_rtx_SET (VOIDmode,
- scratch_or_premodify,
- gen_rtx_PLUS (Pmode,
- addr_op1,
- scratch)));
- addr = scratch_or_premodify;
- scratch_or_premodify = scratch;
}
- else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
- || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
- || REG_P (addr))
+ /* Make sure the register class can handle offset addresses. */
+ else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
{
- if (TARGET_DEBUG_ADDR)
+ if ((addr_mask & RELOAD_REG_OFFSET) == 0)
{
- fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
- rs6000_reg_names[REGNO (scratch_or_premodify)],
- GET_MODE_NAME (mode));
- debug_rtx (addr);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ new_addr = scratch;
}
-
- rs6000_emit_move (scratch_or_premodify, addr, Pmode);
- addr = scratch_or_premodify;
- scratch_or_premodify = scratch;
}
else
@@ -16701,55 +17429,51 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
break;
- default:
- rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- }
-
- /* If the original address involved a pre-modify that we couldn't use the VSX
- memory instruction with update, and we haven't taken care of already,
- store the address in the pre-modify register and use that as the
- address. */
- if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
- {
- emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
- addr = scratch_or_premodify;
- }
+ case LO_SUM:
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ if (!base_reg_operand (op0, Pmode))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
- memory instruction, recreate the AND now, including the clobber which is
- generated by the general ANDSI3/ANDDI3 patterns for the
- andi. instruction. */
- if (and_op2 != NULL_RTX)
- {
- if (! legitimate_indirect_address_p (addr, false))
+ else if (int_reg_operand (op1, Pmode))
{
- emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
- addr = scratch;
+ if ((addr_mask & RELOAD_REG_INDEXED) == 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ new_addr = scratch;
+ }
}
- if (TARGET_DEBUG_ADDR)
+ /* Make sure the register class can handle offset addresses. */
+ else if (legitimate_lo_sum_address_p (mode, addr, false))
{
- fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
- rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
- debug_rtx (and_op2);
+ if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+ new_addr = scratch;
+ }
}
- and_rtx = gen_rtx_SET (VOIDmode,
- scratch,
- gen_rtx_AND (Pmode,
- addr,
- and_op2));
+ else
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ break;
- cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
- emit_insn (gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, and_rtx, cc_clobber)));
- addr = scratch;
+ case SYMBOL_REF:
+ case CONST:
+ case LABEL_REF:
+ rs6000_emit_move (scratch, addr, Pmode);
+ new_addr = scratch;
+ break;
+
+ default:
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
/* Adjust the address if it changed. */
- if (addr != XEXP (mem, 0))
+ if (addr != new_addr)
{
- mem = replace_equiv_address_nv (mem, addr);
+ mem = replace_equiv_address_nv (mem, new_addr);
if (TARGET_DEBUG_ADDR)
fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
}
@@ -16893,17 +17617,44 @@ rs6000_instantiate_decls (void)
static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
+ bool is_constant = CONSTANT_P (x);
- if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
- return rclass;
+ /* Do VSX tests before handling traditional floaitng point registers. */
+ if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
+ {
+ if (is_constant)
+ {
+ /* Zero is always allowed in all VSX registers. */
+ if (x == CONST0_RTX (mode))
+ return rclass;
- if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
- && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
- && easy_vector_constant (x, mode))
- return ALTIVEC_REGS;
+ /* If this is a vector constant that can be formed with a few Altivec
+ instructions, we want altivec registers. */
+ if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
+ return ALTIVEC_REGS;
- if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
+ /* Force constant to memory. */
+ return NO_REGS;
+ }
+
+ /* If this is a scalar floating point value, prefer the traditional
+ floating point registers so that we can use D-form (register+offset)
+ addressing. */
+ if (GET_MODE_SIZE (mode) < 16)
+ return FLOAT_REGS;
+
+ /* Prefer the Altivec registers if Altivec is handling the vector
+ operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
+ loads. */
+ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
+ || mode == V1TImode)
+ return ALTIVEC_REGS;
+
+ return rclass;
+ }
+
+ if (is_constant || GET_CODE (x) == PLUS)
{
if (reg_class_subset_p (GENERAL_REGS, rclass))
return GENERAL_REGS;
@@ -16915,22 +17666,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
return GENERAL_REGS;
- /* For VSX, prefer the traditional registers for 64-bit values because we can
- use the non-VSX loads. Prefer the Altivec registers if Altivec is
- handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
- prefer Altivec loads.. */
- if (rclass == VSX_REGS)
- {
- if (GET_MODE_SIZE (mode) <= 8)
- return FLOAT_REGS;
-
- if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
- || mode == V1TImode)
- return ALTIVEC_REGS;
-
- return rclass;
- }
-
return rclass;
}
@@ -16959,7 +17694,7 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
static bool
rs6000_secondary_memory_needed (enum reg_class from_class,
enum reg_class to_class,
- enum machine_mode mode)
+ machine_mode mode)
{
enum rs6000_reg_type from_type, to_type;
bool altivec_p = ((from_class == ALTIVEC_REGS)
@@ -16985,7 +17720,7 @@ rs6000_secondary_memory_needed (enum reg_class from_class,
static bool
rs6000_debug_secondary_memory_needed (enum reg_class from_class,
enum reg_class to_class,
- enum machine_mode mode)
+ machine_mode mode)
{
bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
@@ -17005,7 +17740,7 @@ rs6000_debug_secondary_memory_needed (enum reg_class from_class,
NO_REGS is returned. */
static enum reg_class
-rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
+rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
rtx in)
{
int regno;
@@ -17050,30 +17785,34 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
else
regno = -1;
+ /* If we have VSX register moves, prefer moving scalar values between
+ Altivec registers and GPR by going via an FPR (and then via memory)
+ instead of reloading the secondary memory address for Altivec moves. */
+ if (TARGET_VSX
+ && GET_MODE_SIZE (mode) < 16
+ && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
+ && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
+ || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
+ && (regno >= 0 && INT_REGNO_P (regno)))))
+ return FLOAT_REGS;
+
/* We can place anything into GENERAL_REGS and can put GENERAL_REGS
into anything. */
if (rclass == GENERAL_REGS || rclass == BASE_REGS
|| (regno >= 0 && INT_REGNO_P (regno)))
return NO_REGS;
+ /* Constants, memory, and VSX registers can go into VSX registers (both the
+ traditional floating point and the altivec registers). */
+ if (rclass == VSX_REGS
+ && (regno == -1 || VSX_REGNO_P (regno)))
+ return NO_REGS;
+
/* Constants, memory, and FP registers can go into FP registers. */
if ((regno == -1 || FP_REGNO_P (regno))
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
- /* Memory, and FP/altivec registers can go into fp/altivec registers under
- VSX. However, for scalar variables, use the traditional floating point
- registers so that we can use offset+register addressing. */
- if (TARGET_VSX
- && (regno == -1 || VSX_REGNO_P (regno))
- && VSX_REG_CLASS_P (rclass))
- {
- if (GET_MODE_SIZE (mode) < 16)
- return FLOAT_REGS;
-
- return NO_REGS;
- }
-
/* Memory, and AltiVec registers can go into AltiVec registers. */
if ((regno == -1 || ALTIVEC_REGNO_P (regno))
&& rclass == ALTIVEC_REGS)
@@ -17091,7 +17830,7 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
/* Debug version of rs6000_secondary_reload_class. */
static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class rclass,
- enum machine_mode mode, rtx in)
+ machine_mode mode, rtx in)
{
enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
fprintf (stderr,
@@ -17107,8 +17846,8 @@ rs6000_debug_secondary_reload_class (enum reg_class rclass,
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
static bool
-rs6000_cannot_change_mode_class (enum machine_mode from,
- enum machine_mode to,
+rs6000_cannot_change_mode_class (machine_mode from,
+ machine_mode to,
enum reg_class rclass)
{
unsigned from_size = GET_MODE_SIZE (from);
@@ -17192,8 +17931,8 @@ rs6000_cannot_change_mode_class (enum machine_mode from,
/* Debug version of rs6000_cannot_change_mode_class. */
static bool
-rs6000_debug_cannot_change_mode_class (enum machine_mode from,
- enum machine_mode to,
+rs6000_debug_cannot_change_mode_class (machine_mode from,
+ machine_mode to,
enum reg_class rclass)
{
bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
@@ -17215,7 +17954,7 @@ rs6000_output_move_128bit (rtx operands[])
{
rtx dest = operands[0];
rtx src = operands[1];
- enum machine_mode mode = GET_MODE (dest);
+ machine_mode mode = GET_MODE (dest);
int dest_regno;
int src_regno;
bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
@@ -17340,6 +18079,7 @@ rs6000_output_move_128bit (rtx operands[])
/* Constants. */
else if (dest_regno >= 0
&& (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_WIDE_INT
|| GET_CODE (src) == CONST_DOUBLE
|| GET_CODE (src) == CONST_VECTOR))
{
@@ -17366,7 +18106,7 @@ rs6000_output_move_128bit (rtx operands[])
bool
rs6000_move_128bit_ok_p (rtx operands[])
{
- enum machine_mode mode = GET_MODE (operands[0]);
+ machine_mode mode = GET_MODE (operands[0]);
return (gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode));
}
@@ -17400,7 +18140,7 @@ int
ccr_bit (rtx op, int scc_p)
{
enum rtx_code code = GET_CODE (op);
- enum machine_mode cc_mode;
+ machine_mode cc_mode;
int cc_regnum;
int base_bit;
rtx reg;
@@ -17478,7 +18218,7 @@ static struct machine_function *
rs6000_init_machine_status (void)
{
stack_info.reload_completed = 0;
- return ggc_alloc_cleared_machine_function ();
+ return ggc_cleared_alloc<machine_function> ();
}
#define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
@@ -17548,46 +18288,6 @@ extract_ME (rtx op)
return i;
}
-/* Locate some local-dynamic symbol still in use by this function
- so that we can print its name in some tls_ld pattern. */
-
-static const char *
-rs6000_get_some_local_dynamic_name (void)
-{
- rtx insn;
-
- if (cfun->machine->some_ld_name)
- return cfun->machine->some_ld_name;
-
- for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (INSN_P (insn)
- && for_each_rtx (&PATTERN (insn),
- rs6000_get_some_local_dynamic_name_1, 0))
- return cfun->machine->some_ld_name;
-
- gcc_unreachable ();
-}
-
-/* Helper function for rs6000_get_some_local_dynamic_name. */
-
-static int
-rs6000_get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
- rtx x = *px;
-
- if (GET_CODE (x) == SYMBOL_REF)
- {
- const char *str = XSTR (x, 0);
- if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
- {
- cfun->machine->some_ld_name = str;
- return 1;
- }
- }
-
- return 0;
-}
-
/* Write out a function code label. */
void
@@ -17666,6 +18366,19 @@ print_operand (FILE *file, rtx x, int code)
fprintf (file, "%d", i + 1);
return;
+ case 'e':
+ /* If the low 16 bits are 0, but some other bit is set, write 's'. */
+ if (! INT_P (x))
+ {
+ output_operand_lossage ("invalid %%e value");
+ return;
+ }
+
+ uval = INTVAL (x);
+ if ((uval & 0xffff) == 0 && uval != 0)
+ putc ('s', file);
+ return;
+
case 'E':
/* X is a CR register. Print the number of the EQ bit of the CR */
if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
@@ -17968,12 +18681,19 @@ print_operand (FILE *file, rtx x, int code)
return;
case 'u':
- /* High-order 16 bits of constant for use in unsigned operand. */
+ /* High-order or low-order 16 bits of constant, whichever is non-zero,
+ for use in unsigned operand. */
if (! INT_P (x))
- output_operand_lossage ("invalid %%u value");
- else
- fprintf (file, HOST_WIDE_INT_PRINT_HEX,
- (INTVAL (x) >> 16) & 0xffff);
+ {
+ output_operand_lossage ("invalid %%u value");
+ return;
+ }
+
+ uval = INTVAL (x);
+ if ((uval & 0xffff) == 0)
+ uval >>= 16;
+
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
return;
case 'v':
@@ -18191,7 +18911,7 @@ print_operand (FILE *file, rtx x, int code)
fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
else
{
- if (!GET_CODE (tmp) == PLUS
+ if (GET_CODE (tmp) != PLUS
|| !REG_P (XEXP (tmp, 0))
|| !REG_P (XEXP (tmp, 1)))
{
@@ -18243,7 +18963,11 @@ print_operand (FILE *file, rtx x, int code)
return;
case '&':
- assemble_name (file, rs6000_get_some_local_dynamic_name ());
+ if (const char *name = get_some_local_dynamic_name ())
+ assemble_name (file, name);
+ else
+ output_operand_lossage ("'%%&' used without any "
+ "local dynamic TLS references");
return;
default:
@@ -18379,8 +19103,7 @@ rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
if (TARGET_RELOCATABLE
&& in_section != toc_section
&& !recurse
- && GET_CODE (x) != CONST_INT
- && GET_CODE (x) != CONST_DOUBLE
+ && !CONST_SCALAR_INT_P (x)
&& CONSTANT_P (x))
{
char buf[256];
@@ -18454,7 +19177,7 @@ rs6000_assemble_visibility (tree decl, int vis)
#endif
enum rtx_code
-rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
+rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
/* Reversal of FP compares takes care -- an ordered compare
becomes an unordered compare and vice versa. */
@@ -18471,9 +19194,9 @@ rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
represents the result of the compare. */
static rtx
-rs6000_generate_compare (rtx cmp, enum machine_mode mode)
+rs6000_generate_compare (rtx cmp, machine_mode mode)
{
- enum machine_mode comp_mode;
+ machine_mode comp_mode;
rtx compare_result;
enum rtx_code code = GET_CODE (cmp);
rtx op0 = XEXP (cmp, 0);
@@ -18512,7 +19235,7 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
&& FLOAT_MODE_P (mode))
{
rtx cmp, or_result, compare_result2;
- enum machine_mode op_mode = GET_MODE (op0);
+ machine_mode op_mode = GET_MODE (op0);
bool reverse_p;
if (op_mode == VOIDmode)
@@ -18759,25 +19482,40 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
/* Emit the RTL for an sISEL pattern. */
void
-rs6000_emit_sISEL (enum machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
+rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
{
rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
}
+/* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
+ can be used as that dest register. Return the dest register. */
+
+rtx
+rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
+{
+ if (op2 == const0_rtx)
+ return op1;
+
+ if (GET_CODE (scratch) == SCRATCH)
+ scratch = gen_reg_rtx (mode);
+
+ if (logical_operand (op2, mode))
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, scratch,
+ gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
+
+ return scratch;
+}
+
void
-rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
+rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
rtx condition_rtx;
- enum machine_mode op_mode;
+ machine_mode op_mode;
enum rtx_code cond_code;
rtx result = operands[0];
- if (TARGET_ISEL && (mode == SImode || mode == DImode))
- {
- rs6000_emit_sISEL (mode, operands);
- return;
- }
-
condition_rtx = rs6000_generate_compare (operands[1], mode);
cond_code = GET_CODE (condition_rtx);
@@ -18805,7 +19543,7 @@ rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
{
rtx not_result = gen_reg_rtx (CCEQmode);
rtx not_op, rev_cond_rtx;
- enum machine_mode cc_mode;
+ machine_mode cc_mode;
cc_mode = GET_MODE (XEXP (condition_rtx, 0));
@@ -18835,7 +19573,7 @@ rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
/* Emit a branch of kind CODE to location LOC. */
void
-rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
+rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
rtx condition_rtx, loc_ref;
@@ -18859,12 +19597,12 @@ rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
INSN is the insn. */
char *
-output_cbranch (rtx op, const char *label, int reversed, rtx insn)
+output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
static char string[64];
enum rtx_code code = GET_CODE (op);
rtx cc_reg = XEXP (op, 0);
- enum machine_mode mode = GET_MODE (cc_reg);
+ machine_mode mode = GET_MODE (cc_reg);
int cc_regno = REGNO (cc_reg) - CR0_REGNO;
int need_longbranch = label != NULL && get_attr_length (insn) == 8;
int really_reversed = reversed ^ need_longbranch;
@@ -19006,7 +19744,7 @@ static rtx
rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
{
rtx mask;
- enum machine_mode mode = GET_MODE (op0);
+ machine_mode mode = GET_MODE (op0);
switch (code)
{
@@ -19040,7 +19778,7 @@ rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
rtx op0, rtx op1,
- enum machine_mode dmode)
+ machine_mode dmode)
{
rtx mask;
bool swap_operands = false;
@@ -19151,12 +19889,7 @@ rs6000_emit_vector_compare (enum rtx_code rcode,
if (try_again)
{
if (swap_operands)
- {
- rtx tmp;
- tmp = op0;
- op0 = op1;
- op1 = tmp;
- }
+ std::swap (op0, op1);
mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
if (mask)
@@ -19175,10 +19908,10 @@ int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
rtx cond, rtx cc_op0, rtx cc_op1)
{
- enum machine_mode dest_mode = GET_MODE (dest);
- enum machine_mode mask_mode = GET_MODE (cc_op0);
+ machine_mode dest_mode = GET_MODE (dest);
+ machine_mode mask_mode = GET_MODE (cc_op0);
enum rtx_code rcode = GET_CODE (cond);
- enum machine_mode cc_mode = CCmode;
+ machine_mode cc_mode = CCmode;
rtx mask;
rtx cond2;
rtx tmp;
@@ -19254,8 +19987,8 @@ rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
rtx op0 = XEXP (op, 0);
rtx op1 = XEXP (op, 1);
REAL_VALUE_TYPE c1;
- enum machine_mode compare_mode = GET_MODE (op0);
- enum machine_mode result_mode = GET_MODE (dest);
+ machine_mode compare_mode = GET_MODE (op0);
+ machine_mode result_mode = GET_MODE (dest);
rtx temp;
bool is_against_zero;
@@ -19440,7 +20173,7 @@ static int
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
rtx condition_rtx, cr;
- enum machine_mode mode = GET_MODE (dest);
+ machine_mode mode = GET_MODE (dest);
enum rtx_code cond_code;
rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
bool signedp;
@@ -19469,9 +20202,7 @@ rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
default:
/* We need to swap the sense of the comparison. */
{
- rtx t = true_cond;
- true_cond = false_cond;
- false_cond = t;
+ std::swap (false_cond, true_cond);
PUT_CODE (condition_rtx, reverse_condition (cond_code));
}
break;
@@ -19507,7 +20238,7 @@ output_isel (rtx *operands)
void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
- enum machine_mode mode = GET_MODE (op0);
+ machine_mode mode = GET_MODE (op0);
enum rtx_code c;
rtx target;
@@ -19557,7 +20288,7 @@ emit_unlikely_jump (rtx cond, rtx label)
the zero_extend operation. */
static void
-emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
+emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
rtx (*fn) (rtx, rtx) = NULL;
@@ -19593,7 +20324,7 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
instruction in MODE. */
static void
-emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
+emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
rtx (*fn) (rtx, rtx, rtx) = NULL;
@@ -19688,7 +20419,7 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
rtx addr, align, shift, mask, mem;
HOST_WIDE_INT shift_mask;
- enum machine_mode mode = GET_MODE (orig_mem);
+ machine_mode mode = GET_MODE (orig_mem);
/* For smaller modes, we have to implement this via SImode. */
shift_mask = (mode == QImode ? 0x18 : 0x10);
@@ -19710,7 +20441,7 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
shift = gen_reg_rtx (SImode);
addr = gen_lowpart (SImode, addr);
emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
- if (WORDS_BIG_ENDIAN)
+ if (BYTES_BIG_ENDIAN)
shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
shift, 1, OPTAB_LIB_WIDEN);
*pshift = shift;
@@ -19760,7 +20491,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode, orig_mode;
+ machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -19883,7 +20614,7 @@ void
rs6000_expand_atomic_exchange (rtx operands[])
{
rtx retval, mem, val, cond;
- enum machine_mode mode;
+ machine_mode mode;
enum memmodel model;
rtx label, x, mask, shift;
@@ -19942,8 +20673,8 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
rtx orig_before, rtx orig_after, rtx model_rtx)
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
- enum machine_mode mode = GET_MODE (mem);
- enum machine_mode store_mode = mode;
+ machine_mode mode = GET_MODE (mem);
+ machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
@@ -20084,9 +20815,9 @@ rs6000_split_multireg_move (rtx dst, rtx src)
/* The register number of the first register being moved. */
int reg;
/* The mode that is to be moved. */
- enum machine_mode mode;
+ machine_mode mode;
/* The mode that the move is being done in, and its size. */
- enum machine_mode reg_mode;
+ machine_mode reg_mode;
int reg_mode_size;
/* The number of registers that will be moved. */
int nregs;
@@ -20449,7 +21180,7 @@ compute_save_world_info (rs6000_stack_t *info_ptr)
are none. (This check is expensive, but seldom executed.) */
if (WORLD_SAVE_P (info_ptr))
{
- rtx insn;
+ rtx_insn *insn;
for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
if (CALL_P (insn) && SIBLING_CALL_P (insn))
{
@@ -21167,7 +21898,7 @@ rs6000_stack_info (void)
static bool
spe_func_has_64bit_regs_p (void)
{
- rtx insns, insn;
+ rtx_insn *insns, *insn;
/* Functions that save and restore all the call-saved registers will
need to save/restore the registers in 64-bits. */
@@ -21194,7 +21925,7 @@ spe_func_has_64bit_regs_p (void)
i = PATTERN (insn);
if (GET_CODE (i) == SET)
{
- enum machine_mode mode = GET_MODE (SET_SRC (i));
+ machine_mode mode = GET_MODE (SET_SRC (i));
if (SPE_VECTOR_MODE (mode))
return true;
@@ -21436,9 +22167,9 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
static int
rs6000_ra_ever_killed (void)
{
- rtx top;
+ rtx_insn *top;
rtx reg;
- rtx insn;
+ rtx_insn *insn;
if (cfun->is_thunk)
return 0;
@@ -21642,7 +22373,7 @@ get_TOC_alias_set (void)
static int
uses_TOC (void)
{
- rtx insn;
+ rtx_insn *insn;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
@@ -21747,7 +22478,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
static void
rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
{
- rtx insn;
+ rtx_insn *insn;
rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx tmp_reg = gen_rtx_REG (Pmode, 0);
rtx todec = gen_int_mode (-size, Pmode);
@@ -22164,7 +22895,7 @@ gen_frame_store (rtx reg, rtx frame_reg, int offset)
Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
static rtx
-emit_frame_save (rtx frame_reg, enum machine_mode mode,
+emit_frame_save (rtx frame_reg, machine_mode mode,
unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
{
rtx reg, insn;
@@ -22187,7 +22918,7 @@ emit_frame_save (rtx frame_reg, enum machine_mode mode,
converting to a valid addressing mode. */
static rtx
-gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset)
+gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
{
rtx int_rtx, offset_rtx;
@@ -22449,7 +23180,7 @@ ptr_regno_for_savres (int sel)
static rtx
rs6000_emit_savres_rtx (rs6000_stack_t *info,
rtx frame_reg_rtx, int save_area_offset, int lr_offset,
- enum machine_mode reg_mode, int sel)
+ machine_mode reg_mode, int sel)
{
int i;
int offset, start_reg, end_reg, n_regs, use_reg;
@@ -22586,7 +23317,7 @@ void
rs6000_emit_prologue (void)
{
rs6000_stack_t *info = rs6000_stack_info ();
- enum machine_mode reg_mode = Pmode;
+ machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8;
rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx frame_reg_rtx = sp_reg_rtx;
@@ -23645,7 +24376,7 @@ load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
{
rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
rtx reg = gen_rtx_REG (SImode, regno);
- rtx insn = emit_move_insn (reg, mem);
+ rtx_insn *insn = emit_move_insn (reg, mem);
if (!exit_func && DEFAULT_ABI == ABI_V4)
{
@@ -23676,7 +24407,7 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
if (using_mfcr_multiple && count > 1)
{
- rtx insn;
+ rtx_insn *insn;
rtvec p;
int ndx;
@@ -23732,7 +24463,7 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
if (!exit_func && DEFAULT_ABI != ABI_ELFv2
&& (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
{
- rtx insn = get_last_insn ();
+ rtx_insn *insn = get_last_insn ();
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
add_reg_note (insn, REG_CFA_RESTORE, cr);
@@ -23759,7 +24490,7 @@ restore_saved_lr (int regno, bool exit_func)
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
- rtx insn = emit_move_insn (lr, reg);
+ rtx_insn *insn = emit_move_insn (lr, reg);
if (!exit_func && flag_shrink_wrap)
{
@@ -23811,7 +24542,7 @@ offset_below_red_zone_p (HOST_WIDE_INT offset)
static void
emit_cfa_restores (rtx cfa_restores)
{
- rtx insn = get_last_insn ();
+ rtx_insn *insn = get_last_insn ();
rtx *loc = &REG_NOTES (insn);
while (*loc)
@@ -23839,7 +24570,7 @@ rs6000_emit_epilogue (int sibcall)
rtx cfa_restores = NULL_RTX;
rtx insn;
rtx cr_save_reg = NULL_RTX;
- enum machine_mode reg_mode = Pmode;
+ machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8;
int i;
bool exit_func;
@@ -24691,8 +25422,8 @@ rs6000_output_function_epilogue (FILE *file,
/* Mach-O doesn't support labels at the end of objects, so if
it looks like we might want one, insert a NOP. */
{
- rtx insn = get_last_insn ();
- rtx deleted_debug_label = NULL_RTX;
+ rtx_insn *insn = get_last_insn ();
+ rtx_insn *deleted_debug_label = NULL;
while (insn
&& NOTE_P (insn)
&& NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
@@ -24784,7 +25515,7 @@ rs6000_output_function_epilogue (FILE *file,
Java is 13. Objective-C is 14. Objective-C++ isn't assigned
a number, so for now use 9. LTO and Go aren't assigned numbers
either, so for now use 0. */
- if (! strcmp (language_string, "GNU C")
+ if (lang_GNU_C ()
|| ! strcmp (language_string, "GNU GIMPLE")
|| ! strcmp (language_string, "GNU Go"))
i = 0;
@@ -24795,7 +25526,7 @@ rs6000_output_function_epilogue (FILE *file,
i = 2;
else if (! strcmp (language_string, "GNU Ada"))
i = 3;
- else if (! strcmp (language_string, "GNU C++")
+ else if (lang_GNU_CXX ()
|| ! strcmp (language_string, "GNU Objective-C++"))
i = 9;
else if (! strcmp (language_string, "GNU Java"))
@@ -24846,7 +25577,7 @@ rs6000_output_function_epilogue (FILE *file,
decl; decl = DECL_CHAIN (decl))
{
rtx parameter = DECL_INCOMING_RTL (decl);
- enum machine_mode mode = GET_MODE (parameter);
+ machine_mode mode = GET_MODE (parameter);
if (GET_CODE (parameter) == REG)
{
@@ -24991,7 +25722,8 @@ rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
tree function)
{
- rtx this_rtx, insn, funexp;
+ rtx this_rtx, funexp;
+ rtx_insn *insn;
reload_completed = 1;
epilogue_completed = 1;
@@ -25113,7 +25845,7 @@ static unsigned
rs6000_hash_constant (rtx k)
{
enum rtx_code code = GET_CODE (k);
- enum machine_mode mode = GET_MODE (k);
+ machine_mode mode = GET_MODE (k);
unsigned result = (code << 3) ^ mode;
const char *format;
int flen, fidx;
@@ -25127,6 +25859,15 @@ rs6000_hash_constant (rtx k)
case LABEL_REF:
return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
+ case CONST_WIDE_INT:
+ {
+ int i;
+ flen = CONST_WIDE_INT_NUNITS (k);
+ for (i = 0; i < flen; i++)
+ result = result * 613 + CONST_WIDE_INT_ELT (k, i);
+ return result;
+ }
+
case CONST_DOUBLE:
if (mode != VOIDmode)
return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
@@ -25182,24 +25923,21 @@ rs6000_hash_constant (rtx k)
return result;
}
-static unsigned
-toc_hash_function (const void *hash_entry)
+hashval_t
+toc_hasher::hash (toc_hash_struct *thc)
{
- const struct toc_hash_struct *thc =
- (const struct toc_hash_struct *) hash_entry;
return rs6000_hash_constant (thc->key) ^ thc->key_mode;
}
/* Compare H1 and H2 for equivalence. */
-static int
-toc_hash_eq (const void *h1, const void *h2)
+bool
+toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
{
- rtx r1 = ((const struct toc_hash_struct *) h1)->key;
- rtx r2 = ((const struct toc_hash_struct *) h2)->key;
+ rtx r1 = h1->key;
+ rtx r2 = h2->key;
- if (((const struct toc_hash_struct *) h1)->key_mode
- != ((const struct toc_hash_struct *) h2)->key_mode)
+ if (h1->key_mode != h2->key_mode)
return 0;
return rtx_equal_p (r1, r2);
@@ -25270,7 +26008,7 @@ rs6000_output_symbol_ref (FILE *file, rtx x)
written. */
void
-output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
+output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
{
char buf[256];
const char *name = buf;
@@ -25286,20 +26024,18 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
{
struct toc_hash_struct *h;
- void * * found;
/* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
time because GGC is not initialized at that point. */
if (toc_hash_table == NULL)
- toc_hash_table = htab_create_ggc (1021, toc_hash_function,
- toc_hash_eq, NULL);
+ toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
- h = ggc_alloc_toc_hash_struct ();
+ h = ggc_alloc<toc_hash_struct> ();
h->key = x;
h->key_mode = mode;
h->labelno = labelno;
- found = htab_find_slot (toc_hash_table, h, INSERT);
+ toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
if (*found == NULL)
*found = h;
else /* This is indeed a duplicate.
@@ -25309,8 +26045,7 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
fprintf (file, "%d,", labelno);
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
- fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
- found)->labelno));
+ fprintf (file, "%d\n", ((*found)->labelno));
#ifdef HAVE_AS_TLS
if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
@@ -25321,8 +26056,7 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
fprintf (file, "%d,", labelno);
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
- fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
- found)->labelno));
+ fprintf (file, "%d\n", ((*found)->labelno));
}
#endif
return;
@@ -25331,7 +26065,7 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
/* If we're going to put a double constant in the TOC, make sure it's
aligned properly when strict alignment is on. */
- if (GET_CODE (x) == CONST_DOUBLE
+ if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
&& STRICT_ALIGNMENT
&& GET_MODE_BITSIZE (mode) >= 64
&& ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
@@ -25910,7 +26644,7 @@ static int load_store_pendulum;
instructions to issue in this cycle. */
static int
-rs6000_variable_issue_1 (rtx insn, int more)
+rs6000_variable_issue_1 (rtx_insn *insn, int more)
{
last_scheduled_insn = insn;
if (GET_CODE (PATTERN (insn)) == USE
@@ -25950,7 +26684,7 @@ rs6000_variable_issue_1 (rtx insn, int more)
}
static int
-rs6000_variable_issue (FILE *stream, int verbose, rtx insn, int more)
+rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
{
int r = rs6000_variable_issue_1 (insn, more);
if (verbose)
@@ -25962,7 +26696,7 @@ rs6000_variable_issue (FILE *stream, int verbose, rtx insn, int more)
a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
static int
-rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
enum attr_type attr_type;
@@ -26021,25 +26755,29 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
switch (get_attr_type (dep_insn))
{
case TYPE_CMP:
- case TYPE_COMPARE:
- case TYPE_DELAYED_COMPARE:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
case TYPE_FPCOMPARE:
case TYPE_CR_LOGICAL:
case TYPE_DELAYED_CR:
return cost + 2;
+ case TYPE_EXTS:
+ case TYPE_MUL:
+ if (get_attr_dot (dep_insn) == DOT_YES)
+ return cost + 2;
+ else
+ break;
+ case TYPE_SHIFT:
+ if (get_attr_dot (dep_insn) == DOT_YES
+ && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
+ return cost + 2;
+ else
+ break;
default:
break;
}
break;
case TYPE_STORE:
- case TYPE_STORE_U:
- case TYPE_STORE_UX:
case TYPE_FPSTORE:
- case TYPE_FPSTORE_U:
- case TYPE_FPSTORE_UX:
if ((rs6000_cpu == PROCESSOR_POWER6)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -26056,63 +26794,49 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
switch (get_attr_type (dep_insn))
{
case TYPE_LOAD:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
case TYPE_CNTLZ:
{
if (! store_data_bypass_p (dep_insn, insn))
- return 4;
+ return get_attr_sign_extend (dep_insn)
+ == SIGN_EXTEND_YES ? 6 : 4;
break;
}
- case TYPE_LOAD_EXT:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_VAR_SHIFT_ROTATE:
- case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_SHIFT:
{
if (! store_data_bypass_p (dep_insn, insn))
- return 6;
+ return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
+ 6 : 3;
break;
}
case TYPE_INTEGER:
- case TYPE_COMPARE:
- case TYPE_FAST_COMPARE:
+ case TYPE_ADD:
+ case TYPE_LOGICAL:
case TYPE_EXTS:
- case TYPE_SHIFT:
- case TYPE_INSERT_WORD:
- case TYPE_INSERT_DWORD:
- case TYPE_FPLOAD_U:
- case TYPE_FPLOAD_UX:
- case TYPE_STORE_U:
- case TYPE_STORE_UX:
- case TYPE_FPSTORE_U:
- case TYPE_FPSTORE_UX:
+ case TYPE_INSERT:
{
if (! store_data_bypass_p (dep_insn, insn))
return 3;
break;
}
- case TYPE_IMUL:
- case TYPE_IMUL2:
- case TYPE_IMUL3:
- case TYPE_LMUL:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
+ case TYPE_STORE:
+ case TYPE_FPLOAD:
+ case TYPE_FPSTORE:
{
- if (! store_data_bypass_p (dep_insn, insn))
- return 17;
+ if (get_attr_update (dep_insn) == UPDATE_YES
+ && ! store_data_bypass_p (dep_insn, insn))
+ return 3;
break;
}
- case TYPE_IDIV:
+ case TYPE_MUL:
{
if (! store_data_bypass_p (dep_insn, insn))
- return 45;
+ return 17;
break;
}
- case TYPE_LDIV:
+ case TYPE_DIV:
{
if (! store_data_bypass_p (dep_insn, insn))
- return 57;
+ return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
break;
}
default:
@@ -26122,11 +26846,6 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
break;
case TYPE_LOAD:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
- case TYPE_LOAD_EXT:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
if ((rs6000_cpu == PROCESSOR_POWER6)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -26138,63 +26857,49 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
switch (get_attr_type (dep_insn))
{
case TYPE_LOAD:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
case TYPE_CNTLZ:
{
if (set_to_load_agen (dep_insn, insn))
- return 4;
+ return get_attr_sign_extend (dep_insn)
+ == SIGN_EXTEND_YES ? 6 : 4;
break;
}
- case TYPE_LOAD_EXT:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_VAR_SHIFT_ROTATE:
- case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_SHIFT:
{
if (set_to_load_agen (dep_insn, insn))
- return 6;
+ return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
+ 6 : 3;
break;
- }
+ }
case TYPE_INTEGER:
- case TYPE_COMPARE:
- case TYPE_FAST_COMPARE:
+ case TYPE_ADD:
+ case TYPE_LOGICAL:
case TYPE_EXTS:
- case TYPE_SHIFT:
- case TYPE_INSERT_WORD:
- case TYPE_INSERT_DWORD:
- case TYPE_FPLOAD_U:
- case TYPE_FPLOAD_UX:
- case TYPE_STORE_U:
- case TYPE_STORE_UX:
- case TYPE_FPSTORE_U:
- case TYPE_FPSTORE_UX:
+ case TYPE_INSERT:
{
if (set_to_load_agen (dep_insn, insn))
return 3;
break;
}
- case TYPE_IMUL:
- case TYPE_IMUL2:
- case TYPE_IMUL3:
- case TYPE_LMUL:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
+ case TYPE_STORE:
+ case TYPE_FPLOAD:
+ case TYPE_FPSTORE:
{
- if (set_to_load_agen (dep_insn, insn))
- return 17;
+ if (get_attr_update (dep_insn) == UPDATE_YES
+ && set_to_load_agen (dep_insn, insn))
+ return 3;
break;
}
- case TYPE_IDIV:
+ case TYPE_MUL:
{
if (set_to_load_agen (dep_insn, insn))
- return 45;
+ return 17;
break;
}
- case TYPE_LDIV:
+ case TYPE_DIV:
{
if (set_to_load_agen (dep_insn, insn))
- return 57;
+ return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
break;
}
default:
@@ -26205,6 +26910,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
case TYPE_FPLOAD:
if ((rs6000_cpu == PROCESSOR_POWER6)
+ && get_attr_update (insn) == UPDATE_NO
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0)
&& (get_attr_type (dep_insn) == TYPE_MFFGPR))
@@ -26234,7 +26940,8 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
return 1;
break;
case TYPE_FPLOAD:
- if (get_attr_type (dep_insn) == TYPE_MFFGPR)
+ if (get_attr_update (insn) == UPDATE_NO
+ && get_attr_type (dep_insn) == TYPE_MFFGPR)
return 2;
break;
default:
@@ -26256,7 +26963,8 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
/* Debug version of rs6000_adjust_cost. */
static int
-rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
+ int cost)
{
int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
@@ -26286,7 +26994,7 @@ rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
Return false otherwise. */
static bool
-is_microcoded_insn (rtx insn)
+is_microcoded_insn (rtx_insn *insn)
{
if (!insn || !NONDEBUG_INSN_P (insn)
|| GET_CODE (PATTERN (insn)) == USE
@@ -26300,10 +27008,12 @@ is_microcoded_insn (rtx insn)
&& (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
- if (type == TYPE_LOAD_EXT_U
- || type == TYPE_LOAD_EXT_UX
- || type == TYPE_LOAD_UX
- || type == TYPE_STORE_UX
+ if ((type == TYPE_LOAD
+ && get_attr_update (insn) == UPDATE_YES
+ && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
+ || ((type == TYPE_LOAD || type == TYPE_STORE)
+ && get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_YES)
|| type == TYPE_MFCR)
return true;
}
@@ -26315,7 +27025,7 @@ is_microcoded_insn (rtx insn)
by the processor (and therefore occupies 2 issue slots). */
static bool
-is_cracked_insn (rtx insn)
+is_cracked_insn (rtx_insn *insn)
{
if (!insn || !NONDEBUG_INSN_P (insn)
|| GET_CODE (PATTERN (insn)) == USE
@@ -26326,14 +27036,29 @@ is_cracked_insn (rtx insn)
&& (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
- if (type == TYPE_LOAD_U || type == TYPE_STORE_U
- || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U
- || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX
- || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR
- || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE
- || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
- || type == TYPE_IDIV || type == TYPE_LDIV
- || type == TYPE_INSERT_WORD)
+ if ((type == TYPE_LOAD
+ && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
+ && get_attr_update (insn) == UPDATE_NO)
+ || (type == TYPE_LOAD
+ && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
+ && get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_NO)
+ || (type == TYPE_STORE
+ && get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_NO)
+ || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
+ && get_attr_update (insn) == UPDATE_YES)
+ || type == TYPE_DELAYED_CR
+ || (type == TYPE_EXTS
+ && get_attr_dot (insn) == DOT_YES)
+ || (type == TYPE_SHIFT
+ && get_attr_dot (insn) == DOT_YES
+ && get_attr_var_shift (insn) == VAR_SHIFT_NO)
+ || (type == TYPE_MUL
+ && get_attr_dot (insn) == DOT_YES)
+ || type == TYPE_DIV
+ || (type == TYPE_INSERT
+ && get_attr_size (insn) == SIZE_32))
return true;
}
@@ -26344,7 +27069,7 @@ is_cracked_insn (rtx insn)
the branch slot. */
static bool
-is_branch_slot_insn (rtx insn)
+is_branch_slot_insn (rtx_insn *insn)
{
if (!insn || !NONDEBUG_INSN_P (insn)
|| GET_CODE (PATTERN (insn)) == USE
@@ -26365,7 +27090,7 @@ is_branch_slot_insn (rtx insn)
/* The function returns true if out_inst sets a value that is
used in the address generation computation of in_insn */
static bool
-set_to_load_agen (rtx out_insn, rtx in_insn)
+set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
{
rtx out_set, in_set;
@@ -26465,7 +27190,7 @@ mem_locations_overlap (rtx mem1, rtx mem2)
priorities of insns. */
static int
-rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
+rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
rtx load_mem, str_mem;
/* On machines (like the 750) which have asymmetric integer units,
@@ -26487,8 +27212,8 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
default:
break;
- case TYPE_IMUL:
- case TYPE_IDIV:
+ case TYPE_MUL:
+ case TYPE_DIV:
fprintf (stderr, "priority was %#x (%d) before adjustment\n",
priority, priority);
if (priority >= 0 && priority < 0x01000000)
@@ -26532,7 +27257,7 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
/* Return true if the instruction is nonpipelined on the Cell. */
static bool
-is_nonpipeline_insn (rtx insn)
+is_nonpipeline_insn (rtx_insn *insn)
{
enum attr_type type;
if (!insn || !NONDEBUG_INSN_P (insn)
@@ -26541,12 +27266,8 @@ is_nonpipeline_insn (rtx insn)
return false;
type = get_attr_type (insn);
- if (type == TYPE_IMUL
- || type == TYPE_IMUL2
- || type == TYPE_IMUL3
- || type == TYPE_LMUL
- || type == TYPE_IDIV
- || type == TYPE_LDIV
+ if (type == TYPE_MUL
+ || type == TYPE_DIV
|| type == TYPE_SDIV
|| type == TYPE_DDIV
|| type == TYPE_SSQRT
@@ -26629,22 +27350,25 @@ rs6000_use_sched_lookahead (void)
}
}
-/* We are choosing insn from the ready queue. Return nonzero if INSN can be chosen. */
+/* We are choosing insn from the ready queue. Return zero if INSN can be
+ chosen. */
static int
-rs6000_use_sched_lookahead_guard (rtx insn)
+rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
{
+ if (ready_index == 0)
+ return 0;
+
if (rs6000_cpu_attr != CPU_CELL)
- return 1;
+ return 0;
- if (insn == NULL_RTX || !INSN_P (insn))
- abort ();
+ gcc_assert (insn != NULL_RTX && INSN_P (insn));
if (!reload_completed
|| is_nonpipeline_insn (insn)
|| is_microcoded_insn (insn))
- return 0;
+ return 1;
- return 1;
+ return 0;
}
/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
@@ -26811,17 +27535,17 @@ rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
skipping any "non-active" insns - insns that will not actually occupy
an issue slot. Return NULL_RTX if such an insn is not found. */
-static rtx
-get_next_active_insn (rtx insn, rtx tail)
+static rtx_insn *
+get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
{
if (insn == NULL_RTX || insn == tail)
- return NULL_RTX;
+ return NULL;
while (1)
{
insn = NEXT_INSN (insn);
if (insn == NULL_RTX || insn == tail)
- return NULL_RTX;
+ return NULL;
if (CALL_P (insn)
|| JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
@@ -26838,7 +27562,7 @@ get_next_active_insn (rtx insn, rtx tail)
static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
- rtx *ready ATTRIBUTE_UNUSED,
+ rtx_insn **ready ATTRIBUTE_UNUSED,
int *pn_ready ATTRIBUTE_UNUSED,
int clock_var ATTRIBUTE_UNUSED)
{
@@ -26854,11 +27578,7 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
if (is_nonpipeline_insn (ready[n_ready - 1])
&& (recog_memoized (ready[n_ready - 2]) > 0))
/* Simply swap first two insns. */
- {
- rtx tmp = ready[n_ready - 1];
- ready[n_ready - 1] = ready[n_ready - 2];
- ready[n_ready - 2] = tmp;
- }
+ std::swap (ready[n_ready - 1], ready[n_ready - 2]);
}
if (rs6000_cpu == PROCESSOR_POWER6)
@@ -26870,7 +27590,7 @@ rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
/* Like rs6000_sched_reorder, but called after issuing each insn. */
static int
-rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
+rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
if (sched_verbose)
@@ -26920,7 +27640,8 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
{
int pos;
int i;
- rtx tmp, load_mem, str_mem;
+ rtx_insn *tmp;
+ rtx load_mem, str_mem;
if (is_store_insn (last_scheduled_insn, &str_mem))
/* Issuing a store, swing the load_store_pendulum to the left */
@@ -27085,7 +27806,7 @@ rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
the first insn in the group it belongs to). */
static bool
-insn_terminates_group_p (rtx insn, enum group_termination which_group)
+insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
{
bool first, last;
@@ -27108,7 +27829,7 @@ insn_terminates_group_p (rtx insn, enum group_termination which_group)
static bool
-insn_must_be_first_in_group (rtx insn)
+insn_must_be_first_in_group (rtx_insn *insn)
{
enum attr_type type;
@@ -27142,8 +27863,7 @@ insn_must_be_first_in_group (rtx insn)
case TYPE_CR_LOGICAL:
case TYPE_MTJMPR:
case TYPE_MFJMPR:
- case TYPE_IDIV:
- case TYPE_LDIV:
+ case TYPE_DIV:
case TYPE_LOAD_L:
case TYPE_STORE_C:
case TYPE_ISYNC:
@@ -27158,21 +27878,11 @@ insn_must_be_first_in_group (rtx insn)
switch (type)
{
- case TYPE_INSERT_DWORD:
case TYPE_EXTS:
case TYPE_CNTLZ:
- case TYPE_SHIFT:
- case TYPE_VAR_SHIFT_ROTATE:
case TYPE_TRAP:
- case TYPE_IMUL:
- case TYPE_IMUL2:
- case TYPE_IMUL3:
- case TYPE_LMUL:
- case TYPE_IDIV:
- case TYPE_INSERT_WORD:
- case TYPE_DELAYED_COMPARE:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
+ case TYPE_MUL:
+ case TYPE_INSERT:
case TYPE_FPCOMPARE:
case TYPE_MFCR:
case TYPE_MTCR:
@@ -27182,16 +27892,26 @@ insn_must_be_first_in_group (rtx insn)
case TYPE_SYNC:
case TYPE_LOAD_L:
case TYPE_STORE_C:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
- case TYPE_LOAD_EXT_UX:
- case TYPE_STORE_U:
- case TYPE_STORE_UX:
- case TYPE_FPLOAD_U:
- case TYPE_FPLOAD_UX:
- case TYPE_FPSTORE_U:
- case TYPE_FPSTORE_UX:
return true;
+ case TYPE_SHIFT:
+ if (get_attr_dot (insn) == DOT_NO
+ || get_attr_var_shift (insn) == VAR_SHIFT_NO)
+ return true;
+ else
+ break;
+ case TYPE_DIV:
+ if (get_attr_size (insn) == SIZE_32)
+ return true;
+ else
+ break;
+ case TYPE_LOAD:
+ case TYPE_STORE:
+ case TYPE_FPLOAD:
+ case TYPE_FPSTORE:
+ if (get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27205,28 +27925,33 @@ insn_must_be_first_in_group (rtx insn)
case TYPE_MFCR:
case TYPE_MFCRF:
case TYPE_MTCR:
- case TYPE_IDIV:
- case TYPE_LDIV:
- case TYPE_COMPARE:
- case TYPE_DELAYED_COMPARE:
- case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_DIV:
case TYPE_ISYNC:
case TYPE_LOAD_L:
case TYPE_STORE_C:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
- case TYPE_LOAD_EXT:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_STORE_U:
- case TYPE_STORE_UX:
- case TYPE_FPLOAD_U:
- case TYPE_FPLOAD_UX:
- case TYPE_FPSTORE_U:
- case TYPE_FPSTORE_UX:
case TYPE_MFJMPR:
case TYPE_MTJMPR:
return true;
+ case TYPE_MUL:
+ case TYPE_SHIFT:
+ case TYPE_EXTS:
+ if (get_attr_dot (insn) == DOT_YES)
+ return true;
+ else
+ break;
+ case TYPE_LOAD:
+ if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
+ || get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
+ case TYPE_STORE:
+ case TYPE_FPLOAD:
+ case TYPE_FPSTORE:
+ if (get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27241,25 +27966,33 @@ insn_must_be_first_in_group (rtx insn)
case TYPE_MFCR:
case TYPE_MFCRF:
case TYPE_MTCR:
- case TYPE_COMPARE:
- case TYPE_DELAYED_COMPARE:
- case TYPE_VAR_DELAYED_COMPARE:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
case TYPE_SYNC:
case TYPE_ISYNC:
case TYPE_LOAD_L:
case TYPE_STORE_C:
- case TYPE_LOAD_U:
- case TYPE_LOAD_UX:
- case TYPE_LOAD_EXT:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_STORE_UX:
case TYPE_VECSTORE:
case TYPE_MFJMPR:
case TYPE_MTJMPR:
return true;
+ case TYPE_SHIFT:
+ case TYPE_EXTS:
+ case TYPE_MUL:
+ if (get_attr_dot (insn) == DOT_YES)
+ return true;
+ else
+ break;
+ case TYPE_LOAD:
+ if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
+ || get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
+ case TYPE_STORE:
+ if (get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_YES)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27272,7 +28005,7 @@ insn_must_be_first_in_group (rtx insn)
}
static bool
-insn_must_be_last_in_group (rtx insn)
+insn_must_be_last_in_group (rtx_insn *insn)
{
enum attr_type type;
@@ -27300,17 +28033,8 @@ insn_must_be_last_in_group (rtx insn)
{
case TYPE_EXTS:
case TYPE_CNTLZ:
- case TYPE_SHIFT:
- case TYPE_VAR_SHIFT_ROTATE:
case TYPE_TRAP:
- case TYPE_IMUL:
- case TYPE_IMUL2:
- case TYPE_IMUL3:
- case TYPE_LMUL:
- case TYPE_IDIV:
- case TYPE_DELAYED_COMPARE:
- case TYPE_IMUL_COMPARE:
- case TYPE_LMUL_COMPARE:
+ case TYPE_MUL:
case TYPE_FPCOMPARE:
case TYPE_MFCR:
case TYPE_MTCR:
@@ -27321,6 +28045,17 @@ insn_must_be_last_in_group (rtx insn)
case TYPE_LOAD_L:
case TYPE_STORE_C:
return true;
+ case TYPE_SHIFT:
+ if (get_attr_dot (insn) == DOT_NO
+ || get_attr_var_shift (insn) == VAR_SHIFT_NO)
+ return true;
+ else
+ break;
+ case TYPE_DIV:
+ if (get_attr_size (insn) == SIZE_32)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27334,10 +28069,19 @@ insn_must_be_last_in_group (rtx insn)
case TYPE_SYNC:
case TYPE_LOAD_L:
case TYPE_STORE_C:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_STORE_UX:
return true;
+ case TYPE_LOAD:
+ if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
+ && get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
+ case TYPE_STORE:
+ if (get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_YES)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27353,10 +28097,19 @@ insn_must_be_last_in_group (rtx insn)
case TYPE_SYNC:
case TYPE_LOAD_L:
case TYPE_STORE_C:
- case TYPE_LOAD_EXT_U:
- case TYPE_LOAD_EXT_UX:
- case TYPE_STORE_UX:
return true;
+ case TYPE_LOAD:
+ if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
+ && get_attr_update (insn) == UPDATE_YES)
+ return true;
+ else
+ break;
+ case TYPE_STORE:
+ if (get_attr_update (insn) == UPDATE_YES
+ && get_attr_indexed (insn) == INDEXED_YES)
+ return true;
+ else
+ break;
default:
break;
}
@@ -27414,7 +28167,7 @@ is_costly_group (rtx *group_insns, rtx next_insn)
static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
- rtx next_insn, bool *group_end, int can_issue_more,
+ rtx_insn *next_insn, bool *group_end, int can_issue_more,
int *group_count)
{
rtx nop;
@@ -27561,9 +28314,10 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
start a new group. */
static int
-redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
+redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
+ rtx_insn *tail)
{
- rtx insn, next_insn;
+ rtx_insn *insn, *next_insn;
int issue_rate;
int can_issue_more;
int slot, i;
@@ -27638,9 +28392,10 @@ redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
returns the number of dispatch groups found. */
static int
-pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
+pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
+ rtx_insn *tail)
{
- rtx insn, next_insn;
+ rtx_insn *insn, *next_insn;
rtx nop;
int issue_rate;
int can_issue_more;
@@ -27924,7 +28679,7 @@ rs6000_handle_altivec_attribute (tree *node,
bool *no_add_attrs)
{
tree type = *node, result = NULL_TREE;
- enum machine_mode mode;
+ machine_mode mode;
int unsigned_p;
char altivec_type
= ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
@@ -28233,7 +28988,7 @@ rs6000_elf_asm_init_sections (void)
/* Implement TARGET_SELECT_RTX_SECTION. */
static section *
-rs6000_elf_select_rtx_section (enum machine_mode mode, rtx x,
+rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
unsigned HOST_WIDE_INT align)
{
if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
@@ -28296,7 +29051,7 @@ rs6000_elf_in_small_data_p (const_tree decl)
if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
{
- const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ const char *section = DECL_SECTION_NAME (decl);
if (compare_section_name (section, ".sdata")
|| compare_section_name (section, ".sdata2")
|| compare_section_name (section, ".gnu.linkonce.s")
@@ -28327,7 +29082,7 @@ rs6000_elf_in_small_data_p (const_tree decl)
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
static bool
-rs6000_use_blocks_for_constant_p (enum machine_mode mode, const_rtx x)
+rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}
@@ -28518,7 +29273,7 @@ get_prev_label (tree function_name)
CALL_DEST is the routine we are calling. */
char *
-output_call (rtx insn, rtx *operands, int dest_operand_number,
+output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
int cookie_operand_number)
{
static char buf[256];
@@ -28644,7 +29399,7 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub)
#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
rtx
-rs6000_machopic_legitimize_pic_address (rtx orig, enum machine_mode mode,
+rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
rtx reg)
{
rtx base, offset;
@@ -28938,7 +29693,11 @@ rs6000_xcoff_asm_output_anchor (rtx symbol)
sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
SYMBOL_REF_BLOCK_OFFSET (symbol));
- ASM_OUTPUT_DEF (asm_out_file, XSTR (symbol, 0), buffer);
+ fprintf (asm_out_file, "%s", SET_ASM_OP);
+ RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
+ fprintf (asm_out_file, ",");
+ RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
+ fprintf (asm_out_file, "\n");
}
static void
@@ -29065,7 +29824,7 @@ rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
#define IN_NAMED_SECTION(DECL) \
((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
- && DECL_SECTION_NAME (DECL) != NULL_TREE)
+ && DECL_SECTION_NAME (DECL) != NULL)
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
@@ -29130,7 +29889,7 @@ rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
name = (*targetm.strip_name_encoding) (name);
- DECL_SECTION_NAME (decl) = build_string (strlen (name), name);
+ set_decl_section_name (decl, name);
}
/* Select section for constant in constant pool.
@@ -29140,7 +29899,7 @@ rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
toc entry. */
static section *
-rs6000_xcoff_select_rtx_section (enum machine_mode mode, rtx x,
+rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
@@ -29235,6 +29994,171 @@ rs6000_xcoff_file_end (void)
asm_out_file);
}
+struct declare_alias_data
+{
+ FILE *file;
+ bool function_descriptor;
+};
+
+/* Declare alias N. A helper function for for_node_and_aliases. */
+
+static bool
+rs6000_declare_alias (struct symtab_node *n, void *d)
+{
+ struct declare_alias_data *data = (struct declare_alias_data *)d;
+ /* Main symbol is output specially, because varasm machinery does part of
+ the job for us - we do not need to declare .globl/lglobs and such. */
+ if (!n->alias || n->weakref)
+ return false;
+
+ if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
+ return false;
+
+ /* Prevent assemble_alias from trying to use .set pseudo operation
+ that does not behave as expected by the middle-end. */
+ TREE_ASM_WRITTEN (n->decl) = true;
+
+ const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
+ char *buffer = (char *) alloca (strlen (name) + 2);
+ char *p;
+ int dollar_inside = 0;
+
+ strcpy (buffer, name);
+ p = strchr (buffer, '$');
+ while (p) {
+ *p = '_';
+ dollar_inside++;
+ p = strchr (p + 1, '$');
+ }
+ if (TREE_PUBLIC (n->decl))
+ {
+ if (!RS6000_WEAK || !DECL_WEAK (n->decl))
+ {
+ if (dollar_inside) {
+ if (data->function_descriptor)
+ fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
+ else
+ fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
+ }
+ if (data->function_descriptor)
+ fputs ("\t.globl .", data->file);
+ else
+ fputs ("\t.globl ", data->file);
+ RS6000_OUTPUT_BASENAME (data->file, buffer);
+ putc ('\n', data->file);
+ }
+ else if (DECL_WEAK (n->decl) && !data->function_descriptor)
+ ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
+ }
+ else
+ {
+ if (dollar_inside)
+ {
+ if (data->function_descriptor)
+ fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
+ else
+ fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
+ }
+ if (data->function_descriptor)
+ fputs ("\t.lglobl .", data->file);
+ else
+ fputs ("\t.lglobl ", data->file);
+ RS6000_OUTPUT_BASENAME (data->file, buffer);
+ putc ('\n', data->file);
+ }
+ if (data->function_descriptor)
+ fputs (".", data->file);
+ RS6000_OUTPUT_BASENAME (data->file, buffer);
+ fputs (":\n", data->file);
+ return false;
+}
+
+/* This macro produces the initial definition of a function name.
+ On the RS/6000, we need to place an extra '.' in the function name and
+ output the function descriptor.
+ Dollar signs are converted to underscores.
+
+ The csect for the function will have already been created when
+ text_section was selected. We do have to go back to that csect, however.
+
+ The third and fourth parameters to the .function pseudo-op (16 and 044)
+ are placeholders which no longer have any use.
+
+ Because AIX assembler's .set command has unexpected semantics, we output
+ all aliases as alternative labels in front of the definition. */
+
+void
+rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
+{
+ char *buffer = (char *) alloca (strlen (name) + 1);
+ char *p;
+ int dollar_inside = 0;
+ struct declare_alias_data data = {file, false};
+
+ strcpy (buffer, name);
+ p = strchr (buffer, '$');
+ while (p) {
+ *p = '_';
+ dollar_inside++;
+ p = strchr (p + 1, '$');
+ }
+ if (TREE_PUBLIC (decl))
+ {
+ if (!RS6000_WEAK || !DECL_WEAK (decl))
+ {
+ if (dollar_inside) {
+ fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
+ fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
+ }
+ fputs ("\t.globl .", file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ putc ('\n', file);
+ }
+ }
+ else
+ {
+ if (dollar_inside) {
+ fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
+ fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
+ }
+ fputs ("\t.lglobl .", file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ putc ('\n', file);
+ }
+ fputs ("\t.csect ", file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ fputs (":\n", file);
+ symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
+ fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ fputs (", TOC[tc0], 0\n", file);
+ in_section = NULL;
+ switch_to_section (function_section (decl));
+ putc ('.', file);
+ RS6000_OUTPUT_BASENAME (file, buffer);
+ fputs (":\n", file);
+ data.function_descriptor = true;
+ symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
+ if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
+ xcoffout_declare_function (file, decl, buffer);
+ return;
+}
+
+/* This macro produces the initial definition of a object (variable) name.
+ Because AIX assembler's .set command has unexpected semantics, we output
+ all aliases as alternative labels in front of the definition. */
+
+void
+rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
+{
+ struct declare_alias_data data = {file, false};
+ RS6000_OUTPUT_BASENAME (file, name);
+ fputs (":\n", file);
+ symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
+}
+
#ifdef HAVE_AS_TLS
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
@@ -29269,7 +30193,7 @@ static bool
rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
int *total, bool speed)
{
- enum machine_mode mode = GET_MODE (x);
+ machine_mode mode = GET_MODE (x);
switch (code)
{
@@ -29337,6 +30261,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
/* FALLTHRU */
case CONST_DOUBLE:
+ case CONST_WIDE_INT:
case CONST:
case HIGH:
case SYMBOL_REF:
@@ -29530,6 +30455,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
}
break;
+ case NE:
case EQ:
case GTU:
case LTU:
@@ -29611,7 +30537,7 @@ rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
static int
-rs6000_debug_address_cost (rtx x, enum machine_mode mode,
+rs6000_debug_address_cost (rtx x, machine_mode mode,
addr_space_t as, bool speed)
{
int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
@@ -29628,7 +30554,7 @@ rs6000_debug_address_cost (rtx x, enum machine_mode mode,
CLASS1 to one of CLASS2. */
static int
-rs6000_register_move_cost (enum machine_mode mode,
+rs6000_register_move_cost (machine_mode mode,
reg_class_t from, reg_class_t to)
{
int ret;
@@ -29699,7 +30625,7 @@ rs6000_register_move_cost (enum machine_mode mode,
or from memory. */
static int
-rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
bool in ATTRIBUTE_UNUSED)
{
int ret;
@@ -29782,7 +30708,7 @@ rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
all of the vector elements. */
static rtx
-rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
+rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
rtx reg;
@@ -29816,7 +30742,7 @@ rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
- enum machine_mode mode = GET_MODE (target);
+ machine_mode mode = GET_MODE (target);
rtx dst;
dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
@@ -29831,7 +30757,7 @@ rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
static void
rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
{
- enum machine_mode mode = GET_MODE (target);
+ machine_mode mode = GET_MODE (target);
rtx dst;
/* Altivec does not support fms directly;
@@ -29854,7 +30780,7 @@ rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
- enum machine_mode mode = GET_MODE (dst);
+ machine_mode mode = GET_MODE (dst);
rtx r;
/* This is a tad more complicated, since the fnma_optab is for
@@ -29879,7 +30805,7 @@ rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
- enum machine_mode mode = GET_MODE (dst);
+ machine_mode mode = GET_MODE (dst);
rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
int i;
@@ -29954,7 +30880,7 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
void
rs6000_emit_swrsqrt (rtx dst, rtx src)
{
- enum machine_mode mode = GET_MODE (src);
+ machine_mode mode = GET_MODE (src);
rtx x0 = gen_reg_rtx (mode);
rtx y = gen_reg_rtx (mode);
@@ -29976,7 +30902,7 @@ rs6000_emit_swrsqrt (rtx dst, rtx src)
gcc_assert (code != CODE_FOR_nothing);
/* Load up the constant 1.5 either as a scalar, or as a vector. */
- real_from_integer (&dconst3_2, VOIDmode, 3, 0, 0);
+ real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
@@ -30012,7 +30938,7 @@ rs6000_emit_swrsqrt (rtx dst, rtx src)
void
rs6000_emit_popcount (rtx dst, rtx src)
{
- enum machine_mode mode = GET_MODE (dst);
+ machine_mode mode = GET_MODE (dst);
rtx tmp1, tmp2;
/* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
@@ -30054,7 +30980,7 @@ rs6000_emit_popcount (rtx dst, rtx src)
void
rs6000_emit_parity (rtx dst, rtx src)
{
- enum machine_mode mode = GET_MODE (dst);
+ machine_mode mode = GET_MODE (dst);
rtx tmp;
tmp = gen_reg_rtx (mode);
@@ -30212,19 +31138,19 @@ altivec_expand_vec_perm_const_le (rtx operands[4])
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
permute control vector. But here it's not a constant, so we must
- generate a vector NOR to do the adjustment. */
+ generate a vector NAND or NOR to do the adjustment. */
void
altivec_expand_vec_perm_le (rtx operands[4])
{
- rtx notx, andx, unspec;
+ rtx notx, iorx, unspec;
rtx target = operands[0];
rtx op0 = operands[1];
rtx op1 = operands[2];
rtx sel = operands[3];
rtx tmp = target;
rtx norreg = gen_reg_rtx (V16QImode);
- enum machine_mode mode = GET_MODE (target);
+ machine_mode mode = GET_MODE (target);
/* Get everything in regs so the pattern matches. */
if (!REG_P (op0))
@@ -30236,10 +31162,13 @@ altivec_expand_vec_perm_le (rtx operands[4])
if (!REG_P (target))
tmp = gen_reg_rtx (mode);
- /* Invert the selector with a VNOR. */
+ /* Invert the selector with a VNAND if available, else a VNOR.
+ The VNAND is preferred for future fusion opportunities. */
notx = gen_rtx_NOT (V16QImode, sel);
- andx = gen_rtx_AND (V16QImode, notx, notx);
- emit_move_insn (norreg, andx);
+ iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
/* Permute with operands reversed and adjusted selector. */
unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
@@ -30425,8 +31354,8 @@ altivec_expand_vec_perm_const (rtx operands[4])
if (i == 16)
{
enum insn_code icode = patterns[j].impl;
- enum machine_mode omode = insn_data[icode].operand[0].mode;
- enum machine_mode imode = insn_data[icode].operand[1].mode;
+ machine_mode omode = insn_data[icode].operand[0].mode;
+ machine_mode imode = insn_data[icode].operand[1].mode;
/* For little-endian, don't use vpkuwum and vpkuhum if the
underlying vector type is not V4SI and V8HI, respectively.
@@ -30452,7 +31381,7 @@ altivec_expand_vec_perm_const (rtx operands[4])
(or swapped back) to ensure proper right-to-left numbering
from 0 to 2N-1. */
if (swapped ^ !BYTES_BIG_ENDIAN)
- x = op0, op0 = op1, op1 = x;
+ std::swap (op0, op1);
if (imode != V16QImode)
{
op0 = gen_lowpart (imode, op0);
@@ -30508,7 +31437,7 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
return false;
perm0 -= 2;
perm1 += 2;
- x = op0, op0 = op1, op1 = x;
+ std::swap (op0, op1);
}
/* If the second selector does not come from the second operand, fail. */
else if ((perm1 & 2) == 0)
@@ -30517,7 +31446,7 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
/* Success! */
if (target != NULL)
{
- enum machine_mode vmode, dmode;
+ machine_mode vmode, dmode;
rtvec v;
vmode = GET_MODE (target);
@@ -30552,7 +31481,7 @@ rs6000_expand_vec_perm_const (rtx operands[4])
/* Test whether a constant permutation is supported. */
static bool
-rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode,
+rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel)
{
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
@@ -30575,9 +31504,9 @@ rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode,
static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
- enum machine_mode vmode, unsigned nelt, rtx perm[])
+ machine_mode vmode, unsigned nelt, rtx perm[])
{
- enum machine_mode imode;
+ machine_mode imode;
rtx x;
imode = vmode;
@@ -30599,7 +31528,7 @@ rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
- enum machine_mode vmode = GET_MODE (target);
+ machine_mode vmode = GET_MODE (target);
unsigned i, nelt = GET_MODE_NUNITS (vmode);
rtx perm[16];
@@ -30614,7 +31543,7 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
- enum machine_mode vmode = GET_MODE (target);
+ machine_mode vmode = GET_MODE (target);
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
rtx perm[16];
@@ -30628,14 +31557,31 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
}
+/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
+void
+rs6000_scale_v2df (rtx tgt, rtx src, int scale)
+{
+ HOST_WIDE_INT hwi_scale (scale);
+ REAL_VALUE_TYPE r_pow;
+ rtvec v = rtvec_alloc (2);
+ rtx elt;
+ rtx scale_vec = gen_reg_rtx (V2DFmode);
+ (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
+ elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
+ RTVEC_ELT (v, 0) = elt;
+ RTVEC_ELT (v, 1) = elt;
+ rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
+ emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
+}
+
/* Return an RTX representing where to find the function value of a
function returning MODE. */
static rtx
-rs6000_complex_function_value (enum machine_mode mode)
+rs6000_complex_function_value (machine_mode mode)
{
unsigned int regno;
rtx r1, r2;
- enum machine_mode inner = GET_MODE_INNER (mode);
+ machine_mode inner = GET_MODE_INNER (mode);
unsigned int inner_bytes = GET_MODE_SIZE (inner);
if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
@@ -30659,6 +31605,29 @@ rs6000_complex_function_value (enum machine_mode mode)
return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
+/* Return an rtx describing a return value of MODE as a PARALLEL
+ in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
+ stride REG_STRIDE. */
+
+static rtx
+rs6000_parallel_return (machine_mode mode,
+ int n_elts, machine_mode elt_mode,
+ unsigned int regno, unsigned int reg_stride)
+{
+ rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
+
+ int i;
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx r = gen_rtx_REG (elt_mode, regno);
+ rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ regno += reg_stride;
+ }
+
+ return par;
+}
+
/* Target hook for TARGET_FUNCTION_VALUE.
On the SPE, both FPs and vectors are returned in r3.
@@ -30671,9 +31640,9 @@ rs6000_function_value (const_tree valtype,
const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
bool outgoing ATTRIBUTE_UNUSED)
{
- enum machine_mode mode;
+ machine_mode mode;
unsigned int regno;
- enum machine_mode elt_mode;
+ machine_mode elt_mode;
int n_elts;
/* Special handling for structs in darwin64. */
@@ -30694,12 +31663,12 @@ rs6000_function_value (const_tree valtype,
/* Otherwise fall through to standard ABI rules. */
}
+ mode = TYPE_MODE (valtype);
+
/* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
- if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
- &elt_mode, &n_elts))
+ if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
{
- int first_reg, n_regs, i;
- rtx par;
+ int first_reg, n_regs;
if (SCALAR_FLOAT_MODE_P (elt_mode))
{
@@ -30713,53 +31682,25 @@ rs6000_function_value (const_tree valtype,
n_regs = 1;
}
- par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
- for (i = 0; i < n_elts; i++)
- {
- rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
- rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
- XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
- }
-
- return par;
+ return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
}
- if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
- {
- /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
- return gen_rtx_PARALLEL (DImode,
- gen_rtvec (2,
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode, GP_ARG_RETURN),
- const0_rtx),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode,
- GP_ARG_RETURN + 1),
- GEN_INT (4))));
- }
- if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
- {
- return gen_rtx_PARALLEL (DCmode,
- gen_rtvec (4,
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode, GP_ARG_RETURN),
- const0_rtx),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode,
- GP_ARG_RETURN + 1),
- GEN_INT (4)),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode,
- GP_ARG_RETURN + 2),
- GEN_INT (8)),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode,
- GP_ARG_RETURN + 3),
- GEN_INT (12))));
- }
+ /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
+ if (TARGET_32BIT && TARGET_POWERPC64)
+ switch (mode)
+ {
+ default:
+ break;
+ case DImode:
+ case SCmode:
+ case DCmode:
+ case TCmode:
+ int count = GET_MODE_SIZE (mode) / 4;
+ return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
+ }
- mode = TYPE_MODE (valtype);
- if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
+ if ((INTEGRAL_TYPE_P (valtype)
+ && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
|| POINTER_TYPE_P (valtype))
mode = TARGET_32BIT ? SImode : DImode;
@@ -30792,23 +31733,13 @@ rs6000_function_value (const_tree valtype,
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
rtx
-rs6000_libcall_value (enum machine_mode mode)
+rs6000_libcall_value (machine_mode mode)
{
unsigned int regno;
+ /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
- {
- /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
- return gen_rtx_PARALLEL (DImode,
- gen_rtvec (2,
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode, GP_ARG_RETURN),
- const0_rtx),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (SImode,
- GP_ARG_RETURN + 1),
- GEN_INT (4))));
- }
+ return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
/* _Decimal128 must use an even/odd register pair. */
@@ -30900,7 +31831,7 @@ rs6000_dwarf_register_span (rtx reg)
rtx parts[8];
int i, words;
unsigned regno = REGNO (reg);
- enum machine_mode mode = GET_MODE (reg);
+ machine_mode mode = GET_MODE (reg);
if (TARGET_SPE
&& regno < 32
@@ -30922,13 +31853,13 @@ rs6000_dwarf_register_span (rtx reg)
{
if (BYTES_BIG_ENDIAN)
{
- parts[2 * i] = gen_rtx_REG (SImode, regno + 1200);
+ parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
}
else
{
parts[2 * i] = gen_rtx_REG (SImode, regno);
- parts[2 * i + 1] = gen_rtx_REG (SImode, regno + 1200);
+ parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
}
}
@@ -30943,16 +31874,16 @@ rs6000_init_dwarf_reg_sizes_extra (tree address)
if (TARGET_SPE)
{
int i;
- enum machine_mode mode = TYPE_MODE (char_type_node);
+ machine_mode mode = TYPE_MODE (char_type_node);
rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
rtx mem = gen_rtx_MEM (BLKmode, addr);
rtx value = gen_int_mode (4, mode);
- for (i = 1201; i < 1232; i++)
+ for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
{
- int column = DWARF_REG_TO_UNWIND_COLUMN (i);
- HOST_WIDE_INT offset
- = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode);
+ int column = DWARF_REG_TO_UNWIND_COLUMN
+ (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
+ HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
emit_move_insn (adjust_address (mem, mode, offset), value);
}
@@ -30961,7 +31892,7 @@ rs6000_init_dwarf_reg_sizes_extra (tree address)
if (TARGET_MACHO && ! TARGET_ALTIVEC)
{
int i;
- enum machine_mode mode = TYPE_MODE (char_type_node);
+ machine_mode mode = TYPE_MODE (char_type_node);
rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
rtx mem = gen_rtx_MEM (BLKmode, addr);
rtx value = gen_int_mode (16, mode);
@@ -30971,26 +31902,49 @@ rs6000_init_dwarf_reg_sizes_extra (tree address)
for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
{
- int column = DWARF_REG_TO_UNWIND_COLUMN (i);
- HOST_WIDE_INT offset
- = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode);
+ int column = DWARF_REG_TO_UNWIND_COLUMN
+ (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
+ HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
emit_move_insn (adjust_address (mem, mode, offset), value);
}
}
}
-/* Map internal gcc register numbers to DWARF2 register numbers. */
+/* Map internal gcc register numbers to debug format register numbers.
+ FORMAT specifies the type of debug register number to use:
+ 0 -- debug information, except for frame-related sections
+ 1 -- DWARF .debug_frame section
+ 2 -- DWARF .eh_frame section */
unsigned int
-rs6000_dbx_register_number (unsigned int regno)
+rs6000_dbx_register_number (unsigned int regno, unsigned int format)
{
- if (regno <= 63 || write_symbols != DWARF2_DEBUG)
+ /* We never use the GCC internal number for SPE high registers.
+ Those are mapped to the 1200..1231 range for all debug formats. */
+ if (SPE_HIGH_REGNO_P (regno))
+ return regno - FIRST_SPE_HIGH_REGNO + 1200;
+
+ /* Except for the above, we use the internal number for non-DWARF
+ debug information, and also for .eh_frame. */
+ if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
+ return regno;
+
+ /* On some platforms, we use the standard DWARF register
+ numbering for .debug_info and .debug_frame. */
+#ifdef RS6000_USE_DWARF_NUMBERING
+ if (regno <= 63)
return regno;
if (regno == LR_REGNO)
return 108;
if (regno == CTR_REGNO)
return 109;
+ /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
+ translated any combination of CR2, CR3, CR4 saves to a save of CR2.
+ The actual code emitted saves the whole of CR, so we map CR2_REGNO
+ to the DWARF reg for CR. */
+ if (format == 1 && regno == CR2_REGNO)
+ return 64;
if (CR_REGNO_P (regno))
return regno - CR0_REGNO + 86;
if (regno == CA_REGNO)
@@ -31005,14 +31959,12 @@ rs6000_dbx_register_number (unsigned int regno)
return 99;
if (regno == SPEFSCR_REGNO)
return 612;
- /* SPE high reg number. We get these values of regno from
- rs6000_dwarf_register_span. */
- gcc_assert (regno >= 1200 && regno < 1232);
+#endif
return regno;
}
/* target hook eh_return_filter_mode */
-static enum machine_mode
+static machine_mode
rs6000_eh_return_filter_mode (void)
{
return TARGET_32BIT ? SImode : word_mode;
@@ -31020,8 +31972,16 @@ rs6000_eh_return_filter_mode (void)
/* Target hook for scalar_mode_supported_p. */
static bool
-rs6000_scalar_mode_supported_p (enum machine_mode mode)
-{
+rs6000_scalar_mode_supported_p (machine_mode mode)
+{
+ /* -m32 does not support TImode. This is the default, from
+ default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
+ same ABI as for -m32. But default_scalar_mode_supported_p allows
+ integer modes of precision 2 * BITS_PER_WORD, which matches TImode
+ for -mpowerpc64. */
+ if (TARGET_32BIT && mode == TImode)
+ return false;
+
if (DECIMAL_FLOAT_MODE_P (mode))
return default_decimal_float_supported_p ();
else
@@ -31030,7 +31990,7 @@ rs6000_scalar_mode_supported_p (enum machine_mode mode)
/* Target hook for vector_mode_supported_p. */
static bool
-rs6000_vector_mode_supported_p (enum machine_mode mode)
+rs6000_vector_mode_supported_p (machine_mode mode)
{
if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
@@ -31074,7 +32034,7 @@ rs6000_stack_protect_fail (void)
}
void
-rs6000_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
+rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
int num_operands ATTRIBUTE_UNUSED)
{
if (rs6000_warn_cell_microcode)
@@ -31195,6 +32155,9 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
{ "crypto", RS6000_BTM_CRYPTO, false, false },
{ "htm", RS6000_BTM_HTM, false, false },
+ { "hard-dfp", RS6000_BTM_DFP, false, false },
+ { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
+ { "long-double-128", RS6000_BTM_LDBL128, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
@@ -31835,7 +32798,7 @@ rs6000_can_inline_p (tree caller, tree callee)
memory requirements (either offetable or REG+REG addressing). */
rtx
-rs6000_allocate_stack_temp (enum machine_mode mode,
+rs6000_allocate_stack_temp (machine_mode mode,
bool offsettable_p,
bool reg_reg_p)
{
@@ -31926,9 +32889,9 @@ rs6000_address_for_altivec (rtx x)
for particular insns, though. Only easy FP constants are acceptable. */
static bool
-rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
+rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
- if (TARGET_ELF && rs6000_tls_referenced_p (x))
+ if (TARGET_ELF && tls_referenced_p (x))
return false;
return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
@@ -31945,6 +32908,8 @@ rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
void
rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
+ const bool direct_call_p
+ = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
rtx toc_load = NULL_RTX;
rtx toc_restore = NULL_RTX;
@@ -32013,8 +32978,11 @@ rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
func_toc_offset));
toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
- /* If we have a static chain, load it up. */
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
+ /* If we have a static chain, load it up. But, if the call was
+ originally direct, the 3rd word has not been written since no
+ trampoline has been built, so we ought not to load it, lest we
+ override a static chain value. */
+ if (!direct_call_p && TARGET_POINTERS_TO_NESTED_FUNCTIONS)
{
rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
@@ -32137,7 +33105,7 @@ rs6000_code_end (void)
#if RS6000_WEAK
if (USE_HIDDEN_LINKONCE)
{
- DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
+ cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
targetm.asm_out.unique_section (decl, 0);
switch_to_section (get_named_section (decl, NULL, 0));
DECL_WEAK (decl) = 1;
@@ -32192,23 +33160,19 @@ rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
MODE is the machine mode.
If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
- If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
- CLOBBER_REG is either NULL or a scratch register of type CC to allow
- formation of the AND instructions. */
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
static void
rs6000_split_logical_inner (rtx dest,
rtx op1,
rtx op2,
enum rtx_code code,
- enum machine_mode mode,
+ machine_mode mode,
bool complement_final_p,
bool complement_op1_p,
- bool complement_op2_p,
- rtx clobber_reg)
+ bool complement_op2_p)
{
rtx bool_rtx;
- rtx set_rtx;
/* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
if (op2 && GET_CODE (op2) == CONST_INT
@@ -32248,12 +33212,23 @@ rs6000_split_logical_inner (rtx dest,
}
}
+ if (code == AND && mode == SImode
+ && !complement_final_p && !complement_op1_p && !complement_op2_p)
+ {
+ emit_insn (gen_andsi3 (dest, op1, op2));
+ return;
+ }
+
if (complement_op1_p)
op1 = gen_rtx_NOT (mode, op1);
if (complement_op2_p)
op2 = gen_rtx_NOT (mode, op2);
+ /* For canonical RTL, if only one arm is inverted it is the first. */
+ if (!complement_op1_p && complement_op2_p)
+ std::swap (op1, op2);
+
bool_rtx = ((code == NOT)
? gen_rtx_NOT (mode, op1)
: gen_rtx_fmt_ee (code, mode, op1, op2));
@@ -32261,17 +33236,7 @@ rs6000_split_logical_inner (rtx dest,
if (complement_final_p)
bool_rtx = gen_rtx_NOT (mode, bool_rtx);
- set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
-
- /* Is this AND with an explicit clobber? */
- if (clobber_reg)
- {
- rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
- set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
- }
-
- emit_insn (set_rtx);
- return;
+ emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
}
/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
@@ -32292,8 +33257,7 @@ rs6000_split_logical_di (rtx operands[3],
enum rtx_code code,
bool complement_final_p,
bool complement_op1_p,
- bool complement_op2_p,
- rtx clobber_reg)
+ bool complement_op2_p)
{
const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
@@ -32354,7 +33318,6 @@ rs6000_split_logical_di (rtx operands[3],
&& !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
- && clobber_reg == NULL_RTX
&& !logical_const_operand (op2_hi_lo[i], SImode))
{
HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
@@ -32367,18 +33330,15 @@ rs6000_split_logical_di (rtx operands[3],
hi_16bits |= upper_32bits;
rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
- code, SImode, false, false, false,
- NULL_RTX);
+ code, SImode, false, false, false);
rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
- code, SImode, false, false, false,
- NULL_RTX);
+ code, SImode, false, false, false);
}
else
rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
code, SImode, complement_final_p,
- complement_op1_p, complement_op2_p,
- clobber_reg);
+ complement_op1_p, complement_op2_p);
}
return;
@@ -32390,23 +33350,19 @@ rs6000_split_logical_di (rtx operands[3],
OPERANDS is an array containing the destination and two input operands.
CODE is the base operation (AND, IOR, XOR, NOT).
- MODE is the machine mode.
If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
- If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
- CLOBBER_REG is either NULL or a scratch register of type CC to allow
- formation of the AND instructions. */
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
void
rs6000_split_logical (rtx operands[3],
enum rtx_code code,
bool complement_final_p,
bool complement_op1_p,
- bool complement_op2_p,
- rtx clobber_reg)
+ bool complement_op2_p)
{
- enum machine_mode mode = GET_MODE (operands[0]);
- enum machine_mode sub_mode;
+ machine_mode mode = GET_MODE (operands[0]);
+ machine_mode sub_mode;
rtx op0, op1, op2;
int sub_size, regno0, regno1, nregs, i;
@@ -32415,8 +33371,7 @@ rs6000_split_logical (rtx operands[3],
if (mode == DImode && !TARGET_POWERPC64)
{
rs6000_split_logical_di (operands, code, complement_final_p,
- complement_op1_p, complement_op2_p,
- clobber_reg);
+ complement_op1_p, complement_op2_p);
return;
}
@@ -32449,7 +33404,7 @@ rs6000_split_logical (rtx operands[3],
rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
complement_final_p, complement_op1_p,
- complement_op2_p, clobber_reg);
+ complement_op2_p);
}
return;
@@ -32458,25 +33413,14 @@ rs6000_split_logical (rtx operands[3],
/* Return true if the peephole2 can combine a load involving a combination of
an addis instruction and a load with an offset that can be fused together on
- a power8.
-
- The operands are:
- operands[0] register set with addis
- operands[1] value set via addis
- operands[2] target register being loaded
- operands[3] D-form memory reference using operands[0].
-
- In addition, we are passed a boolean that is true if this is a peephole2,
- and we can use see if the addis_reg is dead after the insn and can be
- replaced by the target register. */
+ a power8. */
bool
-fusion_gpr_load_p (rtx *operands, bool peep2_p)
+fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
+ rtx addis_value, /* addis value. */
+ rtx target, /* target register that is loaded. */
+ rtx mem) /* bottom part of the memory addr. */
{
- rtx addis_reg = operands[0];
- rtx addis_value = operands[1];
- rtx target = operands[2];
- rtx mem = operands[3];
rtx addr;
rtx base_reg;
@@ -32490,9 +33434,6 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
return false;
- if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
- return false;
-
/* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND
|| (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
@@ -32501,22 +33442,22 @@ fusion_gpr_load_p (rtx *operands, bool peep2_p)
if (!MEM_P (mem))
return false;
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
+ return false;
+
addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
return false;
/* Validate that the register used to load the high value is either the
- register being loaded, or we can safely replace its use in a peephole2.
+ register being loaded, or we can safely replace its use.
- If this is a peephole2, we assume that there are 2 instructions in the
- peephole (addis and load), so we want to check if the target register was
- not used in the memory address and the register to hold the addis result
- is dead after the peephole. */
+ This function is only called from the peephole2 pass and we assume that
+ there are 2 instructions in the peephole (addis and load), so we want to
+ check if the target register was not used in the memory address and the
+ register to hold the addis result is dead after the peephole. */
if (REGNO (addis_reg) != REGNO (target))
{
- if (!peep2_p)
- return false;
-
if (reg_mentioned_p (target, mem))
return false;
@@ -32553,13 +33494,10 @@ expand_fusion_gpr_load (rtx *operands)
rtx orig_mem = operands[3];
rtx new_addr, new_mem, orig_addr, offset;
enum rtx_code plus_or_lo_sum;
- enum machine_mode target_mode = GET_MODE (target);
- enum machine_mode extend_mode = target_mode;
- enum machine_mode ptr_mode = Pmode;
+ machine_mode target_mode = GET_MODE (target);
+ machine_mode extend_mode = target_mode;
+ machine_mode ptr_mode = Pmode;
enum rtx_code extend = UNKNOWN;
- rtx addis_reg = ((ptr_mode == target_mode)
- ? target
- : simplify_subreg (ptr_mode, target, target_mode, 0));
if (GET_CODE (orig_mem) == ZERO_EXTEND
|| (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
@@ -32576,13 +33514,14 @@ expand_fusion_gpr_load (rtx *operands)
gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
offset = XEXP (orig_addr, 1);
- new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
- new_mem = change_address (orig_mem, target_mode, new_addr);
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
+ new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
if (extend != UNKNOWN)
new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
- emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
+ new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
+ UNSPEC_FUSION_GPR);
emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
if (extend == SIGN_EXTEND)
@@ -32601,55 +33540,40 @@ expand_fusion_gpr_load (rtx *operands)
}
/* Return a string to fuse an addis instruction with a gpr load to the same
- register that we loaded up the addis instruction. The code is complicated,
- so we call output_asm_insn directly, and just return "".
+ register that we loaded up the addis instruction. The address that is used
+ is the logical address that was formed during peephole2:
+ (lo_sum (high) (low-part))
- The operands are:
- operands[0] register set with addis (must be same reg as target).
- operands[1] value set via addis
- operands[2] target register being loaded
- operands[3] D-form memory reference using operands[0]. */
+ The code is complicated, so we call output_asm_insn directly, and just
+ return "". */
const char *
-emit_fusion_gpr_load (rtx *operands)
+emit_fusion_gpr_load (rtx target, rtx mem)
{
- rtx addis_reg = operands[0];
- rtx addis_value = operands[1];
- rtx target = operands[2];
- rtx mem = operands[3];
+ rtx addis_value;
rtx fuse_ops[10];
rtx addr;
rtx load_offset;
const char *addis_str = NULL;
const char *load_str = NULL;
- const char *extend_insn = NULL;
const char *mode_name = NULL;
char insn_template[80];
- enum machine_mode mode;
+ machine_mode mode;
const char *comment_str = ASM_COMMENT_START;
- bool sign_p = false;
-
- gcc_assert (REG_P (addis_reg) && REG_P (target));
- gcc_assert (REGNO (addis_reg) == REGNO (target));
-
- if (*comment_str == ' ')
- comment_str++;
- /* Allow sign/zero extension. */
if (GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
- else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
- {
- sign_p = true;
- mem = XEXP (mem, 0);
- }
+ gcc_assert (REG_P (target) && MEM_P (mem));
+
+ if (*comment_str == ' ')
+ comment_str++;
- gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
gcc_unreachable ();
+ addis_value = XEXP (addr, 0);
load_offset = XEXP (addr, 1);
/* Now emit the load instruction to the same register. */
@@ -32659,29 +33583,22 @@ emit_fusion_gpr_load (rtx *operands)
case QImode:
mode_name = "char";
load_str = "lbz";
- extend_insn = "extsb %0,%0";
break;
case HImode:
mode_name = "short";
load_str = "lhz";
- extend_insn = "extsh %0,%0";
break;
case SImode:
mode_name = "int";
load_str = "lwz";
- extend_insn = "extsw %0,%0";
break;
case DImode:
- if (TARGET_POWERPC64)
- {
- mode_name = "long";
- load_str = "ld";
- }
- else
- gcc_unreachable ();
+ gcc_assert (TARGET_POWERPC64);
+ mode_name = "long";
+ load_str = "ld";
break;
default:
@@ -32825,15 +33742,1349 @@ emit_fusion_gpr_load (rtx *operands)
else
fatal_insn ("Unable to generate load offset for fusion", load_offset);
- /* Handle sign extension. The peephole2 pass generates this as a separate
- insn, but we handle it just in case it got reattached. */
- if (sign_p)
+ return "";
+}
+
+/* Analyze vector computations and remove unnecessary doubleword
+ swaps (xxswapdi instructions). This pass is performed only
+ for little-endian VSX code generation.
+
+ For this specific case, loads and stores of 4x32 and 2x64 vectors
+ are inefficient. These are implemented using the lvx2dx and
+ stvx2dx instructions, which invert the order of doublewords in
+ a vector register. Thus the code generation inserts an xxswapdi
+ after each such load, and prior to each such store. (For spill
+ code after register assignment, an additional xxswapdi is inserted
+ following each store in order to return a hard register to its
+ unpermuted value.)
+
+ The extra xxswapdi instructions reduce performance. This can be
+ particularly bad for vectorized code. The purpose of this pass
+ is to reduce the number of xxswapdi instructions required for
+ correctness.
+
+ The primary insight is that much code that operates on vectors
+ does not care about the relative order of elements in a register,
+ so long as the correct memory order is preserved. If we have
+ a computation where all input values are provided by lvxd2x/xxswapdi
+ sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
+ and all intermediate computations are pure SIMD (independent of
+ element order), then all the xxswapdi's associated with the loads
+ and stores may be removed.
+
+ This pass uses some of the infrastructure and logical ideas from
+ the "web" pass in web.c. We create maximal webs of computations
+ fitting the description above using union-find. Each such web is
+ then optimized by removing its unnecessary xxswapdi instructions.
+
+ The pass is placed prior to global optimization so that we can
+ perform the optimization in the safest and simplest way possible;
+ that is, by replacing each xxswapdi insn with a register copy insn.
+ Subsequent forward propagation will remove copies where possible.
+
+ There are some operations sensitive to element order for which we
+ can still allow the operation, provided we modify those operations.
+ These include CONST_VECTORs, for which we must swap the first and
+ second halves of the constant vector; and SUBREGs, for which we
+ must adjust the byte offset to account for the swapped doublewords.
+ A remaining opportunity would be non-immediate-form splats, for
+ which we should adjust the selected lane of the input. We should
+ also make code generation adjustments for sum-across operations,
+ since this is a common vectorizer reduction.
+
+ Because we run prior to the first split, we can see loads and stores
+ here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
+ vector loads and stores that have not yet been split into a permuting
+ load/store and a swap. (One way this can happen is with a builtin
+ call to vec_vsx_{ld,st}.) We can handle these as well, but rather
+ than deleting a swap, we convert the load/store into a permuting
+ load/store (which effectively removes the swap). */
+
+/* Notes on Permutes
+
+ We do not currently handle computations that contain permutes. There
+ is a general transformation that can be performed correctly, but it
+ may introduce more expensive code than it replaces. To handle these
+ would require a cost model to determine when to perform the optimization.
+ This commentary records how this could be done if desired.
+
+ The most general permute is something like this (example for V16QI):
+
+ (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
+ (parallel [(const_int a0) (const_int a1)
+ ...
+ (const_int a14) (const_int a15)]))
+
+ where a0,...,a15 are in [0,31] and select elements from op1 and op2
+ to produce in the result.
+
+ Regardless of mode, we can convert the PARALLEL to a mask of 16
+ byte-element selectors. Let's call this M, with M[i] representing
+ the ith byte-element selector value. Then if we swap doublewords
+ throughout the computation, we can get correct behavior by replacing
+ M with M' as follows:
+
+ { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
+ M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
+ { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
+ { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
+
+ This seems promising at first, since we are just replacing one mask
+ with another. But certain masks are preferable to others. If M
+ is a mask that matches a vmrghh pattern, for example, M' certainly
+ will not. Instead of a single vmrghh, we would generate a load of
+ M' and a vperm. So we would need to know how many xxswapd's we can
+ remove as a result of this transformation to determine if it's
+ profitable; and preferably the logic would need to be aware of all
+ the special preferable masks.
+
+ Another form of permute is an UNSPEC_VPERM, in which the mask is
+ already in a register. In some cases, this mask may be a constant
+ that we can discover with ud-chains, in which case the above
+ transformation is ok. However, the common usage here is for the
+ mask to be produced by an UNSPEC_LVSL, in which case the mask
+ cannot be known at compile time. In such a case we would have to
+ generate several instructions to compute M' as above at run time,
+ and a cost model is needed again. */
+
+/* This is based on the union-find logic in web.c. web_entry_base is
+ defined in df.h. */
+class swap_web_entry : public web_entry_base
+{
+ public:
+ /* Pointer to the insn. */
+ rtx_insn *insn;
+ /* Set if insn contains a mention of a vector register. All other
+ fields are undefined if this field is unset. */
+ unsigned int is_relevant : 1;
+ /* Set if insn is a load. */
+ unsigned int is_load : 1;
+ /* Set if insn is a store. */
+ unsigned int is_store : 1;
+ /* Set if insn is a doubleword swap. This can either be a register swap
+ or a permuting load or store (test is_load and is_store for this). */
+ unsigned int is_swap : 1;
+ /* Set if the insn has a live-in use of a parameter register. */
+ unsigned int is_live_in : 1;
+ /* Set if the insn has a live-out def of a return register. */
+ unsigned int is_live_out : 1;
+ /* Set if the insn contains a subreg reference of a vector register. */
+ unsigned int contains_subreg : 1;
+ /* Set if the insn contains a 128-bit integer operand. */
+ unsigned int is_128_int : 1;
+ /* Set if this is a call-insn. */
+ unsigned int is_call : 1;
+ /* Set if this insn does not perform a vector operation for which
+ element order matters, or if we know how to fix it up if it does.
+ Undefined if is_swap is set. */
+ unsigned int is_swappable : 1;
+ /* A nonzero value indicates what kind of special handling for this
+ insn is required if doublewords are swapped. Undefined if
+ is_swappable is not set. */
+ unsigned int special_handling : 3;
+ /* Set if the web represented by this entry cannot be optimized. */
+ unsigned int web_not_optimizable : 1;
+ /* Set if this insn should be deleted. */
+ unsigned int will_delete : 1;
+};
+
+enum special_handling_values {
+ SH_NONE = 0,
+ SH_CONST_VECTOR,
+ SH_SUBREG,
+ SH_NOSWAP_LD,
+ SH_NOSWAP_ST,
+ SH_EXTRACT,
+ SH_SPLAT
+};
+
+/* Union INSN with all insns containing definitions that reach USE.
+ Detect whether USE is live-in to the current function. */
+static void
+union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
+{
+ struct df_link *link = DF_REF_CHAIN (use);
+
+ if (!link)
+ insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+ while (link)
{
- gcc_assert (extend_insn != NULL);
- output_asm_insn (extend_insn, fuse_ops);
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ insn_entry[INSN_UID (insn)].is_live_in = 1;
+
+ if (DF_REF_INSN_INFO (link->ref))
+ {
+ rtx def_insn = DF_REF_INSN (link->ref);
+ (void)unionfind_union (insn_entry + INSN_UID (insn),
+ insn_entry + INSN_UID (def_insn));
+ }
+
+ link = link->next;
}
+}
- return "";
+/* Union INSN with all insns containing uses reached from DEF.
+ Detect whether DEF is live-out from the current function. */
+static void
+union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
+{
+ struct df_link *link = DF_REF_CHAIN (def);
+
+ if (!link)
+ insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+ while (link)
+ {
+ /* This could be an eh use or some other artificial use;
+ we treat these all the same (killing the optimization). */
+ if (DF_REF_IS_ARTIFICIAL (link->ref))
+ insn_entry[INSN_UID (insn)].is_live_out = 1;
+
+ if (DF_REF_INSN_INFO (link->ref))
+ {
+ rtx use_insn = DF_REF_INSN (link->ref);
+ (void)unionfind_union (insn_entry + INSN_UID (insn),
+ insn_entry + INSN_UID (use_insn));
+ }
+
+ link = link->next;
+ }
+}
+
+/* Return 1 iff INSN is a load insn, including permuting loads that
+ represent an lvxd2x instruction; else return 0. */
+static unsigned int
+insn_is_load_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+
+ if (GET_CODE (body) == SET)
+ {
+ if (GET_CODE (SET_SRC (body)) == MEM)
+ return 1;
+
+ if (GET_CODE (SET_SRC (body)) == VEC_SELECT
+ && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
+ return 1;
+
+ return 0;
+ }
+
+ if (GET_CODE (body) != PARALLEL)
+ return 0;
+
+ rtx set = XVECEXP (body, 0, 0);
+
+ if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
+ return 1;
+
+ return 0;
+}
+
+/* Return 1 iff INSN is a store insn, including permuting stores that
+ represent an stvxd2x instruction; else return 0. */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
+ return 1;
+ if (GET_CODE (body) != PARALLEL)
+ return 0;
+ rtx set = XVECEXP (body, 0, 0);
+ if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
+ return 1;
+ return 0;
+}
+
+/* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
+ a permuting load, or a permuting store. */
+static unsigned int
+insn_is_swap_p (rtx insn)
+{
+ rtx body = PATTERN (insn);
+ if (GET_CODE (body) != SET)
+ return 0;
+ rtx rhs = SET_SRC (body);
+ if (GET_CODE (rhs) != VEC_SELECT)
+ return 0;
+ rtx parallel = XEXP (rhs, 1);
+ if (GET_CODE (parallel) != PARALLEL)
+ return 0;
+ unsigned int len = XVECLEN (parallel, 0);
+ if (len != 2 && len != 4 && len != 8 && len != 16)
+ return 0;
+ for (unsigned int i = 0; i < len / 2; ++i)
+ {
+ rtx op = XVECEXP (parallel, 0, i);
+ if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
+ return 0;
+ }
+ for (unsigned int i = len / 2; i < len; ++i)
+ {
+ rtx op = XVECEXP (parallel, 0, i);
+ if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
+ return 0;
+ }
+ return 1;
+}
+
+/* Return 1 iff OP is an operand that will not be affected by having
+ vector doublewords swapped in memory. */
+static unsigned int
+rtx_is_swappable_p (rtx op, unsigned int *special)
+{
+ enum rtx_code code = GET_CODE (op);
+ int i, j;
+ rtx parallel;
+
+ switch (code)
+ {
+ case LABEL_REF:
+ case SYMBOL_REF:
+ case CLOBBER:
+ case REG:
+ return 1;
+
+ case VEC_CONCAT:
+ case ASM_INPUT:
+ case ASM_OPERANDS:
+ return 0;
+
+ case CONST_VECTOR:
+ {
+ *special = SH_CONST_VECTOR;
+ return 1;
+ }
+
+ case VEC_DUPLICATE:
+ /* Opportunity: If XEXP (op, 0) has the same mode as the result,
+ and XEXP (op, 1) is a PARALLEL with a single QImode const int,
+ it represents a vector splat for which we can do special
+ handling. */
+ if (GET_CODE (XEXP (op, 0)) == CONST_INT)
+ return 1;
+ else if (GET_CODE (XEXP (op, 0)) == REG
+ && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
+ /* This catches V2DF and V2DI splat, at a minimum. */
+ return 1;
+ else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
+ /* If the duplicated item is from a select, defer to the select
+ processing to see if we can change the lane for the splat. */
+ return rtx_is_swappable_p (XEXP (op, 0), special);
+ else
+ return 0;
+
+ case VEC_SELECT:
+ /* A vec_extract operation is ok if we change the lane. */
+ if (GET_CODE (XEXP (op, 0)) == REG
+ && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 1
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
+ {
+ *special = SH_EXTRACT;
+ return 1;
+ }
+ else
+ return 0;
+
+ case UNSPEC:
+ {
+ /* Various operations are unsafe for this optimization, at least
+ without significant additional work. Permutes are obviously
+ problematic, as both the permute control vector and the ordering
+ of the target values are invalidated by doubleword swapping.
+ Vector pack and unpack modify the number of vector lanes.
+ Merge-high/low will not operate correctly on swapped operands.
+ Vector shifts across element boundaries are clearly uncool,
+ as are vector select and concatenate operations. Vector
+ sum-across instructions define one operand with a specific
+ order-dependent element, so additional fixup code would be
+ needed to make those work. Vector set and non-immediate-form
+ vector splat are element-order sensitive. A few of these
+ cases might be workable with special handling if required. */
+ int val = XINT (op, 1);
+ switch (val)
+ {
+ default:
+ break;
+ case UNSPEC_VMRGH_DIRECT:
+ case UNSPEC_VMRGL_DIRECT:
+ case UNSPEC_VPACK_SIGN_SIGN_SAT:
+ case UNSPEC_VPACK_SIGN_UNS_SAT:
+ case UNSPEC_VPACK_UNS_UNS_MOD:
+ case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
+ case UNSPEC_VPACK_UNS_UNS_SAT:
+ case UNSPEC_VPERM:
+ case UNSPEC_VPERM_UNS:
+ case UNSPEC_VPERMHI:
+ case UNSPEC_VPERMSI:
+ case UNSPEC_VPKPX:
+ case UNSPEC_VSLDOI:
+ case UNSPEC_VSLO:
+ case UNSPEC_VSRO:
+ case UNSPEC_VSUM2SWS:
+ case UNSPEC_VSUM4S:
+ case UNSPEC_VSUM4UBS:
+ case UNSPEC_VSUMSWS:
+ case UNSPEC_VSUMSWS_DIRECT:
+ case UNSPEC_VSX_CONCAT:
+ case UNSPEC_VSX_SET:
+ case UNSPEC_VSX_SLDWI:
+ case UNSPEC_VUNPACK_HI_SIGN:
+ case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
+ case UNSPEC_VUNPACK_LO_SIGN:
+ case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
+ case UNSPEC_VUPKHPX:
+ case UNSPEC_VUPKHS_V4SF:
+ case UNSPEC_VUPKHU_V4SF:
+ case UNSPEC_VUPKLPX:
+ case UNSPEC_VUPKLS_V4SF:
+ case UNSPEC_VUPKLU_V4SF:
+ /* The following could be handled as an idiom with XXSPLTW.
+ These place a scalar in BE element zero, but the XXSPLTW
+ will currently expect it in BE element 2 in a swapped
+ region. When one of these feeds an XXSPLTW with no other
+ defs/uses either way, we can avoid the lane change for
+ XXSPLTW and things will be correct. TBD. */
+ case UNSPEC_VSX_CVDPSPN:
+ case UNSPEC_VSX_CVSPDP:
+ case UNSPEC_VSX_CVSPDPN:
+ return 0;
+ case UNSPEC_VSPLT_DIRECT:
+ *special = SH_SPLAT;
+ return 1;
+ }
+ }
+
+ default:
+ break;
+ }
+
+ const char *fmt = GET_RTX_FORMAT (code);
+ int ok = 1;
+
+ for (i = 0; i < GET_RTX_LENGTH (code); ++i)
+ if (fmt[i] == 'e' || fmt[i] == 'u')
+ {
+ unsigned int special_op = SH_NONE;
+ ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
+ /* Ensure we never have two kinds of special handling
+ for the same insn. */
+ if (*special != SH_NONE && special_op != SH_NONE
+ && *special != special_op)
+ return 0;
+ *special = special_op;
+ }
+ else if (fmt[i] == 'E')
+ for (j = 0; j < XVECLEN (op, i); ++j)
+ {
+ unsigned int special_op = SH_NONE;
+ ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
+ /* Ensure we never have two kinds of special handling
+ for the same insn. */
+ if (*special != SH_NONE && special_op != SH_NONE
+ && *special != special_op)
+ return 0;
+ *special = special_op;
+ }
+
+ return ok;
+}
+
+/* Return 1 iff INSN is an operand that will not be affected by
+   having vector doublewords swapped in memory (in which case
+   *SPECIAL is unchanged), or that can be modified to be correct
+   if vector doublewords are swapped in memory (in which case
+   *SPECIAL is changed to a value indicating how).  */
+static unsigned int
+insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
+                     unsigned int *special)
+{
+  /* Calls are always bad.  */
+  if (GET_CODE (insn) == CALL_INSN)
+    return 0;
+
+  rtx pat = PATTERN (insn);
+  int uid = INSN_UID (insn);
+
+  /* Loads and stores seen here are not permuting, but we can still
+     fix them up by converting them to permuting ones.  Exceptions:
+     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
+     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
+     for the SET source.  */
+  if (insn_entry[uid].is_load)
+    {
+      if (GET_CODE (pat) != SET)
+        return 0;
+      *special = SH_NOSWAP_LD;
+      return 1;
+    }
+
+  if (insn_entry[uid].is_store)
+    {
+      if (GET_CODE (pat) != SET || GET_CODE (SET_SRC (pat)) == UNSPEC)
+        return 0;
+      *special = SH_NOSWAP_ST;
+      return 1;
+    }
+
+  /* Otherwise check the operands for vector lane violations.  */
+  return rtx_is_swappable_p (pat, special);
+}
+
+enum chain_purpose { FOR_LOADS, FOR_STORES };
+
+/* Return true if the UD or DU chain headed by LINK is non-empty,
+   and every entry on the chain references an insn that is a
+   register swap.  Furthermore, if PURPOSE is FOR_LOADS, each such
+   register swap must have only permuting loads as reaching defs.
+   If PURPOSE is FOR_STORES, each such register swap must have only
+   register swaps or permuting stores as reached uses.  */
+static bool
+chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
+                           enum chain_purpose purpose)
+{
+  if (!link)
+    return false;
+
+  for (; link; link = link->next)
+    {
+      /* Non-vector references are for addressability and do not
+         constrain the optimization.  */
+      if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
+        continue;
+
+      if (DF_REF_IS_ARTIFICIAL (link->ref))
+        return false;
+
+      rtx reached_insn = DF_REF_INSN (link->ref);
+      unsigned uid = INSN_UID (reached_insn);
+      struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
+
+      if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
+          || insn_entry[uid].is_store)
+        return false;
+
+      if (purpose == FOR_LOADS)
+        {
+          df_ref use;
+          FOR_EACH_INSN_INFO_USE (use, insn_info)
+            {
+              struct df_link *swap_link = DF_REF_CHAIN (use);
+
+              while (swap_link)
+                {
+                  /* Check the ref on THIS inner chain link before
+                     dereferencing its insn; the original code
+                     re-tested LINK->ref here, which was already
+                     checked above.  */
+                  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
+                    return false;
+
+                  rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
+                  unsigned uid2 = INSN_UID (swap_def_insn);
+
+                  /* Only permuting loads are allowed.  */
+                  if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
+                    return false;
+
+                  swap_link = swap_link->next;
+                }
+            }
+        }
+      else if (purpose == FOR_STORES)
+        {
+          df_ref def;
+          FOR_EACH_INSN_INFO_DEF (def, insn_info)
+            {
+              struct df_link *swap_link = DF_REF_CHAIN (def);
+
+              while (swap_link)
+                {
+                  /* As above: test the inner chain's ref, not the
+                     already-validated outer LINK->ref.  */
+                  if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
+                    return false;
+
+                  rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
+                  unsigned uid2 = INSN_UID (swap_use_insn);
+
+                  /* Permuting stores or register swaps are allowed.  */
+                  if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
+                    return false;
+
+                  swap_link = swap_link->next;
+                }
+            }
+        }
+    }
+
+  return true;
+}
+
+/* Mark the xxswapdi instructions associated with permuting loads and
+   stores for removal.  Note that we only flag them for deletion here,
+   as there is a possibility of a swap being reached from multiple
+   loads, etc.  */
+static void
+mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
+{
+  rtx insn = insn_entry[i].insn;
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+
+  if (insn_entry[i].is_load)
+    {
+      /* Flag every swap reached by a def of this permuting load.
+         We know by now that these are swaps, so we can delete
+         them confidently.  */
+      df_ref def;
+      FOR_EACH_INSN_INFO_DEF (def, insn_info)
+        for (struct df_link *link = DF_REF_CHAIN (def);
+             link;
+             link = link->next)
+          {
+            rtx swap_insn = DF_REF_INSN (link->ref);
+            insn_entry[INSN_UID (swap_insn)].will_delete = 1;
+          }
+    }
+  else if (insn_entry[i].is_store)
+    {
+      df_ref use;
+      FOR_EACH_INSN_INFO_USE (use, insn_info)
+        {
+          /* Ignore uses for addressability.  */
+          machine_mode mode = GET_MODE (DF_REF_REG (use));
+          if (!VECTOR_MODE_P (mode))
+            continue;
+
+          /* Flag every swap that feeds this permuting store.
+             We know by now that these are swaps, so we can delete
+             them confidently.  */
+          for (struct df_link *link = DF_REF_CHAIN (use);
+               link;
+               link = link->next)
+            {
+              rtx swap_insn = DF_REF_INSN (link->ref);
+              insn_entry[INSN_UID (swap_insn)].will_delete = 1;
+            }
+        }
+    }
+}
+
+/* OP is either a CONST_VECTOR or an expression containing one.
+   Swap the first half of the vector with the second in the first
+   case.  Recurse to find it in the second.  */
+static void
+swap_const_vector_halves (rtx op)
+{
+  enum rtx_code code = GET_CODE (op);
+  if (code == CONST_VECTOR)
+    {
+      /* Exchange element K with element K + N/2 for the first N/2
+         elements, which swaps the two doublewords.  */
+      int half = GET_MODE_NUNITS (GET_MODE (op)) / 2;
+      for (int k = 0; k < half; ++k)
+        {
+          rtx tmp = CONST_VECTOR_ELT (op, k);
+          CONST_VECTOR_ELT (op, k) = CONST_VECTOR_ELT (op, k + half);
+          CONST_VECTOR_ELT (op, k + half) = tmp;
+        }
+      return;
+    }
+
+  /* Not a constant vector; walk all sub-expressions looking for one.  */
+  const char *fmt = GET_RTX_FORMAT (code);
+  for (int k = 0; k < GET_RTX_LENGTH (code); ++k)
+    if (fmt[k] == 'e' || fmt[k] == 'u')
+      swap_const_vector_halves (XEXP (op, k));
+    else if (fmt[k] == 'E')
+      for (int m = 0; m < XVECLEN (op, k); ++m)
+        swap_const_vector_halves (XVECEXP (op, k, m));
+}
+
+/* Find all subregs of a vector expression that perform a narrowing,
+   and adjust the subreg index to account for doubleword swapping.  */
+static void
+adjust_subreg_index (rtx op)
+{
+  enum rtx_code code = GET_CODE (op);
+  if (code == SUBREG
+      && (GET_MODE_SIZE (GET_MODE (op))
+          < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
+    {
+      /* The swap exchanges the two doublewords, so a narrowing
+         subreg's byte offset moves across the 8-byte boundary.  */
+      unsigned int byte = SUBREG_BYTE (op);
+      SUBREG_BYTE (op) = byte < 8 ? byte + 8 : byte - 8;
+    }
+
+  /* Recurse into all sub-expressions.  */
+  const char *fmt = GET_RTX_FORMAT (code);
+  for (int k = 0; k < GET_RTX_LENGTH (code); ++k)
+    if (fmt[k] == 'e' || fmt[k] == 'u')
+      adjust_subreg_index (XEXP (op, k));
+    else if (fmt[k] == 'E')
+      for (int m = 0; m < XVECLEN (op, k); ++m)
+        adjust_subreg_index (XVECEXP (op, k, m));
+}
+
+/* Convert the non-permuting load INSN to a permuting one.  */
+static void
+permute_load (rtx_insn *insn)
+{
+  rtx body = PATTERN (insn);
+  rtx mem_op = SET_SRC (body);
+  rtx tgt_reg = SET_DEST (body);
+  machine_mode mode = GET_MODE (tgt_reg);
+  int n_elts = GET_MODE_NUNITS (mode);
+  int half_elts = n_elts / 2;
+  /* Build a selector that exchanges the two halves of the vector:
+     { n/2, ..., n-1, 0, ..., n/2-1 }.  */
+  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
+  for (int k = 0; k < n_elts; ++k)
+    XVECEXP (par, 0, k) = GEN_INT ((k + half_elts) % n_elts);
+  SET_SRC (body) = gen_rtx_VEC_SELECT (mode, mem_op, par);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Replacing load %d with permuted load\n",
+             INSN_UID (insn));
+}
+
+/* Convert the non-permuting store INSN to a permuting one.  */
+static void
+permute_store (rtx_insn *insn)
+{
+  rtx body = PATTERN (insn);
+  rtx src_reg = SET_SRC (body);
+  machine_mode mode = GET_MODE (src_reg);
+  int n_elts = GET_MODE_NUNITS (mode);
+  int half_elts = n_elts / 2;
+  /* Build a selector that exchanges the two halves of the vector:
+     { n/2, ..., n-1, 0, ..., n/2-1 }.  */
+  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
+  for (int k = 0; k < n_elts; ++k)
+    XVECEXP (par, 0, k) = GEN_INT ((k + half_elts) % n_elts);
+  SET_SRC (body) = gen_rtx_VEC_SELECT (mode, src_reg, par);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Replacing store %d with permuted store\n",
+             INSN_UID (insn));
+}
+
+/* INSN contains a vector extract operation; adjust the index of the
+   extracted lane to account for the doubleword swap.  */
+static void
+adjust_extract (rtx_insn *insn)
+{
+  rtx src = SET_SRC (PATTERN (insn));
+  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
+     account for that.  */
+  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
+  rtx par = XEXP (sel, 1);
+  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
+  int lane = INTVAL (XVECEXP (par, 0, 0));
+  /* Move the lane into the other doubleword.  */
+  if (lane >= half_elts)
+    lane -= half_elts;
+  else
+    lane += half_elts;
+  XVECEXP (par, 0, 0) = GEN_INT (lane);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
+}
+
+/* INSN contains a vector direct-splat operation; adjust the index of
+   the source lane to account for the doubleword swap.  */
+static void
+adjust_splat (rtx_insn *insn)
+{
+  rtx body = PATTERN (insn);
+  rtx unspec = XEXP (body, 1);
+  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
+  int lane = INTVAL (XVECEXP (unspec, 0, 1));
+  /* Move the source lane into the other doubleword.  */
+  if (lane >= half_elts)
+    lane -= half_elts;
+  else
+    lane += half_elts;
+  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
+}
+
+/* The insn described by INSN_ENTRY[I] can be swapped, but only
+   with special handling.  Take care of that here.  */
+static void
+handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
+{
+  rtx_insn *insn = insn_entry[i].insn;
+  rtx body = PATTERN (insn);
+
+  switch (insn_entry[i].special_handling)
+    {
+    case SH_CONST_VECTOR:
+      /* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
+      gcc_assert (GET_CODE (body) == SET);
+      swap_const_vector_halves (SET_SRC (body));
+      if (dump_file)
+        fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
+      break;
+    case SH_SUBREG:
+      /* A subreg of the same size is already safe.  For subregs that
+         select a smaller portion of a reg, adjust the index for
+         swapped doublewords.  */
+      adjust_subreg_index (body);
+      if (dump_file)
+        fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
+      break;
+    case SH_NOSWAP_LD:
+      /* Convert a non-permuting load to a permuting one.  */
+      permute_load (insn);
+      break;
+    case SH_NOSWAP_ST:
+      /* Convert a non-permuting store to a permuting one.  */
+      permute_store (insn);
+      break;
+    case SH_EXTRACT:
+      /* Change the lane on an extract operation.  */
+      adjust_extract (insn);
+      break;
+    case SH_SPLAT:
+      /* Change the lane on a direct-splat operation.  */
+      adjust_splat (insn);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Find the insn from the Ith table entry, which is known to be a
+   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
+static void
+replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
+{
+  rtx_insn *insn = insn_entry[i].insn;
+  rtx body = PATTERN (insn);
+  /* The swap source is (vec_select (reg) ...); grab the register.  */
+  rtx src_reg = XEXP (SET_SRC (body), 0);
+  rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
+  rtx_insn *new_insn = emit_insn_before (copy, insn);
+  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
+  df_insn_rescan (new_insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Replacing swap %d with copy %d\n", i,
+             INSN_UID (new_insn));
+
+  /* Remove the swap itself; keep the df information consistent.  */
+  df_insn_delete (insn);
+  remove_insn (insn);
+  insn->set_deleted ();
+}
+
+/* Dump the swap table to DUMP_FILE.  */
+static void
+dump_swap_insn_table (swap_web_entry *insn_entry)
+{
+  int e = get_max_uid ();
+  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
+
+  for (int i = 0; i < e; ++i)
+    {
+      if (!insn_entry[i].is_relevant)
+        continue;
+
+      /* First column: insn UID; second column: UID of the web's
+         representative insn (0 if none).  */
+      swap_web_entry *pred_entry = (swap_web_entry *) insn_entry[i].pred ();
+      int pred_uid = (pred_entry && pred_entry->insn
+                      ? INSN_UID (pred_entry->insn) : 0);
+      fprintf (dump_file, "%6d %6d ", i, pred_uid);
+
+      if (insn_entry[i].is_load)
+        fputs ("load ", dump_file);
+      if (insn_entry[i].is_store)
+        fputs ("store ", dump_file);
+      if (insn_entry[i].is_swap)
+        fputs ("swap ", dump_file);
+      if (insn_entry[i].is_live_in)
+        fputs ("live-in ", dump_file);
+      if (insn_entry[i].is_live_out)
+        fputs ("live-out ", dump_file);
+      if (insn_entry[i].contains_subreg)
+        fputs ("subreg ", dump_file);
+      if (insn_entry[i].is_128_int)
+        fputs ("int128 ", dump_file);
+      if (insn_entry[i].is_call)
+        fputs ("call ", dump_file);
+      if (insn_entry[i].is_swappable)
+        {
+          fputs ("swappable ", dump_file);
+          switch (insn_entry[i].special_handling)
+            {
+            case SH_CONST_VECTOR:
+              fputs ("special:constvec ", dump_file);
+              break;
+            case SH_SUBREG:
+              fputs ("special:subreg ", dump_file);
+              break;
+            case SH_NOSWAP_LD:
+              fputs ("special:load ", dump_file);
+              break;
+            case SH_NOSWAP_ST:
+              fputs ("special:store ", dump_file);
+              break;
+            case SH_EXTRACT:
+              fputs ("special:extract ", dump_file);
+              break;
+            case SH_SPLAT:
+              fputs ("special:splat ", dump_file);
+              break;
+            default:
+              break;
+            }
+        }
+      if (insn_entry[i].web_not_optimizable)
+        fputs ("unoptimizable ", dump_file);
+      if (insn_entry[i].will_delete)
+        fputs ("delete ", dump_file);
+      fputs ("\n", dump_file);
+    }
+  fputs ("\n", dump_file);
+}
+
+/* Main entry point for this pass.  Walk all insns of FUN building
+   webs of vector computations, decide per web whether the doubleword
+   swaps can be removed (or repaired with special handling), then
+   perform the transformation.  Always returns 0.  */
+unsigned int
+rs6000_analyze_swaps (function *fun)
+{
+  swap_web_entry *insn_entry;
+  basic_block bb;
+  rtx_insn *insn;
+
+  /* Dataflow analysis for use-def chains.  */
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Allocate structure to represent webs of insns.  */
+  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
+
+  /* Walk the insns to gather basic data.  */
+  FOR_ALL_BB_FN (bb, fun)
+    FOR_BB_INSNS (bb, insn)
+      {
+        unsigned int uid = INSN_UID (insn);
+        if (NONDEBUG_INSN_P (insn))
+          {
+            insn_entry[uid].insn = insn;
+
+            if (GET_CODE (insn) == CALL_INSN)
+              insn_entry[uid].is_call = 1;
+
+            /* Walk the uses and defs to see if we mention vector regs.
+               Record any constraints on optimization of such mentions.  */
+            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+            df_ref mention;
+            FOR_EACH_INSN_INFO_USE (mention, insn_info)
+              {
+                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
+                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
+
+                /* If a use gets its value from a call insn, it will be
+                   a hard register and will look like (reg:V4SI 3 3).
+                   The df analysis creates two mentions for GPR3 and GPR4,
+                   both DImode.  We must recognize this and treat it as a
+                   vector mention to ensure the call is unioned with this
+                   use.  */
+                if (mode == DImode && DF_REF_INSN_INFO (mention))
+                  {
+                    rtx feeder = DF_REF_INSN (mention);
+                    /* FIXME: It is pretty hard to get from the df mention
+                       to the mode of the use in the insn.  We arbitrarily
+                       pick a vector mode here, even though the use might
+                       be a real DImode.  We can be too conservative
+                       (create a web larger than necessary) because of
+                       this, so consider eventually fixing this.  */
+                    if (GET_CODE (feeder) == CALL_INSN)
+                      mode = V4SImode;
+                  }
+
+                if (VECTOR_MODE_P (mode))
+                  {
+                    insn_entry[uid].is_relevant = 1;
+                    if (mode == TImode || mode == V1TImode)
+                      insn_entry[uid].is_128_int = 1;
+                    if (DF_REF_INSN_INFO (mention))
+                      insn_entry[uid].contains_subreg
+                        = !rtx_equal_p (DF_REF_REG (mention),
+                                        DF_REF_REAL_REG (mention));
+                    union_defs (insn_entry, insn, mention);
+                  }
+              }
+            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
+              {
+                /* We use DF_REF_REAL_REG here to get inside any subregs.  */
+                machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
+
+                /* If we're loading up a hard vector register for a call,
+                   it looks like (set (reg:V4SI 9 9) (...)).  The df
+                   analysis creates two mentions for GPR9 and GPR10, both
+                   DImode.  So relying on the mode from the mentions
+                   isn't sufficient to ensure we union the call into the
+                   web with the parameter setup code.
+
+                   Note: this must examine the insn's PATTERN; the
+                   previous test of GET_CODE (insn) == SET could never
+                   be true, since GET_CODE of an insn yields an insn
+                   code (INSN, CALL_INSN, ...), never SET.  */
+                rtx insn_body = PATTERN (insn);
+                if (mode == DImode && GET_CODE (insn_body) == SET
+                    && VECTOR_MODE_P (GET_MODE (SET_DEST (insn_body))))
+                  mode = GET_MODE (SET_DEST (insn_body));
+
+                if (VECTOR_MODE_P (mode))
+                  {
+                    insn_entry[uid].is_relevant = 1;
+                    if (mode == TImode || mode == V1TImode)
+                      insn_entry[uid].is_128_int = 1;
+                    if (DF_REF_INSN_INFO (mention))
+                      insn_entry[uid].contains_subreg
+                        = !rtx_equal_p (DF_REF_REG (mention),
+                                        DF_REF_REAL_REG (mention));
+                    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
+                    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
+                      insn_entry[uid].is_live_out = 1;
+                    union_uses (insn_entry, insn, mention);
+                  }
+              }
+
+            if (insn_entry[uid].is_relevant)
+              {
+                /* Determine if this is a load or store.  */
+                insn_entry[uid].is_load = insn_is_load_p (insn);
+                insn_entry[uid].is_store = insn_is_store_p (insn);
+
+                /* Determine if this is a doubleword swap.  If not,
+                   determine whether it can legally be swapped.  */
+                if (insn_is_swap_p (insn))
+                  insn_entry[uid].is_swap = 1;
+                else
+                  {
+                    unsigned int special = SH_NONE;
+                    insn_entry[uid].is_swappable
+                      = insn_is_swappable_p (insn_entry, insn, &special);
+                    if (special != SH_NONE && insn_entry[uid].contains_subreg)
+                      insn_entry[uid].is_swappable = 0;
+                    else if (special != SH_NONE)
+                      insn_entry[uid].special_handling = special;
+                    else if (insn_entry[uid].contains_subreg)
+                      insn_entry[uid].special_handling = SH_SUBREG;
+                  }
+              }
+          }
+      }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nSwap insn entry table when first built\n");
+      dump_swap_insn_table (insn_entry);
+    }
+
+  /* Record unoptimizable webs.  */
+  unsigned e = get_max_uid (), i;
+  for (i = 0; i < e; ++i)
+    {
+      if (!insn_entry[i].is_relevant)
+        continue;
+
+      swap_web_entry *root
+        = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
+
+      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
+          || (insn_entry[i].contains_subreg
+              && insn_entry[i].special_handling != SH_SUBREG)
+          || insn_entry[i].is_128_int || insn_entry[i].is_call
+          || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
+        root->web_not_optimizable = 1;
+
+      /* If we have loads or stores that aren't permuting then the
+         optimization isn't appropriate.  */
+      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
+               && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
+        root->web_not_optimizable = 1;
+
+      /* If we have permuting loads or stores that are not accompanied
+         by a register swap, the optimization isn't appropriate.  */
+      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
+        {
+          rtx insn = insn_entry[i].insn;
+          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+          df_ref def;
+
+          FOR_EACH_INSN_INFO_DEF (def, insn_info)
+            {
+              struct df_link *link = DF_REF_CHAIN (def);
+
+              if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
+                {
+                  root->web_not_optimizable = 1;
+                  break;
+                }
+            }
+        }
+      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
+        {
+          rtx insn = insn_entry[i].insn;
+          struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+          df_ref use;
+
+          FOR_EACH_INSN_INFO_USE (use, insn_info)
+            {
+              struct df_link *link = DF_REF_CHAIN (use);
+
+              if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
+                {
+                  root->web_not_optimizable = 1;
+                  break;
+                }
+            }
+        }
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
+      dump_swap_insn_table (insn_entry);
+    }
+
+  /* For each load and store in an optimizable web (which implies
+     the loads and stores are permuting), find the associated
+     register swaps and mark them for removal.  Due to various
+     optimizations we may mark the same swap more than once.  Also
+     perform special handling for swappable insns that require it.  */
+  for (i = 0; i < e; ++i)
+    if ((insn_entry[i].is_load || insn_entry[i].is_store)
+        && insn_entry[i].is_swap)
+      {
+        swap_web_entry* root_entry
+          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+        if (!root_entry->web_not_optimizable)
+          mark_swaps_for_removal (insn_entry, i);
+      }
+    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
+      {
+        swap_web_entry* root_entry
+          = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
+        if (!root_entry->web_not_optimizable)
+          handle_special_swappables (insn_entry, i);
+      }
+
+  /* Now delete the swaps marked for removal.  */
+  for (i = 0; i < e; ++i)
+    if (insn_entry[i].will_delete)
+      replace_swap_with_copy (insn_entry, i);
+
+  /* Clean up.  */
+  free (insn_entry);
+  return 0;
+}
+
+/* Metadata for the "swaps" RTL pass implemented by
+   rs6000_analyze_swaps above.  */
+const pass_data pass_data_analyze_swaps =
+{
+  RTL_PASS, /* type */
+  "swaps", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_df_finish, /* todo_flags_finish */
+};
+
+/* RTL pass that removes unneeded vector doubleword swaps; enabled
+   only for little-endian VSX targets when optimizing and when
+   -moptimize-swaps is in effect.  */
+class pass_analyze_swaps : public rtl_opt_pass
+{
+public:
+  pass_analyze_swaps (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
+              && rs6000_optimize_swaps);
+    }
+
+  virtual unsigned int execute (function *fun)
+    {
+      return rs6000_analyze_swaps (fun);
+    }
+
+}; // class pass_analyze_swaps
+
+/* Create an instance of the swap-optimization pass.  */
+rtl_opt_pass *
+make_pass_analyze_swaps (gcc::context *ctxt)
+{
+  return new pass_analyze_swaps (ctxt);
+}
+
+#ifdef RS6000_GLIBC_ATOMIC_FENV
+/* Function declarations for rs6000_atomic_assign_expand_fenv.
+   Built lazily on first use (see the NULL_TREE checks below) and
+   cached across calls.  */
+static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
+#endif
+
+/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  Build the three
+   expressions the middle end uses around an atomic FP compound
+   assignment: *HOLD saves and masks the FP environment, *CLEAR
+   clears exception state, and *UPDATE restores the saved environment
+   while merging newly-raised exceptions.  */
+
+static void
+rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
+{
+  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
+    {
+      /* No hardware FPSCR available: delegate to the glibc helper
+	 routines when configured; otherwise leave the outputs unset.  */
+#ifdef RS6000_GLIBC_ATOMIC_FENV
+      if (atomic_hold_decl == NULL_TREE)
+	{
+	  atomic_hold_decl
+	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+			  get_identifier ("__atomic_feholdexcept"),
+			  build_function_type_list (void_type_node,
+						    double_ptr_type_node,
+						    NULL_TREE));
+	  TREE_PUBLIC (atomic_hold_decl) = 1;
+	  DECL_EXTERNAL (atomic_hold_decl) = 1;
+	}
+
+      if (atomic_clear_decl == NULL_TREE)
+	{
+	  atomic_clear_decl
+	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+			  get_identifier ("__atomic_feclearexcept"),
+			  build_function_type_list (void_type_node,
+						    NULL_TREE));
+	  TREE_PUBLIC (atomic_clear_decl) = 1;
+	  DECL_EXTERNAL (atomic_clear_decl) = 1;
+	}
+
+      tree const_double = build_qualified_type (double_type_node,
+						TYPE_QUAL_CONST);
+      tree const_double_ptr = build_pointer_type (const_double);
+      if (atomic_update_decl == NULL_TREE)
+	{
+	  atomic_update_decl
+	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+			  get_identifier ("__atomic_feupdateenv"),
+			  build_function_type_list (void_type_node,
+						    const_double_ptr,
+						    NULL_TREE));
+	  TREE_PUBLIC (atomic_update_decl) = 1;
+	  DECL_EXTERNAL (atomic_update_decl) = 1;
+	}
+
+      /* The saved environment lives in an addressable double temporary
+	 shared by the hold and update calls.  */
+      tree fenv_var = create_tmp_var (double_type_node);
+      mark_addressable (fenv_var);
+      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
+
+      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
+      *clear = build_call_expr (atomic_clear_decl, 0);
+      *update = build_call_expr (atomic_update_decl, 1,
+				 fold_convert (const_double_ptr, fenv_addr));
+#endif
+      return;
+    }
+
+  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
+  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree call_mffs = build_call_expr (mffs, 0);
+
+  /* Generates the equivalent of feholdexcept (&fenv_var):
+
+     fenv_var = __builtin_mffs ();
+     double fenv_hold;
+     *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
+     __builtin_mtfsf (0xff, fenv_hold);  */
+
+  /* Mask to clear everything except for the rounding modes and non-IEEE
+     arithmetic flag.  */
+  const unsigned HOST_WIDE_INT hold_exception_mask =
+    HOST_WIDE_INT_C (0xffffffff00000007);
+
+  tree fenv_var = create_tmp_var (double_type_node);
+
+  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
+
+  /* Reinterpret the FPSCR image as a 64-bit integer so it can be
+     masked; VIEW_CONVERT_EXPR does the type pun without conversion.  */
+  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
+  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
+			      build_int_cst (uint64_type_node,
+					     hold_exception_mask));
+
+  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
+				 fenv_llu_and);
+
+  tree hold_mtfsf = build_call_expr (mtfsf, 2,
+				     build_int_cst (unsigned_type_node, 0xff),
+				     fenv_hold_mtfsf);
+
+  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
+
+  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
+
+     double fenv_clear = __builtin_mffs ();
+     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
+     __builtin_mtfsf (0xff, fenv_clear);  */
+
+  /* Mask that clears the entire low word of the FPSCR image.
+     NOTE(review): unlike hold_exception_mask above, this also zeroes
+     the rounding-mode bits; the previous comment here ("except for
+     the rounding modes") was copy-pasted and did not match the
+     constant -- confirm intended.  */
+  const unsigned HOST_WIDE_INT clear_exception_mask =
+    HOST_WIDE_INT_C (0xffffffff00000000);
+
+  tree fenv_clear = create_tmp_var (double_type_node);
+
+  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
+
+  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
+  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
+				    fenv_clean_llu,
+				    build_int_cst (uint64_type_node,
+						   clear_exception_mask));
+
+  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
+				  fenv_clear_llu_and);
+
+  tree clear_mtfsf = build_call_expr (mtfsf, 2,
+				      build_int_cst (unsigned_type_node, 0xff),
+				      fenv_clear_mtfsf);
+
+  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
+
+  /* Generates the equivalent of feupdateenv (&fenv_var):
+
+     double old_fenv = __builtin_mffs ();
+     double fenv_update;
+     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
+				| (*(uint64_t*)&fenv_var & 0x1ff80fff);
+     __builtin_mtfsf (0xff, fenv_update);  */
+
+  /* update_exception_mask selects the bits kept from the current
+     environment; new_exception_mask selects the bits merged back in
+     from the held environment.  */
+  const unsigned HOST_WIDE_INT update_exception_mask =
+    HOST_WIDE_INT_C (0xffffffff1fffff00);
+  const unsigned HOST_WIDE_INT new_exception_mask =
+    HOST_WIDE_INT_C (0x1ff80fff);
+
+  tree old_fenv = create_tmp_var (double_type_node);
+  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
+
+  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
+  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
+			     build_int_cst (uint64_type_node,
+					    update_exception_mask));
+
+  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
+			     build_int_cst (uint64_type_node,
+					    new_exception_mask));
+
+  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
+			      old_llu_and, new_llu_and);
+
+  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
+				   new_llu_mask);
+
+  tree update_mtfsf = build_call_expr (mtfsf, 2,
+				       build_int_cst (unsigned_type_node, 0xff),
+				       fenv_update_mtfsf);
+
+  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}