diff options
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 949 |
1 files changed, 717 insertions, 232 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 16a1eae2136..674ff6027ca 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -52,13 +52,13 @@ #include "cfgloop.h" #include "sched-int.h" #include "gimple.h" -#include "tree-flow.h" #include "intl.h" #include "params.h" #include "tm-constrs.h" #include "opts.h" #include "tree-vectorizer.h" #include "dumpfile.h" +#include "cgraph.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif @@ -189,9 +189,6 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; /* Map register number to register class. */ enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; -/* Reload functions based on the type and the vector unit. */ -static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2]; - static int dbg_cost_ctrl; /* Built in types. */ @@ -316,11 +313,77 @@ static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) -/* Direct moves to/from vsx/gpr registers that need an additional register to - do the move. */ -static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES]; -static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES]; -static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES]; + +/* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + +enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. */ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */ + N_RELOAD_REG +}; + +/* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. 
*/ +#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR +#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + +/* Map reload register type to a register in the register class. */ +struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ +}; + +static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ +}; + +/* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ +typedef unsigned char addr_mask_type; + +#define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ +#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ +#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ +#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ +#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ +#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ + +/* Register type masks based on the type, of valid addressing modes. */ +struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */ +}; + +static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. 
*/ +static inline bool +mode_supports_pre_incdec_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); +} + +/* Helper function to say whether a mode supports PRE_MODIFY. */ +static inline bool +mode_supports_pre_modify_p (enum machine_mode mode) +{ + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); +} /* Target cpu costs. */ @@ -1624,19 +1687,28 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) /* VSX registers that overlap the FPR registers are larger than for non-VSX implementations. Don't allow an item to be split between a FP register - and an Altivec register. */ - if (VECTOR_MEM_VSX_P (mode)) + and an Altivec register. Allow TImode in all VSX registers if the user + asked for it. */ + if (TARGET_VSX && VSX_REGNO_P (regno) + && (VECTOR_MEM_VSX_P (mode) + || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode) + || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode)) + || (TARGET_VSX_TIMODE && mode == TImode))) { if (FP_REGNO_P (regno)) return FP_REGNO_P (last_regno); if (ALTIVEC_REGNO_P (regno)) - return ALTIVEC_REGNO_P (last_regno); - } + { + if (mode == SFmode && !TARGET_UPPER_REGS_SF) + return 0; - /* Allow TImode in all VSX registers if the user asked for it. */ - if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) - return 1; + if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF) + return 0; + + return ALTIVEC_REGNO_P (last_regno); + } + } /* The GPRs can hold any mode, but values bigger than one register cannot go past R31. 
*/ @@ -1766,6 +1838,63 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) } } +static const char * +rs6000_debug_vector_unit (enum rs6000_vector v) +{ + const char *ret; + + switch (v) + { + case VECTOR_NONE: ret = "none"; break; + case VECTOR_ALTIVEC: ret = "altivec"; break; + case VECTOR_VSX: ret = "vsx"; break; + case VECTOR_P8_VECTOR: ret = "p8_vector"; break; + case VECTOR_PAIRED: ret = "paired"; break; + case VECTOR_SPE: ret = "spe"; break; + case VECTOR_OTHER: ret = "other"; break; + default: ret = "unknown"; break; + } + + return ret; +} + +/* Print the address masks in a human readble fashion. */ +DEBUG_FUNCTION void +rs6000_debug_print_mode (ssize_t m) +{ + ssize_t rc; + + fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); + for (rc = 0; rc < N_RELOAD_REG; rc++) + { + addr_mask_type mask = reg_addr[m].addr_mask[rc]; + fprintf (stderr, + " %s: %c%c%c%c%c%c", + reload_reg_map[rc].name, + (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ', + (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ', + (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ', + (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ', + (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ', + (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' '); + } + + if (rs6000_vector_unit[m] != VECTOR_NONE + || rs6000_vector_mem[m] != VECTOR_NONE + || (reg_addr[m].reload_store != CODE_FOR_nothing) + || (reg_addr[m].reload_load != CODE_FOR_nothing)) + { + fprintf (stderr, + " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c", + rs6000_debug_vector_unit (rs6000_vector_unit[m]), + rs6000_debug_vector_unit (rs6000_vector_mem[m]), + (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', + (reg_addr[m].reload_load != CODE_FOR_nothing) ? 
'l' : '*'); + } + + fputs ("\n", stderr); +} + #define DEBUG_FMT_ID "%-32s= " #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " @@ -1789,17 +1918,6 @@ rs6000_debug_reg_global (void) const char *cmodel_str; struct cl_target_option cl_opts; - /* Map enum rs6000_vector to string. */ - static const char *rs6000_debug_vector_unit[] = { - "none", - "altivec", - "vsx", - "p8_vector", - "paired", - "spe", - "other" - }; - /* Modes we want tieable information on. */ static const enum machine_mode print_tieable_modes[] = { QImode, @@ -1891,8 +2009,11 @@ rs6000_debug_reg_global (void) "wr reg_class = %s\n" "ws reg_class = %s\n" "wt reg_class = %s\n" + "wu reg_class = %s\n" "wv reg_class = %s\n" + "ww reg_class = %s\n" "wx reg_class = %s\n" + "wy reg_class = %s\n" "wz reg_class = %s\n" "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], @@ -1907,28 +2028,18 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]); + nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) - if (rs6000_vector_unit[m] || rs6000_vector_mem[m] - || (rs6000_vector_reload[m][0] != CODE_FOR_nothing) - || (rs6000_vector_reload[m][1] != CODE_FOR_nothing)) - { - nl = "\n"; - fprintf (stderr, - "Vector mode: %-5s arithmetic: %-10s move: %-10s " - "reload-out: %c reload-in: %c\n", - GET_MODE_NAME (m), - rs6000_debug_vector_unit[ rs6000_vector_unit[m] ], - rs6000_debug_vector_unit[ rs6000_vector_mem[m] ], - (rs6000_vector_reload[m][0] != 
CODE_FOR_nothing) ? 'y' : 'n', - (rs6000_vector_reload[m][1] != CODE_FOR_nothing) ? 'y' : 'n'); - } + rs6000_debug_print_mode (m); - if (nl) - fputs (nl, stderr); + fputs ("\n", stderr); for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) { @@ -2164,11 +2275,106 @@ rs6000_debug_reg_global (void) (int)RS6000_BUILTIN_COUNT); } + +/* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + +static void +rs6000_setup_reg_addr_masks (void) +{ + ssize_t rc, reg, m, nregs; + addr_mask_type any_addr_mask, addr_mask; + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + /* SDmode is special in that we want to access it only via REG+REG + addressing on power7 and above, since we want to use the LFIWZX and + STFIWZX instructions to load it. */ + bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + + any_addr_mask = 0; + for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) + { + addr_mask = 0; + reg = reload_reg_map[rc].reg; + + /* Can mode values go in the GPR/FPR/Altivec registers? */ + if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) + { + nregs = rs6000_hard_regno_nregs[m][reg]; + addr_mask |= RELOAD_REG_VALID; + + /* Indicate if the mode takes more than 1 physical register. If + it takes a single register, indicate it can do REG+REG + addressing. */ + if (nregs > 1 || m == BLKmode) + addr_mask |= RELOAD_REG_MULTIPLE; + else + addr_mask |= RELOAD_REG_INDEXED; + + /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY + addressing. Restrict addressing on SPE for 64-bit types + because of the SUBREG hackery used to address 64-bit floats in + '32-bit' GPRs. To simplify secondary reload, don't allow + update forms on scalar floating point types that can go in the + upper registers. 
*/ + + if (TARGET_UPDATE + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) + && GET_MODE_SIZE (m) <= 8 + && !VECTOR_MODE_P (m) + && !COMPLEX_MODE_P (m) + && !indexed_only_p + && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8) + && !(m == DFmode && TARGET_UPPER_REGS_DF) + && !(m == SFmode && TARGET_UPPER_REGS_SF)) + { + addr_mask |= RELOAD_REG_PRE_INCDEC; + + /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that + we don't allow PRE_MODIFY for some multi-register + operations. */ + switch (m) + { + default: + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DImode: + if (TARGET_POWERPC64) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + + case DFmode: + case DDmode: + if (TARGET_DF_INSN) + addr_mask |= RELOAD_REG_PRE_MODIFY; + break; + } + } + } + + /* GPR and FPR registers can do REG+OFFSET addressing, except + possibly for SDmode. */ + if ((addr_mask != 0) && !indexed_only_p + && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)) + addr_mask |= RELOAD_REG_OFFSET; + + reg_addr[m].addr_mask[rc] = addr_mask; + any_addr_mask |= addr_mask; + } + + reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; + } +} + + /* Initialize the various global tables that are based on register size. */ static void rs6000_init_hard_regno_mode_ok (bool global_init_p) { - int r, m, c; + ssize_t r, m, c; int align64; int align32; @@ -2233,17 +2439,18 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; } - /* Precalculate vector information, this must be set up before the - rs6000_hard_regno_nregs_internal below. */ - for (m = 0; m < NUM_MACHINE_MODES; ++m) - { - rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE; - rs6000_vector_reload[m][0] = CODE_FOR_nothing; - rs6000_vector_reload[m][1] = CODE_FOR_nothing; - } + /* Precalculate the valid memory formats as well as the vector information, + this must be set up before the rs6000_hard_regno_nregs_internal calls + below. 
*/ + gcc_assert ((int)VECTOR_NONE == 0); + memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); + memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); + + gcc_assert ((int)CODE_FOR_nothing == 0); + memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr)); - for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++) - rs6000_constraints[c] = NO_REGS; + gcc_assert ((int)NO_REGS == 0); + memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); /* The VSX hardware allows native alignment for vectors, but control whether the compiler believes it can use native alignment or still uses 128-bit alignment. */ @@ -2320,7 +2527,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) { rs6000_vector_unit[DFmode] = VECTOR_VSX; rs6000_vector_mem[DFmode] - = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE); + = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE); rs6000_vector_align[DFmode] = align64; } @@ -2334,7 +2541,34 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) /* TODO add SPE and paired floating point vector support. */ /* Register class constraints for the constraints that depend on compile - switches. */ + switches. When the VSX code was added, different constraints were added + based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all + of the VSX registers are used. The register classes for scalar floating + point types is set, based on whether we allow that type into the upper + (Altivec) registers. GCC has register classes to target the Altivec + registers for load/store operations, to select using a VSX memory + operation instead of the traditional floating point operation. The + constraints are: + + d - Register class to use with traditional DFmode instructions. + f - Register class to use with traditional SFmode instructions. + v - Altivec register. + wa - Any VSX register. + wd - Preferred register class for V2DFmode. + wf - Preferred register class for V4SFmode. 
+ wg - Float register for power6x move insns. + wl - Float register if we can do 32-bit signed int loads. + wm - VSX register for ISA 2.07 direct move operations. + wr - GPR if 64-bit mode is permitted. + ws - Register class to do ISA 2.06 DF operations. + wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. + wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. + wt - VSX register for TImode in VSX registers. + ww - Register class to do SF conversions in with VSX operations. + wx - Float register if we can do 32-bit int stores. + wy - Register class to do ISA 2.07 SF operations. + wz - Float register if we can do 32-bit unsigned int loads. */ + if (TARGET_HARD_FLOAT && TARGET_FPRS) rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; @@ -2343,19 +2577,20 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_VSX) { - /* At present, we just use VSX_REGS, but we have different constraints - based on the use, in case we want to fine tune the default register - class used. wa = any VSX register, wf = register class to use for - V4SF, wd = register class to use for V2DF, and ws = register classs to - use for DF scalars. */ rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; - rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY - ? 
VSX_REGS - : FLOAT_REGS); + rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; + if (TARGET_VSX_TIMODE) rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; + + if (TARGET_UPPER_REGS_DF) + { + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + } + else + rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; } /* Add conditional constraints based on various options, to allow us to @@ -2375,8 +2610,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_POWERPC64) rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; - if (TARGET_P8_VECTOR) - rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; + if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) + { + rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; + } + else if (TARGET_P8_VECTOR) + { + rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; + } + else if (TARGET_VSX) + rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; if (TARGET_STFIWX) rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; @@ -2384,112 +2630,104 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWZX) rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; - /* Setup the direct move combinations. */ - for (m = 0; m < NUM_MACHINE_MODES; ++m) - { - reload_fpr_gpr[m] = CODE_FOR_nothing; - reload_gpr_vsx[m] = CODE_FOR_nothing; - reload_vsx_gpr[m] = CODE_FOR_nothing; - } - /* Set up the reload helper and direct move functions. 
*/ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load; - rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load; - rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store; - rs6000_vector_reload[DDmode][1] 
= CODE_FOR_reload_dd_di_load; + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; + reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; } if (TARGET_P8_VECTOR) { - rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store; - rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load; - rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store; - rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; } if (TARGET_VSX_TIMODE) { - rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store; - rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load; + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; } if (TARGET_DIRECT_MOVE) { if (TARGET_POWERPC64) { - reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti; - reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df; - reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di; - reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf; - reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si; - reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi; - reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi; - reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf; - - reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti; - reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df; - reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di; - reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf; - reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si; - reload_vsx_gpr[V8HImode] = 
CODE_FOR_reload_vsx_from_gprv8hi; - reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi; - reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf; + reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; + reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; + reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; + reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; + reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; + reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; + + reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; + reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; + reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; + reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; + reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; + reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; } else { - reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi; - reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd; - reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf; + reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; + reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; + reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; } } } else { - rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store; - rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load; - rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store; - rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load; - 
rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store; - rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load; - rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store; - rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load; - rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store; - rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load; - rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store; - rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load; - if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY) + reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; + reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; + reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; + reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; + reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; + reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; + reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; + reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; + reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; + reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; + reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; + reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; + if (TARGET_VSX && TARGET_UPPER_REGS_DF) { - rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store; - rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load; - rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store; - rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load; + reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; + reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; + reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; + reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; } if (TARGET_P8_VECTOR) { - rs6000_vector_reload[SFmode][0] = 
CODE_FOR_reload_sf_si_store; - rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load; - rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store; - rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load; + reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; + reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; + reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; + reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; } if (TARGET_VSX_TIMODE) { - rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store; - rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_si_load; + reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; + reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; } } } @@ -2608,6 +2846,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } + /* Update the addr mask bits in reg_addr to help secondary reload and go if + legitimate address support to figure out the appropriate addressing to + use. */ + rs6000_setup_reg_addr_masks (); + if (global_init_p || TARGET_DEBUG_TARGET) { if (TARGET_DEBUG_REG) @@ -2744,6 +2987,10 @@ rs6000_option_override_internal (bool global_init_p) = ((global_init_p || target_option_default_node == NULL) ? NULL : TREE_TARGET_OPTION (target_option_default_node)); + /* Remember the explicit arguments. */ + if (global_init_p) + rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags; + /* On 64-bit Darwin, power alignment is ABI-incompatible with some C library functions, so warn about it. 
The flag may be useful for performance studies from time to time though, so don't disable it @@ -3664,7 +3911,7 @@ rs6000_option_override_internal (bool global_init_p) /* Save the initial options in case the user does function specific options */ if (global_init_p) target_option_default_node = target_option_current_node - = build_target_option_node (); + = build_target_option_node (&global_options); /* If not explicitly specified via option, decide whether to generate the extra blr's required to preserve the link stack on some cpus (eg, 476). */ @@ -4743,15 +4990,16 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) /* Check if VAL is present in every STEP-th element, and the other elements are filled with its most significant bit. */ - for (i = 0; i < nunits - 1; ++i) + for (i = 1; i < nunits; ++i) { HOST_WIDE_INT desired_val; - if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0) + unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; + if ((i & (step - 1)) == 0) desired_val = val; else desired_val = msb_val; - if (desired_val != const_vector_elt_as_int (op, i)) + if (desired_val != const_vector_elt_as_int (op, elt)) return false; } @@ -7116,17 +7364,9 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) return 0; if (legitimate_indirect_address_p (x, reg_ok_strict)) return 1; - if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) - && !ALTIVEC_OR_VSX_VECTOR_MODE (mode) - && !SPE_VECTOR_MODE (mode) - && mode != TFmode - && mode != TDmode - && mode != TImode - && mode != PTImode - /* Restrict addressing for DI because of our SUBREG hackery. 
*/ - && !(TARGET_E500_DOUBLE - && (mode == DFmode || mode == DDmode || mode == DImode)) - && TARGET_UPDATE + if (TARGET_UPDATE + && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) + && mode_supports_pre_incdec_p (mode) && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) return 1; if (virtual_stack_registers_memory_p (x)) @@ -7136,12 +7376,12 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; - /* For TImode, if we have load/store quad, only allow register indirect - addresses. This will allow the values to go in either GPRs or VSX - registers without reloading. The vector types would tend to go into VSX - registers, so we allow REG+REG, while TImode seems somewhat split, in that - some uses are GPR based, and some VSX based. */ - if (mode == TImode && TARGET_QUAD_MEMORY) + /* For TImode, if we have load/store quad and TImode in VSX registers, only + allow register indirect addresses. This will allow the values to go in + either GPRs or VSX registers without reloading. The vector types would + tend to go into VSX registers, so we allow REG+REG, while TImode seems + somewhat split, in that some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE) return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! 
reg_ok_strict @@ -7166,21 +7406,8 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) && !avoiding_indexed_address_p (mode) && legitimate_indexed_address_p (x, reg_ok_strict)) return 1; - if (GET_CODE (x) == PRE_MODIFY - && mode != TImode - && mode != PTImode - && mode != TFmode - && mode != TDmode - && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) - || TARGET_POWERPC64 - || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE)) - && (TARGET_POWERPC64 || mode != DImode) - && !ALTIVEC_OR_VSX_VECTOR_MODE (mode) - && !SPE_VECTOR_MODE (mode) - /* Restrict addressing for DI because of our SUBREG hackery. */ - && !(TARGET_E500_DOUBLE - && (mode == DFmode || mode == DDmode || mode == DImode)) - && TARGET_UPDATE + if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY + && mode_supports_pre_modify_p (mode) && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict) && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), reg_ok_strict, false) @@ -7201,10 +7428,13 @@ rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x, bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); fprintf (stderr, "\nrs6000_legitimate_address_p: return = %s, mode = %s, " - "strict = %d, code = %s\n", + "strict = %d, reload = %s, code = %s\n", ret ? "true" : "false", GET_MODE_NAME (mode), reg_ok_strict, + (reload_completed + ? "after" + : (reload_in_progress ? "progress" : "before")), GET_RTX_NAME (GET_CODE (x))); debug_rtx (x); @@ -7616,6 +7846,106 @@ rs6000_eliminate_indexed_memrefs (rtx operands[2]) copy_addr_to_reg (XEXP (operands[1], 0))); } +/* Generate a vector of constants to permute MODE for a little-endian + storage operation by swapping the two halves of a vector. 
*/ +static rtvec +rs6000_const_vec (enum machine_mode mode) +{ + int i, subparts; + rtvec v; + + switch (mode) + { + case V2DFmode: + case V2DImode: + subparts = 2; + break; + case V4SFmode: + case V4SImode: + subparts = 4; + break; + case V8HImode: + subparts = 8; + break; + case V16QImode: + subparts = 16; + break; + default: + gcc_unreachable(); + } + + v = rtvec_alloc (subparts); + + for (i = 0; i < subparts / 2; ++i) + RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2); + for (i = subparts / 2; i < subparts; ++i) + RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2); + + return v; +} + +/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi + for a VSX load or store operation. */ +rtx +rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode) +{ + rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); + return gen_rtx_VEC_SELECT (mode, source, par); +} + +/* Emit a little-endian load from vector memory location SOURCE to VSX + register DEST in mode MODE. The load is done with two permuting + insn's that represent an lxvd2x and xxpermdi. */ +void +rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode) +{ + rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; + rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode); + rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem)); + emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg)); +} + +/* Emit a little-endian store to vector memory location DEST from VSX + register SOURCE in mode MODE. The store is done with two permuting + insn's that represent an xxpermdi and an stxvd2x. */ +void +rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode) +{ + rtx tmp = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (source) : source; + rtx permute_src = rs6000_gen_le_vsx_permute (source, mode); + rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src)); + emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp)); +} + +/* Emit a sequence representing a little-endian VSX load or store, + moving data from SOURCE to DEST in mode MODE. This is done + separately from rs6000_emit_move to ensure it is called only + during expand. LE VSX loads and stores introduced later are + handled with a split. The expand-time RTL generation allows + us to optimize away redundant pairs of register-permutes. */ +void +rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode) +{ + gcc_assert (!BYTES_BIG_ENDIAN + && VECTOR_MEM_VSX_P (mode) + && mode != TImode + && (MEM_P (source) ^ MEM_P (dest))); + + if (MEM_P (source)) + { + gcc_assert (REG_P (dest)); + rs6000_emit_le_vsx_load (dest, source, mode); + } + else + { + if (!REG_P (source)) + source = force_reg (mode, source); + rs6000_emit_le_vsx_store (dest, source, mode); + } +} + /* Emit a move from SOURCE to DEST in mode MODE. */ void rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) @@ -8176,7 +8506,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, { tree ret_type = TREE_TYPE (fntype); fprintf (stderr, " ret code = %s,", - tree_code_name[ (int)TREE_CODE (ret_type) ]); + get_tree_code_name (TREE_CODE (ret_type))); } if (cum->call_cookie & CALL_LONG) @@ -14702,7 +15032,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) { cost = 3; /* 2 mtvsrd's, 1 xxpermdi. 
*/ - icode = reload_vsx_gpr[(int)mode]; + icode = reg_addr[mode].reload_vsx_gpr; } /* Handle moving 128-bit values from VSX point registers to GPRs on @@ -14711,7 +15041,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) { cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ - icode = reload_gpr_vsx[(int)mode]; + icode = reg_addr[mode].reload_gpr_vsx; } } @@ -14720,13 +15050,13 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) { cost = 3; /* xscvdpspn, mfvsrd, and. */ - icode = reload_gpr_vsx[(int)mode]; + icode = reg_addr[mode].reload_gpr_vsx; } else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) { cost = 2; /* mtvsrz, xscvspdpn. */ - icode = reload_vsx_gpr[(int)mode]; + icode = reg_addr[mode].reload_vsx_gpr; } } } @@ -14739,7 +15069,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) { cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ - icode = reload_vsx_gpr[(int)mode]; + icode = reg_addr[mode].reload_vsx_gpr; } /* Handle moving 128-bit values from VSX point registers to GPRs on @@ -14748,7 +15078,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) { cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ - icode = reload_gpr_vsx[(int)mode]; + icode = reg_addr[mode].reload_gpr_vsx; } } @@ -14764,7 +15094,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) { cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ - icode = reload_fpr_gpr[(int)mode]; + icode = reg_addr[mode].reload_fpr_gpr; } } @@ -14847,7 +15177,9 @@ rs6000_secondary_reload (bool in_p, bool default_p = false; sri->icode = CODE_FOR_nothing; - icode = rs6000_vector_reload[mode][in_p != false]; + icode = ((in_p) + ? 
reg_addr[mode].reload_load + : reg_addr[mode].reload_store); if (REG_P (x) || register_operand (x, mode)) { @@ -14862,6 +15194,7 @@ rs6000_secondary_reload (bool in_p, from_type = exchange; } + /* Can we do a direct move of some sort? */ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, altivec_p)) { @@ -15835,21 +16168,21 @@ rs6000_output_move_128bit (rtx operands[]) enum machine_mode mode = GET_MODE (dest); int dest_regno; int src_regno; - bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p; - bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p; + bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; if (REG_P (dest)) { dest_regno = REGNO (dest); dest_gpr_p = INT_REGNO_P (dest_regno); dest_fp_p = FP_REGNO_P (dest_regno); - dest_av_p = ALTIVEC_REGNO_P (dest_regno); - dest_vsx_p = dest_fp_p | dest_av_p; + dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_vmx_p; } else { dest_regno = -1; - dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false; + dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; } if (REG_P (src)) @@ -15857,13 +16190,13 @@ rs6000_output_move_128bit (rtx operands[]) src_regno = REGNO (src); src_gpr_p = INT_REGNO_P (src_regno); src_fp_p = FP_REGNO_P (src_regno); - src_av_p = ALTIVEC_REGNO_P (src_regno); - src_vsx_p = src_fp_p | src_av_p; + src_vmx_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_vmx_p; } else { src_regno = -1; - src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false; + src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; } /* Register moves. 
*/ @@ -15887,7 +16220,7 @@ rs6000_output_move_128bit (rtx operands[]) return "#"; } - else if (TARGET_ALTIVEC && dest_av_p && src_av_p) + else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) return "vor %0,%1,%1"; else if (dest_fp_p && src_fp_p) @@ -15899,18 +16232,13 @@ rs6000_output_move_128bit (rtx operands[]) { if (dest_gpr_p) { - if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 - && quad_memory_operand (src, mode) - && !reg_overlap_mentioned_p (dest, src)) - { - /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; - } + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "lq %0,%1"; else return "#"; } - else if (TARGET_ALTIVEC && dest_av_p + else if (TARGET_ALTIVEC && dest_vmx_p && altivec_indexed_or_indirect_operand (src, mode)) return "lvx %0,%y1"; @@ -15922,7 +16250,7 @@ rs6000_output_move_128bit (rtx operands[]) return "lxvd2x %x0,%y1"; } - else if (TARGET_ALTIVEC && dest_av_p) + else if (TARGET_ALTIVEC && dest_vmx_p) return "lvx %0,%y1"; else if (dest_fp_p) @@ -15934,17 +16262,13 @@ rs6000_output_move_128bit (rtx operands[]) { if (src_gpr_p) { - if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 - && quad_memory_operand (dest, mode)) - { - /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ - return REG_P (XEXP (dest, 0)) ? 
"stq %1,%0" : "stq %1,%y0"; - } + if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) + return "stq %1,%0"; else return "#"; } - else if (TARGET_ALTIVEC && src_av_p + else if (TARGET_ALTIVEC && src_vmx_p && altivec_indexed_or_indirect_operand (src, mode)) return "stvx %1,%y0"; @@ -15956,7 +16280,7 @@ rs6000_output_move_128bit (rtx operands[]) return "stxvd2x %x1,%y0"; } - else if (TARGET_ALTIVEC && src_av_p) + else if (TARGET_ALTIVEC && src_vmx_p) return "stvx %1,%y0"; else if (src_fp_p) @@ -15976,7 +16300,7 @@ rs6000_output_move_128bit (rtx operands[]) else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) return "xxlxor %x0,%x0,%x0"; - else if (TARGET_ALTIVEC && dest_av_p) + else if (TARGET_ALTIVEC && dest_vmx_p) return output_vec_const_move (operands); } @@ -17538,7 +17862,7 @@ output_cbranch (rtx op, const char *label, int reversed, rtx insn) if (note != NULL_RTX) { /* PROB is the difference from 50%. */ - int prob = INTVAL (XEXP (note, 0)) - REG_BR_PROB_BASE / 2; + int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2; /* Only hint for highly probable/improbable branches on newer cpus as static prediction overrides processor dynamic @@ -18144,12 +18468,12 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) static void emit_unlikely_jump (rtx cond, rtx label) { - rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1); + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; rtx x; x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); - add_reg_note (x, REG_BR_PROB, very_unlikely); + add_int_reg_note (x, REG_BR_PROB, very_unlikely); } /* A subroutine of the atomic operation splitters. Emit a load-locked @@ -28375,6 +28699,136 @@ rs6000_emit_parity (rtx dst, rtx src) } } +/* Expand an Altivec constant permutation for little endian mode. + There are two issues: First, the two input operands must be + swapped so that together they form a double-wide array in LE + order. 
Second, the vperm instruction has surprising behavior + in LE mode: it interprets the elements of the source vectors + in BE mode ("left to right") and interprets the elements of + the destination vector in LE mode ("right to left"). To + correct for this, we must subtract each element of the permute + control vector from 31. + + For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} + with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. + We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to + serve as the permute control vector. Then, in BE mode, + + vperm 9,10,11,12 + + places the desired result in vr9. However, in LE mode the + vector contents will be + + vr10 = 00000003 00000002 00000001 00000000 + vr11 = 00000007 00000006 00000005 00000004 + + The result of the vperm using the same permute control vector is + + vr9 = 05000000 07000000 01000000 03000000 + + That is, the leftmost 4 bytes of vr10 are interpreted as the + source for the rightmost 4 bytes of vr9, and so on. + + If we change the permute control vector to + + vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4} + + and issue + + vperm 9,11,10,12 + + we get the desired + + vr9 = 00000006 00000004 00000002 00000000. */ + +void +altivec_expand_vec_perm_const_le (rtx operands[4]) +{ + unsigned int i; + rtx perm[16]; + rtx constv, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + + /* Unpack and adjust the constant selector. */ + for (i = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + unsigned int elt = 31 - (INTVAL (e) & 31); + perm[i] = GEN_INT (elt); + } + + /* Expand to a permute, swapping the inputs and using the + adjusted selector.
*/ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + + constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); + constv = force_reg (V16QImode, constv); + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), + UNSPEC_VPERM); + if (!REG_P (target)) + { + rtx tmp = gen_reg_rtx (V16QImode); + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + +/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the + permute control vector. But here it's not a constant, so we must + generate a vector splat/subtract to do the adjustment. */ + +void +altivec_expand_vec_perm_le (rtx operands[4]) +{ + rtx splat, unspec; + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + rtx sel = operands[3]; + rtx tmp = target; + + /* Get everything in regs so the pattern matches. */ + if (!REG_P (op0)) + op0 = force_reg (V16QImode, op0); + if (!REG_P (op1)) + op1 = force_reg (V16QImode, op1); + if (!REG_P (sel)) + sel = force_reg (V16QImode, sel); + if (!REG_P (target)) + tmp = gen_reg_rtx (V16QImode); + + /* SEL = splat(31) - SEL. */ + /* We want to subtract from 31, but we can't vspltisb 31 since + it's out of range. -1 works as well because only the low-order + five bits of the permute control vector elements are used. */ + splat = gen_rtx_VEC_DUPLICATE (V16QImode, + gen_rtx_CONST_INT (QImode, -1)); + emit_move_insn (tmp, splat); + sel = gen_rtx_MINUS (V16QImode, tmp, sel); + emit_move_insn (tmp, sel); + + /* Permute with operands reversed and adjusted selector. */ + unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp), + UNSPEC_VPERM); + + /* Copy into target, possibly by way of a register. */ + if (!REG_P (target)) + { + emit_move_insn (tmp, unspec); + unspec = tmp; + } + + emit_move_insn (target, unspec); +} + /* Expand an Altivec constant permutation. 
Return true if we match an efficient implementation; false to fall back to VPERM. */ @@ -28555,6 +29009,12 @@ altivec_expand_vec_perm_const (rtx operands[4]) } } + if (!BYTES_BIG_ENDIAN) + { + altivec_expand_vec_perm_const_le (operands); + return true; + } + return false; } @@ -28999,6 +29459,27 @@ rs6000_init_dwarf_reg_sizes_extra (tree address) emit_move_insn (adjust_address (mem, mode, offset), value); } } + + if (TARGET_MACHO && ! TARGET_ALTIVEC) + { + int i; + enum machine_mode mode = TYPE_MODE (char_type_node); + rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx mem = gen_rtx_MEM (BLKmode, addr); + rtx value = gen_int_mode (16, mode); + + /* On Darwin, libgcc may be built to run on both G3 and G4/5. + The unwinder still needs to know the size of Altivec registers. */ + + for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) + { + int column = DWARF_REG_TO_UNWIND_COLUMN (i); + HOST_WIDE_INT offset + = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode); + + emit_move_insn (adjust_address (mem, mode, offset), value); + } + } } /* Map internal gcc register numbers to DWARF2 register numbers. 
*/ @@ -29169,6 +29650,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, { "string", OPTION_MASK_STRING, false, true }, { "update", OPTION_MASK_NO_UPDATE, true , true }, + { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false }, + { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false }, { "vsx", OPTION_MASK_VSX, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT @@ -29441,7 +29924,7 @@ rs6000_valid_attribute_p (tree fndecl, { struct cl_target_option cur_target; bool ret; - tree old_optimize = build_optimization_node (); + tree old_optimize = build_optimization_node (&global_options); tree new_target, new_optimize; tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); @@ -29468,7 +29951,7 @@ rs6000_valid_attribute_p (tree fndecl, fprintf (stderr, "--------------------\n"); } - old_optimize = build_optimization_node (); + old_optimize = build_optimization_node (&global_options); func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); /* If the function changed the optimization levels as well as setting target @@ -29487,12 +29970,12 @@ rs6000_valid_attribute_p (tree fndecl, if (ret) { ret = rs6000_option_override_internal (false); - new_target = build_target_option_node (); + new_target = build_target_option_node (&global_options); } else new_target = NULL; - new_optimize = build_optimization_node (); + new_optimize = build_optimization_node (&global_options); if (!new_target) ret = false; @@ -29522,7 +30005,7 @@ rs6000_valid_attribute_p (tree fndecl, bool rs6000_pragma_target_parse (tree args, tree pop_target) { - tree prev_tree = build_target_option_node (); + tree prev_tree = build_target_option_node (&global_options); tree cur_tree; struct cl_target_option *prev_opt, *cur_opt; HOST_WIDE_INT prev_flags, cur_flags, diff_flags; @@ -29559,7 +30042,8 @@ rs6000_pragma_target_parse (tree args, tree pop_target) 
rs6000_cpu_index = rs6000_tune_index = -1; if (!rs6000_inner_target_options (args, false) || !rs6000_option_override_internal (false) - || (cur_tree = build_target_option_node ()) == NULL_TREE) + || (cur_tree = build_target_option_node (&global_options)) + == NULL_TREE) { if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) fprintf (stderr, "invalid pragma\n"); @@ -29684,19 +30168,22 @@ rs6000_set_current_function (tree fndecl) /* Save the current options */ static void -rs6000_function_specific_save (struct cl_target_option *ptr) +rs6000_function_specific_save (struct cl_target_option *ptr, + struct gcc_options *opts) { - ptr->x_rs6000_isa_flags = rs6000_isa_flags; - ptr->x_rs6000_isa_flags_explicit = rs6000_isa_flags_explicit; + ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; + ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; } /* Restore the current options */ static void -rs6000_function_specific_restore (struct cl_target_option *ptr) +rs6000_function_specific_restore (struct gcc_options *opts, + struct cl_target_option *ptr) + { - rs6000_isa_flags = ptr->x_rs6000_isa_flags; - rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; + opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; + opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; (void) rs6000_option_override_internal (false); } @@ -29729,7 +30216,6 @@ rs6000_print_options_internal (FILE *file, size_t cur_column; size_t max_column = 76; const char *comma = ""; - const char *nl = "\n"; if (indent) start_column += fprintf (file, "%*s", indent, ""); @@ -29760,7 +30246,6 @@ rs6000_print_options_internal (FILE *file, fprintf (stderr, ", \\\n%*s", (int)start_column, ""); cur_column = start_column + len; comma = ""; - nl = "\n\n"; } fprintf (file, "%s%s%s%s", comma, prefix, no_str, @@ -29770,7 +30255,7 @@ rs6000_print_options_internal (FILE *file, } } - fputs (nl, file); + fputs ("\n", file); } /* Helper function to print the current isa options on a line. */ |