Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--  gcc/config/i386/i386.c | 376
1 file changed, 287 insertions(+), 89 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bdde693769f..548aec74093 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -52,6 +52,16 @@ along with GCC; see the file COPYING3. If not see
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
+#include "pointer-set.h"
+#include "hash-table.h"
+#include "vec.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "dwarf2.h"
@@ -2038,8 +2048,6 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* Mask registers. */
MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
- /* MPX bound registers */
- BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};
/* The "default" register map used in 32bit mode. */
@@ -2056,7 +2064,6 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
- 101, 102, 103, 104, /* bound registers */
};
/* The "default" register map used in 64bit mode. */
@@ -2073,7 +2080,6 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
- 126, 127, 128, 129, /* bound registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
@@ -2142,7 +2148,6 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
- -1, -1, -1, -1, /* bound registers */
};
/* Define parameter passing and return registers. */
@@ -2574,7 +2579,6 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mrtm", OPTION_MASK_ISA_RTM },
{ "-mxsave", OPTION_MASK_ISA_XSAVE },
{ "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
- { "-mmpx", OPTION_MASK_ISA_MPX },
};
/* Flag options. */
@@ -3069,7 +3073,6 @@ ix86_option_override_internal (bool main_args_p,
#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
-#define PTA_MPX (HOST_WIDE_INT_1 << 44)
/* if this reaches 64, need to widen struct pta flags below */
@@ -3138,8 +3141,8 @@ ix86_option_override_internal (bool main_args_p,
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
{"slm", PROCESSOR_SLM, CPU_SLM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
- | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_MOVBE
- | PTA_FXSR},
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_AES
+ | PTA_PCLMUL | PTA_RDRND | PTA_MOVBE | PTA_FXSR},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
@@ -3864,16 +3867,16 @@ ix86_option_override_internal (bool main_args_p,
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
if (opts_set->x_ix86_incoming_stack_boundary_arg)
{
- if (ix86_incoming_stack_boundary_arg
+ if (opts->x_ix86_incoming_stack_boundary_arg
< (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
- || ix86_incoming_stack_boundary_arg > 12)
+ || opts->x_ix86_incoming_stack_boundary_arg > 12)
error ("-mincoming-stack-boundary=%d is not between %d and 12",
- ix86_incoming_stack_boundary_arg,
+ opts->x_ix86_incoming_stack_boundary_arg,
TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
else
{
ix86_user_incoming_stack_boundary
- = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
+ = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
ix86_incoming_stack_boundary
= ix86_user_incoming_stack_boundary;
}
@@ -4262,11 +4265,6 @@ ix86_conditional_register_usage (void)
for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
-
- /* If MPX is disabled, squash the registers. */
- if (! TARGET_MPX)
- for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
@@ -4285,6 +4283,30 @@ ix86_function_specific_save (struct cl_target_option *ptr,
ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
+ ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
+ ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
+ ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
+ ptr->x_ix86_abi = opts->x_ix86_abi;
+ ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
+ ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
+ ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
+ ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
+ ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
+ ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
+ ptr->x_ix86_pmode = opts->x_ix86_pmode;
+ ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
+ ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
+ ptr->x_ix86_regparm = opts->x_ix86_regparm;
+ ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
+ ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
+ ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
+ ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
+ ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
+ ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
+ ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
+ ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
+ ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
+ ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
/* The fields are char but the variables are not; make sure the
values fit in the fields. */
@@ -4314,6 +4336,30 @@ ix86_function_specific_restore (struct gcc_options *opts,
opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
+ opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
+ opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
+ opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
+ opts->x_ix86_abi = ptr->x_ix86_abi;
+ opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
+ opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
+ opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
+ opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
+ opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
+ opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
+ opts->x_ix86_pmode = ptr->x_ix86_pmode;
+ opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
+ opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
+ opts->x_ix86_regparm = ptr->x_ix86_regparm;
+ opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
+ opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
+ opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
+ opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
+ opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
+ opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
+ opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
+ opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
+ opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
+ opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
/* Recreate the arch feature tests if the arch changed */
if (old_arch != ix86_arch)
@@ -4628,8 +4674,8 @@ ix86_valid_target_attribute_tree (tree args,
struct gcc_options *opts,
struct gcc_options *opts_set)
{
- const char *orig_arch_string = ix86_arch_string;
- const char *orig_tune_string = ix86_tune_string;
+ const char *orig_arch_string = opts->x_ix86_arch_string;
+ const char *orig_tune_string = opts->x_ix86_tune_string;
enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
int orig_tune_defaulted = ix86_tune_defaulted;
int orig_arch_specified = ix86_arch_specified;
@@ -8989,7 +9035,7 @@ ix86_code_end (void)
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
- output_asm_insn ("%!ret", NULL);
+ fputs ("\tret\n", asm_out_file);
final_end_function ();
init_insn_lengths ();
free_after_compilation (cfun);
@@ -9047,7 +9093,7 @@ output_set_got (rtx dest, rtx label)
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
- output_asm_insn ("%!call\t%X2", xops);
+ output_asm_insn ("call\t%X2", xops);
#if TARGET_MACHO
/* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
@@ -14415,7 +14461,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x) && ! ANY_BND_REG_P (x))
+ if (! ANY_FP_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -14538,7 +14584,6 @@ get_some_local_dynamic_name (void)
~ -- print "i" if TARGET_AVX2, "f" otherwise.
@ -- print a segment register of thread base pointer load
^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
- ! -- print MPX prefix for jxx/call/ret instructions if required.
*/
void
@@ -15038,11 +15083,6 @@ ix86_print_operand (FILE *file, rtx x, int code)
fputs ("addr32 ", file);
return;
- case '!':
- if (ix86_bnd_prefixed_insn_p (NULL_RTX))
- fputs ("bnd ", file);
- return;
-
default:
output_operand_lossage ("invalid operand code '%c'", code);
}
@@ -15185,7 +15225,7 @@ static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
return (code == '@' || code == '*' || code == '+' || code == '&'
- || code == ';' || code == '~' || code == '^' || code == '!');
+ || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR. */
@@ -15215,25 +15255,6 @@ ix86_print_operand_address (FILE *file, rtx addr)
ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
code = 'q';
}
- else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
- {
- ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
- gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
- if (parts.base != NULL_RTX)
- {
- parts.index = parts.base;
- parts.scale = 1;
- }
- parts.base = XVECEXP (addr, 0, 0);
- addr = XVECEXP (addr, 0, 0);
- }
- else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
- {
- ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
- gcc_assert (parts.index == NULL_RTX);
- parts.index = XVECEXP (addr, 0, 1);
- addr = XVECEXP (addr, 0, 0);
- }
else
ok = ix86_decompose_address (addr, &parts);
@@ -22834,6 +22855,8 @@ emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
if (piece_size <= GET_MODE_SIZE (word_mode))
{
emit_insn (gen_strset (destptr, dst, promoted_val));
+ dst = adjust_automodify_address_nv (dst, move_mode, destptr,
+ piece_size);
continue;
}
@@ -22903,14 +22926,18 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
{
dest = change_address (destmem, DImode, destptr);
emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
emit_insn (gen_strset (destptr, dest, value));
}
else
{
dest = change_address (destmem, SImode, destptr);
emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
emit_insn (gen_strset (destptr, dest, value));
}
emit_label (label);
@@ -22928,6 +22955,7 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
{
dest = change_address (destmem, SImode, destptr);
emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
emit_insn (gen_strset (destptr, dest, value));
}
emit_label (label);
@@ -23444,7 +23472,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If expected size is not known but max size is small enough
so inline version is a win, set expected size into
the range. */
- if (max > 1 && (unsigned HOST_WIDE_INT)max >= max_size && expected_size == -1)
+ if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
+ && expected_size == -1)
expected_size = min_size / 2 + max_size / 2;
/* If user specified the algorithm, honnor it if possible. */
@@ -23743,7 +23772,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
bool noalign;
enum machine_mode move_mode = VOIDmode;
int unroll_factor = 1;
- /* TODO: Once vlaue ranges are available, fill in proper data. */
+ /* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
@@ -23958,21 +23987,19 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
loop variant. */
if (issetmem && epilogue_size_needed > 2 && !promoted_val)
force_loopy_epilogue = true;
- if (count)
+ if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
+ || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
{
- if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
- {
- /* If main algorithm works on QImode, no epilogue is needed.
- For small sizes just don't align anything. */
- if (size_needed == 1)
- desired_align = align;
- else
- goto epilogue;
- }
+ /* If main algorithm works on QImode, no epilogue is needed.
+ For small sizes just don't align anything. */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
}
- else if (min_size < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ else if (!count
+ && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
{
- gcc_assert (max_size >= (unsigned HOST_WIDE_INT)epilogue_size_needed);
label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (epilogue_size_needed),
@@ -24543,13 +24570,13 @@ ix86_output_call_insn (rtx insn, rtx call_op)
if (SIBLING_CALL_P (insn))
{
if (direct_p)
- xasm = "%!jmp\t%P0";
+ xasm = "jmp\t%P0";
/* SEH epilogue detection requires the indirect branch case
to include REX.W. */
else if (TARGET_SEH)
- xasm = "%!rex.W jmp %A0";
+ xasm = "rex.W jmp %A0";
else
- xasm = "%!jmp\t%A0";
+ xasm = "jmp\t%A0";
output_asm_insn (xasm, &call_op);
return "";
@@ -24586,9 +24613,9 @@ ix86_output_call_insn (rtx insn, rtx call_op)
}
if (direct_p)
- xasm = "%!call\t%P0";
+ xasm = "call\t%P0";
else
- xasm = "%!call\t%A0";
+ xasm = "call\t%A0";
output_asm_insn (xasm, &call_op);
@@ -29179,8 +29206,8 @@ static const struct builtin_description bdesc_multi_arg[] =
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
@@ -34753,7 +34780,6 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
case SSE_FIRST_REG:
case FP_TOP_REG:
case FP_SECOND_REG:
- case BND_REGS:
return true;
default:
@@ -35102,8 +35128,6 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return VALID_FP_MODE_P (mode);
if (MASK_REGNO_P (regno))
return VALID_MASK_REG_MODE (mode);
- if (BND_REGNO_P (regno))
- return VALID_BND_REG_MODE (mode);
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -35917,10 +35941,6 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
reg_alloc_order [pos++] = i;
- /* MPX bound registers. */
- for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
- reg_alloc_order [pos++] = i;
-
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
@@ -42419,18 +42439,6 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
gen_rtx_MULT (mode, op1, op2));
}
-/* Return 1 if control tansfer instruction INSN
- should be encoded with bnd prefix.
- If insn is NULL then return 1 when control
- transfer instructions should be prefixed with
- bnd by default for current function. */
-
-bool
-ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED)
-{
- return false;
-}
-
/* Calculate integer abs() using only SSE2 instructions. */
void
@@ -43681,6 +43689,184 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
return val;
}
+/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
+ CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
+ CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
+ or number of vecsize_mangle variants that should be emitted. */
+
+static int
+ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+ struct cgraph_simd_clone *clonei,
+ tree base_type, int num)
+{
+ int ret = 1;
+
+ if (clonei->simdlen
+ && (clonei->simdlen < 2
+ || clonei->simdlen > 16
+ || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+ {
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %d\n", clonei->simdlen);
+ return 0;
+ }
+
+ tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+ if (TREE_CODE (ret_type) != VOID_TYPE)
+ switch (TYPE_MODE (ret_type))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd\n", ret_type);
+ return 0;
+ }
+
+ tree t;
+ int i;
+
+ for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
+ /* FIXME: Shouldn't we allow such arguments if they are uniform? */
+ switch (TYPE_MODE (TREE_TYPE (t)))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd\n", TREE_TYPE (t));
+ return 0;
+ }
+
+ if (clonei->cilk_elemental)
+ {
+ /* Parse here processor clause. If not present, default to 'b'. */
+ clonei->vecsize_mangle = 'b';
+ }
+ else if (!TREE_PUBLIC (node->decl))
+ {
+ /* If the function isn't exported, we can pick up just one ISA
+ for the clones. */
+ if (TARGET_AVX2)
+ clonei->vecsize_mangle = 'd';
+ else if (TARGET_AVX)
+ clonei->vecsize_mangle = 'c';
+ else
+ clonei->vecsize_mangle = 'b';
+ ret = 1;
+ }
+ else
+ {
+ clonei->vecsize_mangle = "bcd"[num];
+ ret = 3;
+ }
+ switch (clonei->vecsize_mangle)
+ {
+ case 'b':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 128;
+ break;
+ case 'c':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 256;
+ break;
+ case 'd':
+ clonei->vecsize_int = 256;
+ clonei->vecsize_float = 256;
+ break;
+ }
+ if (clonei->simdlen == 0)
+ {
+ if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+ clonei->simdlen = clonei->vecsize_int;
+ else
+ clonei->simdlen = clonei->vecsize_float;
+ clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+ if (clonei->simdlen > 16)
+ clonei->simdlen = 16;
+ }
+ return ret;
+}
+
+/* Add target attribute to SIMD clone NODE if needed. */
+
+static void
+ix86_simd_clone_adjust (struct cgraph_node *node)
+{
+ const char *str = NULL;
+ gcc_assert (node->decl == cfun->decl);
+ switch (node->simdclone->vecsize_mangle)
+ {
+ case 'b':
+ if (!TARGET_SSE2)
+ str = "sse2";
+ break;
+ case 'c':
+ if (!TARGET_AVX)
+ str = "avx";
+ break;
+ case 'd':
+ if (!TARGET_AVX2)
+ str = "avx2";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (str == NULL)
+ return;
+ push_cfun (NULL);
+ tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
+ bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
+ gcc_assert (ok);
+ pop_cfun ();
+ ix86_previous_fndecl = NULL_TREE;
+ ix86_set_current_function (node->decl);
+}
+
+/* If SIMD clone NODE can't be used in a vectorized loop
+ in current function, return -1, otherwise return a badness of using it
+ (0 if it is most desirable from vecsize_mangle point of view, 1
+ slightly less desirable, etc.). */
+
+static int
+ix86_simd_clone_usable (struct cgraph_node *node)
+{
+ switch (node->simdclone->vecsize_mangle)
+ {
+ case 'b':
+ if (!TARGET_SSE2)
+ return -1;
+ if (!TARGET_AVX)
+ return 0;
+ return TARGET_AVX2 ? 2 : 1;
+ case 'c':
+ if (!TARGET_AVX)
+ return -1;
+ return TARGET_AVX2 ? 1 : 0;
+ break;
+ case 'd':
+ if (!TARGET_AVX2)
+ return -1;
+ return 0;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
static bool
@@ -44169,6 +44355,18 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+ ix86_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST \
+ ix86_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE \
+ ix86_simd_clone_usable
+
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
ix86_float_exceptions_rounding_supported_p