Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 291
1 file changed, 216 insertions, 75 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d6c2de8198..7c28a559a4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2374,6 +2374,7 @@ static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
@@ -2677,6 +2678,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mmpx",     OPTION_MASK_ISA_MPX },
     { "-mclwb",    OPTION_MASK_ISA_CLWB },
     { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
+    { "-mmwaitx",  OPTION_MASK_ISA_MWAITX },
   };

   /* Flag options.  */
@@ -2988,6 +2990,17 @@ ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
          return;
        }

+      if ((stringop_alg) i == rep_prefix_8_byte
+          && !TARGET_64BIT)
+        {
+          /* rep; movq isn't available in 32-bit code.  */
+          error ("stringop strategy name %s specified for option %s "
+                 "not supported for 32-bit code",
+                 alg_name,
+                 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
+          return;
+        }
+
       input_ranges[n].max = maxs;
       input_ranges[n].alg = (stringop_alg) i;
       if (!strcmp (align, "align"))
@@ -3179,6 +3192,7 @@ ix86_option_override_internal (bool main_args_p,
 #define PTA_AVX512VBMI	(HOST_WIDE_INT_1 << 54)
 #define PTA_CLWB	(HOST_WIDE_INT_1 << 55)
 #define PTA_PCOMMIT	(HOST_WIDE_INT_1 << 56)
+#define PTA_MWAITX	(HOST_WIDE_INT_1 << 57)

 #define PTA_CORE2 \
   (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -3332,7 +3346,7 @@ ix86_option_override_internal (bool main_args_p,
 	| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
 	| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
 	| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
-	| PTA_MOVBE},
+	| PTA_MOVBE | PTA_MWAITX},
       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
@@ -3776,6 +3790,9 @@ ix86_option_override_internal (bool main_args_p,
 	    opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
 	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
 	  x86_prefetch_sse = true;
+	if (processor_alias_table[i].flags & PTA_MWAITX
+	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;

 	break;
       }
@@ -4206,6 +4223,7 @@ ix86_option_override_internal (bool main_args_p,
       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
       ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
       ix86_gen_monitor = gen_sse3_monitor_di;
+      ix86_gen_monitorx = gen_monitorx_di;
     }
   else
     {
@@ -4218,6 +4236,7 @@
       ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
       ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
       ix86_gen_monitor = gen_sse3_monitor_si;
+      ix86_gen_monitorx = gen_monitorx_si;
     }

 #ifdef USE_IX86_CLD
@@ -4742,6 +4761,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
     IX86_ATTR_ISA ("clwb",	OPT_mclwb),
     IX86_ATTR_ISA ("pcommit",	OPT_mpcommit),
+    IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),

     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
@@ -5867,7 +5887,10 @@ ix86_function_regparm (const_tree type, const_tree decl)
 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
    DFmode (2) arguments in SSE registers for a function with the indicated
    TYPE and DECL.  DECL may be NULL when calling function
-   indirectly or considering a libcall.  Otherwise return 0.  */
+   indirectly or considering a libcall.  Return -1 if any FP parameter
+   should be rejected by error.  This is used in situations where we imply
+   an SSE calling convention but the function is called from another
+   function with SSE disabled.  Otherwise return 0.  */

 static int
 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
@@ -5916,14 +5939,13 @@ ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
 	{
 	  /* Refuse to produce wrong code when local function with SSE enabled
 	     is called from SSE disabled function.
-	     We may work hard to work out these scenarios but hopefully
-	     it doesnot matter in practice.  */
+	     FIXME: We need a way to detect these cases across ltrans
+	     partitions and avoid using SSE calling conventions on local
+	     functions called from functions with SSE disabled.  For now at
+	     least delay the warning until we know we are going to produce
+	     wrong code.  See PR66047.  */
 	  if (!TARGET_SSE && warn)
-	    {
-	      error ("calling %qD with SSE caling convention without "
-		     "SSE/SSE2 enabled", decl);
-	      return 0;
-	    }
+	    return -1;
 	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
 				->x_ix86_isa_flags) ? 2 : 1;
 	}
@@ -6118,6 +6140,7 @@ bool
 ix86_function_arg_regno_p (int regno)
 {
   int i;
+  enum calling_abi call_abi;
   const int *parm_regs;

   if (TARGET_MPX && BND_REGNO_P (regno))
@@ -6143,16 +6166,18 @@ ix86_function_arg_regno_p (int regno)
   /* TODO: The function should depend on current function ABI but
      builtins.c would need updating then.  Therefore we use the
      default ABI.  */
+  call_abi = ix86_cfun_abi ();

   /* RAX is used as hidden argument to va_arg functions.  */
-  if (ix86_abi == SYSV_ABI && regno == AX_REG)
+  if (call_abi == SYSV_ABI && regno == AX_REG)
     return true;

-  if (ix86_abi == MS_ABI)
+  if (call_abi == MS_ABI)
     parm_regs = x86_64_ms_abi_int_parameter_registers;
   else
     parm_regs = x86_64_int_parameter_registers;
-  for (i = 0; i < (ix86_abi == MS_ABI
+
+  for (i = 0; i < (call_abi == MS_ABI
 		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
     if (regno == parm_regs[i])
       return true;
@@ -6479,6 +6504,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
   cum->bnd_regno = FIRST_BND_REG;
   cum->bnds_in_bt = 0;
   cum->force_bnd_pass = 0;
+  cum->decl = fndecl;

   if (!TARGET_64BIT)
     {
@@ -7424,6 +7450,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
 			 HOST_WIDE_INT words)
 {
   int res = 0;
+  bool error_p = false;

   switch (mode)
     {
@@ -7456,9 +7483,13 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
       gcc_unreachable ();

     case DFmode:
+      if (cum->float_in_sse == -1)
+	error_p = true;
       if (cum->float_in_sse < 2)
 	break;
     case SFmode:
+      if (cum->float_in_sse == -1)
+	error_p = true;
       if (cum->float_in_sse < 1)
 	break;
       /* FALLTHRU */
@@ -7514,6 +7545,14 @@
 	}
       break;
     }
+  if (error_p)
+    {
+      cum->float_in_sse = 0;
+      error ("calling %qD with SSE calling convention without "
+	     "SSE/SSE2 enabled", cum->decl);
+      sorry ("this is a GCC bug that can be worked around by adding "
+	     "attribute used to function called");
+    }

   return res;
 }
@@ -7646,10 +7685,11 @@ ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
    (otherwise it is an extra parameter matching an ellipsis).  */

 static rtx
-function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
+function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
 		 machine_mode orig_mode, const_tree type,
 		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
 {
+  bool error_p = false;
   /* Avoid the AL settings for the Unix64 ABI.  */
   if (mode == VOIDmode)
     return constm1_rtx;
@@ -7690,9 +7730,13 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
       break;

     case DFmode:
+      if (cum->float_in_sse == -1)
+	error_p = true;
       if (cum->float_in_sse < 2)
 	break;
     case SFmode:
+      if (cum->float_in_sse == -1)
+	error_p = true;
       if (cum->float_in_sse < 1)
 	break;
       /* FALLTHRU */
@@ -7751,6 +7795,14 @@
 	}
       break;
     }
+  if (error_p)
+    {
+      cum->float_in_sse = 0;
+      error ("calling %qD with SSE calling convention without "
+	     "SSE/SSE2 enabled", cum->decl);
+      sorry ("this is a GCC bug that can be worked around by adding "
+	     "attribute used to function called");
+    }

   return NULL_RTX;
 }
@@ -8154,10 +8206,10 @@ ix86_function_value_regno_p (const unsigned int regno)
     case AX_REG:
       return true;
     case DX_REG:
-      return (!TARGET_64BIT || ix86_abi != MS_ABI);
+      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
     case DI_REG:
     case SI_REG:
-      return TARGET_64BIT && ix86_abi != MS_ABI;
+      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

     case FIRST_BND_REG:
       return chkp_function_instrumented_p (current_function_decl);
@@ -8168,7 +8220,7 @@ ix86_function_value_regno_p (const unsigned int regno)
       /* TODO: The function should depend on current function ABI but
 	 builtins.c would need updating then.  Therefore we use the
 	 default ABI.  */
-      if (TARGET_64BIT && ix86_abi == MS_ABI)
+      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
 	return false;

       return TARGET_FLOAT_RETURNS_IN_80387;
@@ -8230,8 +8282,15 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
     {
       int sse_level = ix86_function_sseregparm (fntype, fn, false);
-      if ((sse_level >= 1 && mode == SFmode)
-	  || (sse_level == 2 && mode == DFmode))
+      if (sse_level == -1)
+	{
+	  error ("calling %qD with SSE calling convention without "
+		 "SSE/SSE2 enabled", fn);
+	  sorry ("this is a GCC bug that can be worked around by adding "
+		 "attribute used to function called");
+	}
+      else if ((sse_level >= 1 && mode == SFmode)
+	       || (sse_level == 2 && mode == DFmode))
 	regno = FIRST_SSE_REG;
     }

@@ -22943,7 +23002,7 @@ ix86_split_long_move (rtx operands[])
 	 Do an lea to the last part and use only one colliding move.  */
       else if (collisions > 1)
 	{
-	  rtx base;
+	  rtx base, addr, tls_base = NULL_RTX;

 	  collisions = 1;

@@ -22954,10 +23013,50 @@ ix86_split_long_move (rtx operands[])
 	  if (GET_MODE (base) != Pmode)
 	    base = gen_rtx_REG (Pmode, REGNO (base));

-	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+	  addr = XEXP (part[1][0], 0);
+	  if (TARGET_TLS_DIRECT_SEG_REFS)
+	    {
+	      struct ix86_address parts;
+	      int ok = ix86_decompose_address (addr, &parts);
+	      gcc_assert (ok);
+	      if (parts.seg == DEFAULT_TLS_SEG_REG)
+		{
+		  /* It is not valid to use %gs: or %fs: in lea though, so
+		     we need to remove it from the address used for lea and
+		     add it to each individual memory load instead.  */
+		  addr = copy_rtx (addr);
+		  rtx *x = &addr;
+		  while (GET_CODE (*x) == PLUS)
+		    {
+		      for (i = 0; i < 2; i++)
+			{
+			  rtx u = XEXP (*x, i);
+			  if (GET_CODE (u) == ZERO_EXTEND)
+			    u = XEXP (u, 0);
+			  if (GET_CODE (u) == UNSPEC
+			      && XINT (u, 1) == UNSPEC_TP)
+			    {
+			      tls_base = XEXP (*x, i);
+			      *x = XEXP (*x, 1 - i);
+			      break;
+			    }
+			}
+		      if (tls_base)
+			break;
+		      x = &XEXP (*x, 0);
+		    }
+		  gcc_assert (tls_base);
+		}
+	    }
+	  emit_insn (gen_rtx_SET (VOIDmode, base, addr));
+	  if (tls_base)
+	    base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
 	  part[1][0] = replace_equiv_address (part[1][0], base);
 	  for (i = 1; i < nparts; i++)
 	    {
+	      if (tls_base)
+		base = copy_rtx (base);
 	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
 	      part[1][i] = replace_equiv_address (part[1][i], tmp);
 	    }
@@ -30581,6 +30680,10 @@ enum ix86_builtins
   IX86_BUILTIN_CVTPS2PH,
   IX86_BUILTIN_CVTPS2PH256,

+  /* MONITORX and MWAITX instructions.  */
+  IX86_BUILTIN_MONITORX,
+  IX86_BUILTIN_MWAITX,
+
   /* CFString built-in for darwin */
   IX86_BUILTIN_CFSTRING,
@@ -34199,6 +34302,12 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);

+  /* MONITORX and MWAITX.  */
+  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
+	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
+  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
+	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
+
   /* Add FMA4 multi-arg argument instructions */
   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
     {
@@ -38971,6 +39080,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
       return 0;

     case IX86_BUILTIN_MONITOR:
+    case IX86_BUILTIN_MONITORX:
       arg0 = CALL_EXPR_ARG (exp, 0);
       arg1 = CALL_EXPR_ARG (exp, 1);
       arg2 = CALL_EXPR_ARG (exp, 2);
@@ -38983,7 +39093,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
 	op1 = copy_to_mode_reg (SImode, op1);
       if (!REG_P (op2))
 	op2 = copy_to_mode_reg (SImode, op2);
-      emit_insn (ix86_gen_monitor (op0, op1, op2));
+
+      emit_insn (fcode == IX86_BUILTIN_MONITOR
+		 ? ix86_gen_monitor (op0, op1, op2)
+		 : ix86_gen_monitorx (op0, op1, op2));
       return 0;

     case IX86_BUILTIN_MWAIT:
@@ -38998,6 +39111,22 @@
       emit_insn (gen_sse3_mwait (op0, op1));
       return 0;

+    case IX86_BUILTIN_MWAITX:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      arg1 = CALL_EXPR_ARG (exp, 1);
+      arg2 = CALL_EXPR_ARG (exp, 2);
+      op0 = expand_normal (arg0);
+      op1 = expand_normal (arg1);
+      op2 = expand_normal (arg2);
+      if (!REG_P (op0))
+	op0 = copy_to_mode_reg (SImode, op0);
+      if (!REG_P (op1))
+	op1 = copy_to_mode_reg (SImode, op1);
+      if (!REG_P (op2))
+	op2 = copy_to_mode_reg (SImode, op2);
+      emit_insn (gen_mwaitx (op0, op1, op2));
+      return 0;
+
     case IX86_BUILTIN_VEC_INIT_V2SI:
     case IX86_BUILTIN_VEC_INIT_V4HI:
     case IX86_BUILTIN_VEC_INIT_V8QI:
@@ -44740,6 +44869,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
     { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
   };
   int i, j, n;
+  machine_mode mmode = VOIDmode;
+  rtx (*gen_blendm) (rtx, rtx, rtx, rtx);

   switch (mode)
     {
@@ -44956,81 +45087,65 @@ half:
     case V8DFmode:
       if (TARGET_AVX512F)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
-			     force_reg (QImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = QImode;
+	  gen_blendm = gen_avx512f_blendmv8df;
 	}
-      else
-	break;
+      break;
+
     case V8DImode:
       if (TARGET_AVX512F)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
-			     force_reg (QImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = QImode;
+	  gen_blendm = gen_avx512f_blendmv8di;
 	}
-      else
-	break;
+      break;
+
     case V16SFmode:
       if (TARGET_AVX512F)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
-			     force_reg (HImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = HImode;
+	  gen_blendm = gen_avx512f_blendmv16sf;
 	}
-      else
-	break;
+      break;
+
     case V16SImode:
       if (TARGET_AVX512F)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
-			     force_reg (HImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = HImode;
+	  gen_blendm = gen_avx512f_blendmv16si;
 	}
-      else
-	break;
+      break;
+
     case V32HImode:
       if (TARGET_AVX512F && TARGET_AVX512BW)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
-			     force_reg (SImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = SImode;
+	  gen_blendm = gen_avx512bw_blendmv32hi;
 	}
-      else
-	break;
+      break;
+
     case V64QImode:
       if (TARGET_AVX512F && TARGET_AVX512BW)
 	{
-	  tmp = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (VOIDmode, tmp,
-				  gen_rtx_VEC_DUPLICATE (mode, val)));
-	  emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
-			     force_reg (DImode, GEN_INT (1 << elt))));
-	  return;
+	  mmode = DImode;
+	  gen_blendm = gen_avx512bw_blendmv64qi;
 	}
-      else
-	break;
+      break;

     default:
       break;
     }

-  if (use_vec_merge)
+  if (mmode != VOIDmode)
+    {
+      tmp = gen_reg_rtx (mode);
+      emit_insn (gen_rtx_SET (VOIDmode, tmp,
+			      gen_rtx_VEC_DUPLICATE (mode, val)));
+      emit_insn (gen_blendm (target, tmp, target,
+			     force_reg (mmode,
+					gen_int_mode (1 << elt, mmode))));
+    }
+  else if (use_vec_merge)
     {
       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
       tmp = gen_rtx_VEC_MERGE
	    (mode, tmp, target, GEN_INT (1 << elt));
@@ -46892,15 +47007,16 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
 static bool
 expand_vec_perm_blend (struct expand_vec_perm_d *d)
 {
-  machine_mode vmode = d->vmode;
+  machine_mode mmode, vmode = d->vmode;
   unsigned i, mask, nelt = d->nelt;
-  rtx target, op0, op1, x;
+  rtx target, op0, op1, maskop, x;
   rtx rperm[32], vperm;

   if (d->one_operand_p)
     return false;
   if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
-      && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+      && (TARGET_AVX512BW
+	  || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
     ;
   else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
     ;
@@ -47074,8 +47190,33 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
       gcc_unreachable ();
     }

+  switch (vmode)
+    {
+    case V8DFmode:
+    case V8DImode:
+      mmode = QImode;
+      break;
+    case V16SFmode:
+    case V16SImode:
+      mmode = HImode;
+      break;
+    case V32HImode:
+      mmode = SImode;
+      break;
+    case V64QImode:
+      mmode = DImode;
+      break;
+    default:
+      mmode = VOIDmode;
+    }
+
+  if (mmode != VOIDmode)
+    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
+  else
+    maskop = GEN_INT (mask);
+
   /* This matches five different patterns with the different modes.  */
-  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
+  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
   x = gen_rtx_SET (VOIDmode, target, x);
   emit_insn (x);
   if (target != d->target)
@@ -51606,7 +51747,7 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   for (i = 0; i < loop->num_nodes; i++)
     FOR_BB_INSNS (bbs[i], insn)
       if (NONDEBUG_INSN_P (insn))
-	FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
+	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
 	  if (const_rtx x = *iter)
 	    if (MEM_P (x))
 	      {
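
The monitorx/mwaitx plumbing above becomes user-visible through the two new built-ins registered in ix86_init_mmx_sse_builtins. Below is a minimal sketch of how a spin-wait might use them; it assumes the operand order of the _mm_monitorx/_mm_mwaitx wrappers (pointer, extensions, hints for monitorx; extensions, hints, clock for mwaitx), and the function and variable names are illustrative only. Compile with -mmwaitx, or a -march that implies it such as bdver4.

#include <stdint.h>

/* Illustrative sketch only: park the CPU until *flag changes, using the
   built-ins added by this patch.  */
static void
wait_for_flag (volatile uint32_t *flag, uint32_t old)
{
  while (*flag == old)
    {
      /* Arm the address monitor on the cache line holding *flag;
	 the two zeros are the extensions and hints words.  */
      __builtin_ia32_monitorx ((const void *) flag, 0, 0);
      if (*flag != old)	/* Re-check to close the wake-up race.  */
	break;
      /* Wait for a store to the monitored line; all-zero operands
	 request no extensions, no hints and no timeout.  */
      __builtin_ia32_mwaitx (0, 0, 0);
    }
}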
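The new rep_prefix_8_byte check in ix86_parse_stringop_strategy_string rejects a strategy that 32-bit code cannot honor, since rep movsq exists only in 64-bit mode. Assuming the user-level spelling of that algorithm is rep_8byte and the option syntax is -mmemcpy-strategy=alg:max_size:align (both taken from the GCC manual, not from this patch), an invocation like the first one below would now draw the error text added above instead of being silently accepted:

gcc -m32 -mmemcpy-strategy=rep_8byte:4096:align -c foo.c   # now rejected
gcc -m64 -mmemcpy-strategy=rep_8byte:4096:align -c foo.c   # still accepted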
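The -1 return from ix86_function_sseregparm defers the PR66047 diagnostic from function analysis to the point where an SFmode/DFmode value would actually be passed or returned in an SSE register (function_arg_32, function_arg_advance_32, function_value_32). A hypothetical 32-bit reproducer, sketched from the FIXME above; the flag/attribute combination (e.g. -m32 -msse2 -mfpmath=sse) is an assumption, not taken from the patch:

/* Hypothetical illustration.  The local callee may be given the SSE
   register-passing convention, but the caller is compiled with SSE
   disabled, so the call site cannot use it.  With this patch the
   error/sorry pair fires at the call, not unconditionally when the
   callee is analyzed.  */
static double __attribute__ ((noinline))
callee (double x)
{
  return x * 2.0;
}

double __attribute__ ((target ("no-sse")))
caller (double x)
{
  return callee (x);	/* Diagnosed lazily here.  */
}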
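The ix86_expand_vector_set refactor collapses six near-identical AVX-512 cases into one tail: each case now just records a mask mode and a blendm generator, and the shared code broadcasts the scalar and mask-blends it into the destination under the single-bit mask 1 << elt. An intrinsics-level analogue of the emitted sequence for V16SFmode, offered as an illustration rather than the compiler's own code (requires -mavx512f):

#include <immintrin.h>

/* Illustrative analogue of the blendm sequence: broadcast VAL
   (the VEC_DUPLICATE), then blend it into DEST under the one-bit
   mask 1 << ELT.  */
static __m512
set_element (__m512 dest, float val, int elt)
{
  __m512 dup = _mm512_set1_ps (val);
  return _mm512_mask_blend_ps ((__mmask16) (1 << elt), dest, dup);
}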