diff options
Diffstat (limited to 'gcc/config/i386/i386.h')
-rw-r--r-- | gcc/config/i386/i386.h | 217 |
1 files changed, 101 insertions, 116 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7d940f98804..e820aa65ac5 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -51,6 +51,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_SSE4_2 TARGET_ISA_SSE4_2 #define TARGET_AVX TARGET_ISA_AVX #define TARGET_AVX2 TARGET_ISA_AVX2 +#define TARGET_AVX512F TARGET_ISA_AVX512F +#define TARGET_AVX512PF TARGET_ISA_AVX512PF +#define TARGET_AVX512ER TARGET_ISA_AVX512ER +#define TARGET_AVX512CD TARGET_ISA_AVX512CD #define TARGET_FMA TARGET_ISA_FMA #define TARGET_SSE4A TARGET_ISA_SSE4A #define TARGET_FMA4 TARGET_ISA_FMA4 @@ -170,7 +174,7 @@ struct processor_costs { const int fsqrt; /* cost of FSQRT instruction. */ /* Specify what algorithm to use for stringops on unknown size. */ - struct stringop_algs memcpy[2], memset[2]; + struct stringop_algs *memcpy, *memset; const int scalar_stmt_cost; /* Cost of any scalar operation, excluding load and store. */ const int scalar_load_cost; /* Cost of scalar load. */ @@ -261,81 +265,11 @@ extern const struct processor_costs ix86_size_cost; /* Feature tests against the various tunings. */ enum ix86_tune_indices { - X86_TUNE_USE_LEAVE, - X86_TUNE_PUSH_MEMORY, - X86_TUNE_ZERO_EXTEND_WITH_AND, - X86_TUNE_UNROLL_STRLEN, - X86_TUNE_BRANCH_PREDICTION_HINTS, - X86_TUNE_DOUBLE_WITH_ADD, - X86_TUNE_USE_SAHF, - X86_TUNE_MOVX, - X86_TUNE_PARTIAL_REG_STALL, - X86_TUNE_PARTIAL_FLAG_REG_STALL, - X86_TUNE_LCP_STALL, - X86_TUNE_USE_HIMODE_FIOP, - X86_TUNE_USE_SIMODE_FIOP, - X86_TUNE_USE_MOV0, - X86_TUNE_USE_CLTD, - X86_TUNE_USE_XCHGB, - X86_TUNE_SPLIT_LONG_MOVES, - X86_TUNE_READ_MODIFY_WRITE, - X86_TUNE_READ_MODIFY, - X86_TUNE_PROMOTE_QIMODE, - X86_TUNE_FAST_PREFIX, - X86_TUNE_SINGLE_STRINGOP, - X86_TUNE_QIMODE_MATH, - X86_TUNE_HIMODE_MATH, - X86_TUNE_PROMOTE_QI_REGS, - X86_TUNE_PROMOTE_HI_REGS, - X86_TUNE_SINGLE_POP, - X86_TUNE_DOUBLE_POP, - X86_TUNE_SINGLE_PUSH, - X86_TUNE_DOUBLE_PUSH, - X86_TUNE_INTEGER_DFMODE_MOVES, - X86_TUNE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, - X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, - X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, - X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, - X86_TUNE_SSE_SPLIT_REGS, - X86_TUNE_SSE_TYPELESS_STORES, - X86_TUNE_SSE_LOAD0_BY_PXOR, - X86_TUNE_MEMORY_MISMATCH_STALL, - X86_TUNE_PROLOGUE_USING_MOVE, - X86_TUNE_EPILOGUE_USING_MOVE, - X86_TUNE_SHIFT1, - X86_TUNE_USE_FFREEP, - X86_TUNE_INTER_UNIT_MOVES_TO_VEC, - X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, - X86_TUNE_INTER_UNIT_CONVERSIONS, - X86_TUNE_FOUR_JUMP_LIMIT, - X86_TUNE_SCHEDULE, - X86_TUNE_USE_BT, - X86_TUNE_USE_INCDEC, - X86_TUNE_PAD_RETURNS, - X86_TUNE_PAD_SHORT_FUNCTION, - X86_TUNE_EXT_80387_CONSTANTS, - X86_TUNE_AVOID_VECTOR_DECODE, - X86_TUNE_PROMOTE_HIMODE_IMUL, - X86_TUNE_SLOW_IMUL_IMM32_MEM, - X86_TUNE_SLOW_IMUL_IMM8, - X86_TUNE_MOVE_M1_VIA_OR, - X86_TUNE_NOT_UNPAIRABLE, - X86_TUNE_NOT_VECTORMODE, - X86_TUNE_USE_VECTOR_FP_CONVERTS, - X86_TUNE_USE_VECTOR_CONVERTS, - X86_TUNE_FUSE_CMP_AND_BRANCH, - X86_TUNE_OPT_AGU, - X86_TUNE_VECTORIZE_DOUBLE, - X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, - X86_TUNE_AVX128_OPTIMAL, - X86_TUNE_REASSOC_INT_TO_PARALLEL, - X86_TUNE_REASSOC_FP_TO_PARALLEL, - X86_TUNE_GENERAL_REGS_SSE_SPILL, - X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, - X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, - - X86_TUNE_LAST +#undef DEF_TUNE +#define DEF_TUNE(tune, name, selector) tune, +#include "x86-tune.def" +#undef DEF_TUNE +X86_TUNE_LAST }; extern unsigned char ix86_tune_features[X86_TUNE_LAST]; @@ -802,7 +736,8 @@ enum target_cpu_default Pentium+ prefers DFmode values to be aligned to 64 bit boundary and Pentium Pro XFmode values at 128 bit boundaries. */ -#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128) +#define BIGGEST_ALIGNMENT \ + (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128)) /* Maximum stack alignment. */ #define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT @@ -958,7 +893,7 @@ enum target_cpu_default eliminated during reloading in favor of either the stack or frame pointer. */ -#define FIRST_PSEUDO_REGISTER 53 +#define FIRST_PSEUDO_REGISTER 69 /* Number of hardware registers that go into the DWARF-2 unwind info. If not defined, equals FIRST_PSEUDO_REGISTER. */ @@ -984,6 +919,10 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 0, 0, 0, 0, 0, 0, 0, 0, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ 0, 0, 0, 0, 0, 0, 0, 0 } /* 1 for registers not available across function calls. @@ -1012,7 +951,11 @@ enum target_cpu_default /* r8, r9, r10, r11, r12, r13, r14, r15*/ \ 1, 1, 1, 1, 2, 2, 2, 2, \ /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \ - 6, 6, 6, 6, 6, 6, 6, 6 } + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \ + 6, 6, 6, 6, 6, 6, 6, 6, \ +/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \ + 6, 6, 6, 6, 6, 6, 6, 6 } /* Order in which to allocate registers. Each register must be listed once, even those in FIXED_REGISTERS. List frame pointer @@ -1027,7 +970,8 @@ enum target_cpu_default { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \ - 48, 49, 50, 51, 52 } + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 63, 64, 65, 66, 67, 68 } /* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order to be rearranged based on a particular function. When using sse math, @@ -1073,6 +1017,14 @@ enum target_cpu_default #define VALID_AVX256_REG_OR_OI_MODE(MODE) \ (VALID_AVX256_REG_MODE (MODE) || (MODE) == OImode) +#define VALID_AVX512F_SCALAR_MODE(MODE) \ + ((MODE) == DImode || (MODE) == DFmode || (MODE) == SImode \ + || (MODE) == SFmode) + +#define VALID_AVX512F_REG_MODE(MODE) \ + ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode) + #define VALID_SSE2_REG_MODE(MODE) \ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V2DImode || (MODE) == DFmode) @@ -1112,7 +1064,9 @@ enum target_cpu_default || (MODE) == V2DImode || (MODE) == V4SFmode || (MODE) == V4SImode \ || (MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \ - || (MODE) == V2TImode) + || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode \ + || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \ + || (MODE) == V16SFmode) /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ @@ -1175,15 +1129,18 @@ enum target_cpu_default #define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1) #define LAST_SSE_REG (FIRST_SSE_REG + 7) -#define FIRST_MMX_REG (LAST_SSE_REG + 1) +#define FIRST_MMX_REG (LAST_SSE_REG + 1) /*29*/ #define LAST_MMX_REG (FIRST_MMX_REG + 7) -#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) +#define FIRST_REX_INT_REG (LAST_MMX_REG + 1) /*37*/ #define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7) -#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) +#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1) /*45*/ #define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7) +#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/ +#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/ + /* Override this in other tm.h files to cope with various OS lossage requiring a frame pointer. */ #ifndef SUBTARGET_FRAME_POINTER_REQUIRED @@ -1263,6 +1220,8 @@ enum reg_class FLOAT_REGS, SSE_FIRST_REG, SSE_REGS, + EVEX_SSE_REGS, + ALL_SSE_REGS, MMX_REGS, FP_TOP_SSE_REGS, FP_SECOND_SSE_REGS, @@ -1280,7 +1239,7 @@ enum reg_class #define FLOAT_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), FLOAT_REGS) #define SSE_CLASS_P(CLASS) \ - reg_class_subset_p ((CLASS), SSE_REGS) + reg_class_subset_p ((CLASS), ALL_SSE_REGS) #define MMX_CLASS_P(CLASS) \ ((CLASS) == MMX_REGS) #define MAYBE_INTEGER_CLASS_P(CLASS) \ @@ -1288,13 +1247,16 @@ enum reg_class #define MAYBE_FLOAT_CLASS_P(CLASS) \ reg_classes_intersect_p ((CLASS), FLOAT_REGS) #define MAYBE_SSE_CLASS_P(CLASS) \ - reg_classes_intersect_p (SSE_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), ALL_SSE_REGS) #define MAYBE_MMX_CLASS_P(CLASS) \ - reg_classes_intersect_p (MMX_REGS, (CLASS)) + reg_classes_intersect_p ((CLASS), MMX_REGS) #define Q_CLASS_P(CLASS) \ reg_class_subset_p ((CLASS), Q_REGS) +#define MAYBE_NON_Q_CLASS_P(CLASS) \ + reg_classes_intersect_p ((CLASS), NON_Q_REGS) + /* Give names of register classes as strings for dump file. */ #define REG_CLASS_NAMES \ @@ -1311,6 +1273,8 @@ enum reg_class "FLOAT_REGS", \ "SSE_FIRST_REG", \ "SSE_REGS", \ + "EVEX_SSE_REGS", \ + "ALL_SSE_REGS", \ "MMX_REGS", \ "FP_TOP_SSE_REGS", \ "FP_SECOND_SSE_REGS", \ @@ -1326,30 +1290,36 @@ enum reg_class Note that CLOBBERED_REGS are calculated by TARGET_CONDITIONAL_REGISTER_USAGE. */ -#define REG_CLASS_CONTENTS \ -{ { 0x00, 0x0 }, \ - { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \ - { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \ - { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \ - { 0x03, 0x0 }, /* AD_REGS */ \ - { 0x0f, 0x0 }, /* Q_REGS */ \ - { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \ - { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \ - { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \ - { 0x00, 0x0 }, /* CLOBBERED_REGS */ \ - { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \ - { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\ - { 0xff00, 0x0 }, /* FLOAT_REGS */ \ - { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \ -{ 0x1fe00000,0x1fe000 }, /* SSE_REGS */ \ -{ 0xe0000000, 0x1f }, /* MMX_REGS */ \ -{ 0x1fe00100,0x1fe000 }, /* FP_TOP_SSE_REG */ \ -{ 0x1fe00200,0x1fe000 }, /* FP_SECOND_SSE_REG */ \ -{ 0x1fe0ff00,0x1fe000 }, /* FLOAT_SSE_REGS */ \ - { 0x11ffff, 0x1fe0 }, /* FLOAT_INT_REGS */ \ -{ 0x1ff100ff,0x1fffe0 }, /* INT_SSE_REGS */ \ -{ 0x1ff1ffff,0x1fffe0 }, /* FLOAT_INT_SSE_REGS */ \ -{ 0xffffffff,0x1fffff } \ +#define REG_CLASS_CONTENTS \ +{ { 0x00, 0x0, 0x0 }, \ + { 0x01, 0x0, 0x0 }, /* AREG */ \ + { 0x02, 0x0, 0x0 }, /* DREG */ \ + { 0x04, 0x0, 0x0 }, /* CREG */ \ + { 0x08, 0x0, 0x0 }, /* BREG */ \ + { 0x10, 0x0, 0x0 }, /* SIREG */ \ + { 0x20, 0x0, 0x0 }, /* DIREG */ \ + { 0x03, 0x0, 0x0 }, /* AD_REGS */ \ + { 0x0f, 0x0, 0x0 }, /* Q_REGS */ \ + { 0x1100f0, 0x1fe0, 0x0 }, /* NON_Q_REGS */ \ + { 0x7f, 0x1fe0, 0x0 }, /* INDEX_REGS */ \ + { 0x1100ff, 0x0, 0x0 }, /* LEGACY_REGS */ \ + { 0x07, 0x0, 0x0 }, /* CLOBBERED_REGS */ \ + { 0x1100ff, 0x1fe0, 0x0 }, /* GENERAL_REGS */ \ + { 0x100, 0x0, 0x0 }, /* FP_TOP_REG */ \ + { 0x0200, 0x0, 0x0 }, /* FP_SECOND_REG */ \ + { 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \ + { 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \ +{ 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \ + { 0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \ +{ 0x1fe00000,0xffffe000, 0x1f }, /* ALL_SSE_REGS */ \ +{ 0xe0000000, 0x1f, 0x0 }, /* MMX_REGS */ \ +{ 0x1fe00100,0xffffe000, 0x1f }, /* FP_TOP_SSE_REG */ \ +{ 0x1fe00200,0xffffe000, 0x1f }, /* FP_SECOND_SSE_REG */ \ +{ 0x1fe0ff00,0xffffe000, 0x1f }, /* FLOAT_SSE_REGS */ \ +{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \ +{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \ +{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \ +{ 0xffffffff,0xffffffff, 0x1f } \ } /* The same information, inverted: @@ -1393,13 +1363,20 @@ enum reg_class #define SSE_REG_P(X) (REG_P (X) && SSE_REGNO_P (REGNO (X))) #define SSE_REGNO_P(N) \ (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \ - || REX_SSE_REGNO_P (N)) + || REX_SSE_REGNO_P (N) \ + || EXT_REX_SSE_REGNO_P (N)) #define REX_SSE_REGNO_P(N) \ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG) +#define EXT_REX_SSE_REGNO_P(N) \ + IN_RANGE ((N), FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG) + #define SSE_REGNO(N) \ - ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8) + ((N) < 8 ? FIRST_SSE_REG + (N) \ + : (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \ + : (FIRST_EXT_REX_SSE_REG + (N) - 16)) + #define SSE_FLOAT_MODE_P(MODE) \ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode)) @@ -1952,7 +1929,11 @@ do { \ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"} + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", \ + "xmm16", "xmm17", "xmm18", "xmm19", \ + "xmm20", "xmm21", "xmm22", "xmm23", \ + "xmm24", "xmm25", "xmm26", "xmm27", \ + "xmm28", "xmm29", "xmm30", "xmm31" } #define REGISTER_NAMES HI_REGISTER_NAMES @@ -2273,9 +2254,13 @@ enum avx_u128_state scheduling just increases amount of live registers at time and in the turn amount of fxch instructions needed. - ??? Maybe Pentium chips benefits from renaming, someone can try.... */ + ??? Maybe Pentium chips benefits from renaming, someone can try.... + + Don't rename evex to non-evex sse registers. */ -#define HARD_REGNO_RENAME_OK(SRC, TARGET) !STACK_REGNO_P (SRC) +#define HARD_REGNO_RENAME_OK(SRC, TARGET) (!STACK_REGNO_P (SRC) && \ + (EXT_REX_SSE_REGNO_P (SRC) == \ + EXT_REX_SSE_REGNO_P (TARGET))) #define FASTCALL_PREFIX '@' |