author     Simon Pilgrim <llvm-dev@redking.me.uk>   2021-08-05 12:05:02 +0100
committer  Simon Pilgrim <llvm-dev@redking.me.uk>   2021-08-05 13:09:23 +0100
commit     e78bf49a58ed0beca9fa594bddd61a0655fcedb0 (patch)
tree       8a1c144ab6e22d62694bfc7a22e8d8c257badab4
parent     cc947e29ea0afa6efa506ffbf74115879be64c6d (diff)
[X86] Rename Subtarget Tuning Feature Flag Prefix. NFC.
As suggested on D107370, this patch renames the tuning feature flags to start with 'Tuning' instead of 'Feature'.

Differential Revision: https://reviews.llvm.org/D107459
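For context only (this note and sketch are not part of the committed diff): the renamed flags are still ordinary subtarget feature bits, so callers query them exactly as before, just under the new enum names. A minimal, hypothetical C++ sketch using the TuningFast*ByteNOP names from the hunks below:

    // Hypothetical sketch, not LLVM source: illustrates the post-rename
    // lookup pattern seen in X86AsmBackend.cpp and X86MCInstLower.cpp below.
    // Tuning flags are plain FeatureBitset entries, so only the enum
    // names change -- no behavior differs, hence the NFC tag.
    static unsigned maxNopSize(const llvm::MCSubtargetInfo &STI) {
      if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
        return 7;
      if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
        return 15;
      if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
        return 11;
      return 10; // conservative fallback, mirroring X86MCInstLower.cpp
    }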
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp  |   6
-rw-r--r--  llvm/lib/Target/X86/X86.td                          | 432
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp              |   6
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h        |  76

4 files changed, 261 insertions, 259 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 83f3614ded1a..d15fb54b37f1 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1086,11 +1086,11 @@ unsigned X86AsmBackend::getMaximumNopSize() const {
return 4;
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
return 1;
- if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
return 7;
- if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
return 15;
- if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
return 11;
// FIXME: handle 32-bit mode
// 15-bytes is the longest single NOP instruction, but 10-bytes is
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 213cddc287c7..cce126b01684 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -367,120 +367,120 @@ def FeatureLVILoadHardening
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
-def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
+def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
-def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
-def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
"true",
"PMADDWD is slower than PMULLD">;
// FIXME: This should not apply to CPUs that do not have SSE.
-def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
-def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
-def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
+def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
-def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
-def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
+def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
-def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
-def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
+def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
-def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
+def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
-def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
-def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
+def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
-def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
+def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
// On recent X86 (port bound) processors, its preferable to combine to a single shuffle
// using a variable mask over multiple fixed shuffles.
-def FeatureFastVariableCrossLaneShuffle
+def TuningFastVariableCrossLaneShuffle
: SubtargetFeature<"fast-variable-crosslane-shuffle",
"HasFastVariableCrossLaneShuffle",
"true", "Cross-lane shuffles with variable masks are fast">;
-def FeatureFastVariablePerLaneShuffle
+def TuningFastVariablePerLaneShuffle
: SubtargetFeature<"fast-variable-perlane-shuffle",
"HasFastVariablePerLaneShuffle",
"true", "Per-lane shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
-def FeatureInsertVZEROUPPER
+def TuningInsertVZEROUPPER
: SubtargetFeature<"vzeroupper",
"InsertVZEROUPPER",
"true", "Should insert vzeroupper instructions">;
-// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
-// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
+// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
+// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
-def FeatureFastScalarFSQRT
+def TuningFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
-def FeatureFastVectorFSQRT
+def TuningFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
-def FeatureFastLZCNT
+def TuningFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs upto 7-bytes in length.
-def FeatureFast7ByteNOP
+def TuningFast7ByteNOP
: SubtargetFeature<
"fast-7bytenop", "HasFast7ByteNOP", "true",
"Target can quickly decode up to 7 byte NOPs">;
// If the target can efficiently decode NOPs upto 11-bytes in length.
-def FeatureFast11ByteNOP
+def TuningFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs upto 15-bytes in length.
-def FeatureFast15ByteNOP
+def TuningFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
@@ -488,21 +488,21 @@ def FeatureFast15ByteNOP
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
-def FeatureFastSHLDRotate
+def TuningFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
-def FeatureBranchFusion
+def TuningBranchFusion
: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
"CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
-def FeatureMacroFusion
+def TuningMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
@@ -510,50 +510,50 @@ def FeatureMacroFusion
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
-def FeatureFastGather
+def TuningFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast">;
-def FeaturePrefer128Bit
+def TuningPrefer128Bit
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
"Prefer 128-bit AVX instructions">;
-def FeaturePrefer256Bit
+def TuningPrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-def FeaturePreferMaskRegisters
+def TuningPreferMaskRegisters
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
"Prefer AVX512 mask registers over PTEST/MOVMSK">;
-def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
"with good throughput">;
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
-def FeatureFastHorizontalOps
+def TuningFastHorizontalOps
: SubtargetFeature<
"fast-hops", "HasFastHorizontalOps", "true",
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles">;
-def FeatureFastScalarShiftMasks
+def TuningFastScalarShiftMasks
: SubtargetFeature<
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
"Prefer a left/right scalar logical shift pair over a shift+and pair">;
-def FeatureFastVectorShiftMasks
+def TuningFastVectorShiftMasks
: SubtargetFeature<
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
-def FeatureFastMOVBE
+def TuningFastMOVBE
: SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
"Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
-def FeatureUseGLMDivSqrtCosts
+def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
@@ -631,8 +631,8 @@ def ProcessorFeatures {
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
- list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
+ TuningInsertVZEROUPPER];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -644,15 +644,15 @@ def ProcessorFeatures {
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> SNBTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowUAMem32,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowUAMem32,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SNBFeatures =
!listconcat(WSMFeatures, SNBAdditionalFeatures);
@@ -673,17 +673,17 @@ def ProcessorFeatures {
FeatureINVPCID,
FeatureLZCNT,
FeatureMOVBE];
- list<SubtargetFeature> HSWTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningLZCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> HSWFeatures =
!listconcat(IVBFeatures, HSWAdditionalFeatures);
@@ -700,18 +700,18 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT];
- list<SubtargetFeature> SKLTuning = [FeatureFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKLFeatures =
!listconcat(BDWFeatures, SKLAdditionalFeatures);
@@ -727,19 +727,19 @@ def ProcessorFeatures {
FeatureVLX,
FeaturePKU,
FeatureCLWB];
- list<SubtargetFeature> SKXTuning = [FeatureFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKXTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
@@ -765,18 +765,18 @@ def ProcessorFeatures {
FeatureVBMI,
FeatureIFMA,
FeatureSHA];
- list<SubtargetFeature> CNLTuning = [FeatureFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> CNLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
@@ -846,14 +846,14 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureLAHFSAHF];
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
- FeatureSlowUAMem16,
- FeatureLEAForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureSlowTwoMemOps,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions,
- FeatureInsertVZEROUPPER];
+ TuningSlowUAMem16,
+ TuningLEAForSP,
+ TuningSlowDivide32,
+ TuningSlowDivide64,
+ TuningSlowTwoMemOps,
+ TuningLEAUsesAG,
+ TuningPadShortFunctions,
+ TuningInsertVZEROUPPER];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
@@ -862,15 +862,15 @@ def ProcessorFeatures {
FeaturePRFCHW,
FeatureRDRAND];
list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowDivide64,
- FeatureSlowPMULLD,
- FeatureFast7ByteNOP,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningSlowDivide64,
+ TuningSlowPMULLD,
+ TuningFast7ByteNOP,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
!listconcat(AtomFeatures, SLMAdditionalFeatures);
@@ -884,25 +884,25 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
- list<SubtargetFeature> GLMTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
!listconcat(SLMFeatures, GLMAdditionalFeatures);
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID];
- list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -969,14 +969,14 @@ def ProcessorFeatures {
FeatureBMI2,
FeatureFMA,
FeaturePRFCHW];
- list<SubtargetFeature> KNLTuning = [FeatureSlowDivide64,
- FeatureSlow3OpsLEA,
- FeatureSlowIncDec,
- FeatureSlowTwoMemOps,
- FeaturePreferMaskRegisters,
- FeatureFastGather,
- FeatureFastMOVBE,
- FeatureSlowPMADDWD];
+ list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
+ TuningSlow3OpsLEA,
+ TuningSlowIncDec,
+ TuningSlowTwoMemOps,
+ TuningPreferMaskRegisters,
+ TuningFastGather,
+ TuningFastMOVBE,
+ TuningSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
@@ -995,9 +995,9 @@ def ProcessorFeatures {
FeatureLAHFSAHF,
FeatureCMOV,
Feature64Bit];
- list<SubtargetFeature> BarcelonaTuning = [FeatureFastScalarShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Bobcat
list<SubtargetFeature> BtVer1Features = [FeatureX87,
@@ -1014,11 +1014,11 @@ def ProcessorFeatures {
FeatureLZCNT,
FeaturePOPCNT,
FeatureLAHFSAHF];
- list<SubtargetFeature> BtVer1Tuning = [FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
@@ -1029,14 +1029,14 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> BtVer2Tuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFastHorizontalOps,
- FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD];
+ list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFastHorizontalOps,
+ TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1058,19 +1058,19 @@ def ProcessorFeatures {
FeatureXSAVE,
FeatureLWP,
FeatureLAHFSAHF];
- list<SubtargetFeature> BdVer1Tuning = [FeatureSlowSHLD,
- FeatureFast11ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureBranchFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
+ TuningFast11ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningBranchFusion,
+ TuningInsertVZEROUPPER];
// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,
FeatureTBM,
FeatureFMA];
- list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastBEXTR,
- FeatureFastMOVBE];
+ list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
+ TuningFastMOVBE];
list<SubtargetFeature> BdVer2Tuning =
!listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
@@ -1127,14 +1127,14 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES];
- list<SubtargetFeature> ZNTuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFast15ByteNOP,
- FeatureBranchFusion,
- FeatureFastScalarShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFast15ByteNOP,
+ TuningBranchFusion,
+ TuningFastScalarShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureWBNOINVD];
@@ -1147,8 +1147,8 @@ def ProcessorFeatures {
FeatureVAES,
FeatureVPCLMULQDQ];
list<SubtargetFeature> ZN3AdditionalTuning =
- [FeatureMacroFusion,
- FeatureFastVariablePerLaneShuffle];
+ [TuningMacroFusion,
+ TuningFastVariablePerLaneShuffle];
list<SubtargetFeature> ZN3Tuning =
!listconcat(ZNTuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
@@ -1175,37 +1175,37 @@ class ProcModel<string Name, SchedMachineModel Model,
// It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
- [FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER]>;
+ [TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER]>;
def : Proc<"i386", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
FeatureFXSR, FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -1221,30 +1221,30 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Intel Quark.
def : Proc<"lakemont", [FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureCMPXCHG8B,
@@ -1257,8 +1257,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
FeatureCMPXCHG16B,
],
[
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
@@ -1275,9 +1275,9 @@ def : ProcModel<"core2", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
def : ProcModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -1292,9 +1292,9 @@ def : ProcModel<"penryn", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Atom CPUs.
@@ -1379,37 +1379,37 @@ def : ProcModel<"alderlake", SkylakeClientModel,
// AMD CPUs.
def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
Feature64Bit],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
@@ -1445,17 +1445,17 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1469,11 +1469,11 @@ def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
[
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER
]>;
// x86-64 micro-architecture levels.
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7d916f917d5e..384c54aa4eb6 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1094,11 +1094,11 @@ static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
if (Subtarget->is64Bit()) {
// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
// IndexReg/BaseReg below need to be updated.
- if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
+ if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
MaxNopLength = 7;
- else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
MaxNopLength = 15;
- else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
MaxNopLength = 11;
else
MaxNopLength = 10;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 45257c33a009..a89206857ba8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -45,48 +45,50 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureCMPXCHG16B,
X86::FeatureLAHFSAHF,
- // Codegen control options.
- X86::FeatureFast11ByteNOP,
- X86::FeatureFast15ByteNOP,
- X86::FeatureFastBEXTR,
- X86::FeatureFastHorizontalOps,
- X86::FeatureFastLZCNT,
- X86::FeatureFastScalarFSQRT,
- X86::FeatureFastSHLDRotate,
- X86::FeatureFastScalarShiftMasks,
- X86::FeatureFastVectorShiftMasks,
- X86::FeatureFastVariableCrossLaneShuffle,
- X86::FeatureFastVariablePerLaneShuffle,
- X86::FeatureFastVectorFSQRT,
- X86::FeatureLEAForSP,
- X86::FeatureLEAUsesAG,
- X86::FeatureLZCNTFalseDeps,
- X86::FeatureBranchFusion,
- X86::FeatureMacroFusion,
- X86::FeaturePadShortFunctions,
- X86::FeaturePOPCNTFalseDeps,
+ // Some older targets can be setup to fold unaligned loads.
X86::FeatureSSEUnalignedMem,
- X86::FeatureSlow3OpsLEA,
- X86::FeatureSlowDivide32,
- X86::FeatureSlowDivide64,
- X86::FeatureSlowIncDec,
- X86::FeatureSlowLEA,
- X86::FeatureSlowPMADDWD,
- X86::FeatureSlowPMULLD,
- X86::FeatureSlowSHLD,
- X86::FeatureSlowTwoMemOps,
- X86::FeatureSlowUAMem16,
- X86::FeaturePreferMaskRegisters,
- X86::FeatureInsertVZEROUPPER,
- X86::FeatureUseGLMDivSqrtCosts,
+
+ // Codegen control options.
+ X86::TuningFast11ByteNOP,
+ X86::TuningFast15ByteNOP,
+ X86::TuningFastBEXTR,
+ X86::TuningFastHorizontalOps,
+ X86::TuningFastLZCNT,
+ X86::TuningFastScalarFSQRT,
+ X86::TuningFastSHLDRotate,
+ X86::TuningFastScalarShiftMasks,
+ X86::TuningFastVectorShiftMasks,
+ X86::TuningFastVariableCrossLaneShuffle,
+ X86::TuningFastVariablePerLaneShuffle,
+ X86::TuningFastVectorFSQRT,
+ X86::TuningLEAForSP,
+ X86::TuningLEAUsesAG,
+ X86::TuningLZCNTFalseDeps,
+ X86::TuningBranchFusion,
+ X86::TuningMacroFusion,
+ X86::TuningPadShortFunctions,
+ X86::TuningPOPCNTFalseDeps,
+ X86::TuningSlow3OpsLEA,
+ X86::TuningSlowDivide32,
+ X86::TuningSlowDivide64,
+ X86::TuningSlowIncDec,
+ X86::TuningSlowLEA,
+ X86::TuningSlowPMADDWD,
+ X86::TuningSlowPMULLD,
+ X86::TuningSlowSHLD,
+ X86::TuningSlowTwoMemOps,
+ X86::TuningSlowUAMem16,
+ X86::TuningPreferMaskRegisters,
+ X86::TuningInsertVZEROUPPER,
+ X86::TuningUseGLMDivSqrtCosts,
// Perf-tuning flags.
- X86::FeatureFastGather,
- X86::FeatureSlowUAMem32,
+ X86::TuningFastGather,
+ X86::TuningSlowUAMem32,
// Based on whether user set the -mprefer-vector-width command line.
- X86::FeaturePrefer128Bit,
- X86::FeaturePrefer256Bit,
+ X86::TuningPrefer128Bit,
+ X86::TuningPrefer256Bit,
// CPU name enums. These just follow CPU string.
X86::ProcIntelAtom,