summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2023-04-10 23:08:16 +0000
committerSean Christopherson <seanjc@google.com>2023-04-10 23:08:16 +0000
commit4d663d83a561344d237c77a77b651f55eb7f7356 (patch)
treede7bedfdfd31e9174796b8da7265307fc191cfc8
parentf45906b86ec8e7ec3bf0d5bbe7d206f1d2e34076 (diff)
parentdfdeda67ea2dac57d2d7506d65cfe5a0878ad285 (diff)
downloadlinux-next-4d663d83a561344d237c77a77b651f55eb7f7356.tar.gz
Merge branch 'pmu'
* pmu: (29 commits) KVM: x86/pmu: Prevent the PMU from counting disallowed events KVM: x86/pmu: Fix a typo in kvm_pmu_request_counter_reprogam() KVM: x86/pmu: Rewrite reprogram_counters() to improve performance KVM: VMX: Refactor intel_pmu_{g,}set_msr() to align with other helpers KVM: x86/pmu: Rename pmc_is_enabled() to pmc_is_globally_enabled() KVM: selftests: Verify LBRs are disabled if vPMU is disabled KVM: selftests: Add negative testcase for PEBS format in PERF_CAPABILITIES KVM: selftests: Refactor LBR_FMT test to avoid use of separate macro KVM: selftests: Drop "all done!" printf() from PERF_CAPABILITIES test KVM: selftests: Test post-KVM_RUN writes to PERF_CAPABILITIES KVM: selftests: Expand negative testing of guest writes to PERF_CAPABILITIES KVM: selftests: Test all immutable non-format bits in PERF_CAPABILITIES KVM: selftests: Test all fungible features in PERF_CAPABILITIES KVM: selftests: Drop now-redundant checks on PERF_CAPABILITIES writes KVM: selftests: Verify KVM preserves userspace writes to "durable" MSRs KVM: selftests: Print out failing MSR and value in vcpu_set_msr() KVM: selftests: Assert that full-width PMC writes are supported if PDCM=1 KVM: selftests: Move 0/initial value PERF_CAPS checks to dedicated sub-test KVM: x86/pmu: Zero out LBR capabilities during PMU refresh KVM: x86/pmu: WARN and bug the VM if PMU is refreshed after vCPU has run ...
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c2
-rw-r--r--arch/x86/kvm/pmu.c21
-rw-r--r--arch/x86/kvm/pmu.h2
-rw-r--r--arch/x86/kvm/svm/pmu.c2
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c135
-rw-r--r--arch/x86/kvm/vmx/vmx.c16
-rw-r--r--arch/x86/kvm/x86.c102
-rw-r--r--arch/x86/kvm/x86.h13
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h45
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c232
13 files changed, 419 insertions, 157 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 76464480ed8b..2865c3cb3501 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -513,6 +513,7 @@ struct kvm_pmc {
#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
#define KVM_AMD_PMC_MAX_GENERIC 6
struct kvm_pmu {
+ u8 version;
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
@@ -525,7 +526,6 @@ struct kvm_pmu {
u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u64 raw_event_mask;
- u8 version;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6972e0be60fa..b944492faefa 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -414,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
- if (vcpu->arch.last_vmentry_cpu != -1) {
+ if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
return r;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e3d02f059437..c8961f45e3b1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5476,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* Changing guest CPUID after KVM_RUN is forbidden, see the comment in
* kvm_arch_vcpu_ioctl().
*/
- KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
+ KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bb1733bb5bf9..1690d41c1830 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
-static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
+static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
{
return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}
@@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return is_fixed_event_allowed(filter, pmc->idx);
}
+static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
+{
+ return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+ check_pmu_event_filter(pmc);
+}
+
static void reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
pmc_pause_counter(pmc);
- if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
- goto reprogram_complete;
-
- if (!check_pmu_event_filter(pmc))
+ if (!pmc_event_is_allowed(pmc))
goto reprogram_complete;
if (pmc->counter < pmc->prev_counter)
@@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
+ if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
+ return;
+
+ bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
static_call(kvm_x86_pmu_refresh)(vcpu);
}
@@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
pmc->prev_counter = pmc->counter;
pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
@@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
- if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
+ if (!pmc || !pmc_event_is_allowed(pmc))
continue;
/* Ignore checks for edge detect, pin control, invert and CMASK bits */
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index be62c16f2265..5c7bbf03b599 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
KVM_PMC_MAX_FIXED);
}
-static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
{
set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index cc77a0681800..5fa939e411d8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
data &= ~pmu->reserved_bits;
if (data != pmc->eventsel) {
pmc->eventsel = data;
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3875f45c9452..eb308c9994f9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4173,7 +4173,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return false;
case MSR_IA32_SMBASE:
if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..741efe2c497b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
- kvm_pmu_request_counter_reprogam(pmc);
+ kvm_pmu_request_counter_reprogram(pmc);
}
}
@@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
int bit;
- struct kvm_pmc *pmc;
- for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
- pmc = intel_pmc_idx_to_pmc(pmu, bit);
- if (pmc)
- kvm_pmu_request_counter_reprogam(pmc);
- }
+ if (!diff)
+ return;
+
+ for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ set_bit(bit, pmu->reprogram_pmi);
+ kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
}
static bool intel_hw_event_available(struct kvm_pmc *pmc)
@@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_STATUS:
msr_info->data = pmu->global_status;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
msr_info->data = pmu->global_ctrl;
- return 0;
+ break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
- return 0;
+ break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
- return 0;
+ break;
case MSR_IA32_DS_AREA:
msr_info->data = pmu->ds_area;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
msr_info->data = pmu->pebs_data_cfg;
- return 0;
+ break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_GP];
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_FIXED];
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
msr_info->data = pmc->eventsel;
- return 0;
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
+ break;
+ }
+ return 1;
}
- return 1;
+ return 0;
}
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
- if (pmu->fixed_ctr_ctrl == data)
- return 0;
- if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+ if (data & pmu->fixed_ctr_ctrl_mask)
+ return 1;
+
+ if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
- return 0;
- }
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
- if (msr_info->host_initiated) {
- pmu->global_status = data;
- return 0;
- }
- break; /* RO MSR */
+ if (!msr_info->host_initiated)
+ return 1; /* RO MSR */
+
+ pmu->global_status = data;
+ break;
case MSR_CORE_PERF_GLOBAL_CTRL:
- if (pmu->global_ctrl == data)
- return 0;
- if (kvm_valid_perf_global_ctrl(pmu, data)) {
+ if (!kvm_valid_perf_global_ctrl(pmu, data))
+ return 1;
+
+ if (pmu->global_ctrl != data) {
diff = pmu->global_ctrl ^ data;
pmu->global_ctrl = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & pmu->global_ovf_ctrl_mask)) {
- if (!msr_info->host_initiated)
- pmu->global_status &= ~data;
- return 0;
- }
+ if (data & pmu->global_ovf_ctrl_mask)
+ return 1;
+
+ if (!msr_info->host_initiated)
+ pmu->global_status &= ~data;
break;
case MSR_IA32_PEBS_ENABLE:
- if (pmu->pebs_enable == data)
- return 0;
- if (!(data & pmu->pebs_enable_mask)) {
+ if (data & pmu->pebs_enable_mask)
+ return 1;
+
+ if (pmu->pebs_enable != data) {
diff = pmu->pebs_enable ^ data;
pmu->pebs_enable = data;
reprogram_counters(pmu, diff);
- return 0;
}
break;
case MSR_IA32_DS_AREA:
@@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (is_noncanonical_address(data, vcpu))
return 1;
+
pmu->ds_area = data;
- return 0;
+ break;
case MSR_PEBS_DATA_CFG:
- if (pmu->pebs_data_cfg == data)
- return 0;
- if (!(data & pmu->pebs_data_cfg_mask)) {
- pmu->pebs_data_cfg = data;
- return 0;
- }
+ if (data & pmu->pebs_data_cfg_mask)
+ return 1;
+
+ pmu->pebs_data_cfg = data;
break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
(data & ~pmu->counter_bitmask[KVM_PMC_GP]))
return 1;
+
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
- return 0;
+ break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
- if (data == pmc->eventsel)
- return 0;
reserved_bits = pmu->reserved_bits;
if ((pmc->idx == 2) &&
(pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
- if (!(data & reserved_bits)) {
+ if (data & reserved_bits)
+ return 1;
+
+ if (data != pmc->eventsel) {
pmc->eventsel = data;
- kvm_pmu_request_counter_reprogam(pmc);
- return 0;
+ kvm_pmu_request_counter_reprogram(pmc);
}
- } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
- return 0;
+ break;
+ } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
+ break;
+ }
+ /* Not a known PMU MSR. */
+ return 1;
}
- return 1;
+ return 0;
}
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
@@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
+ memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
+
+ /*
+ * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
+ * and PMU refresh is disallowed after the vCPU has run, i.e. this code
+ * should never be reached while KVM is passing through MSRs.
+ */
+ if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
+ return;
+
entry = kvm_find_cpuid_entry(vcpu, 0xa);
if (!entry || !vcpu->kvm->arch.enable_pmu)
return;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 829db017bbb2..44fb619803b8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1946,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@@ -2031,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested_vmx_allowed(vcpu))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@@ -2340,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->msr_ia32_sgxlepubkeyhash
[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
break;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!nested_vmx_allowed(vcpu))
@@ -6930,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
- case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
case MSR_AMD64_TSC_RATIO:
@@ -7756,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
- perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
+ x86_perf_get_lbr(&lbr);
+ if (lbr.nr)
+ perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+ }
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb87a2e1ad71..d3da3acd99f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1556,39 +1556,41 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
- * List of msr numbers which are used to expose MSR-based features that
- * can be used by a hypervisor to validate requested CPU features.
+ * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ * that are effectively CPUID leafs. VMX MSRs are also included in the set of
+ * feature MSRs, but are handled separately to allow expedited lookups.
*/
-static const u32 msr_based_features_all[] = {
- MSR_IA32_VMX_BASIC,
- MSR_IA32_VMX_TRUE_PINBASED_CTLS,
- MSR_IA32_VMX_PINBASED_CTLS,
- MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
- MSR_IA32_VMX_PROCBASED_CTLS,
- MSR_IA32_VMX_TRUE_EXIT_CTLS,
- MSR_IA32_VMX_EXIT_CTLS,
- MSR_IA32_VMX_TRUE_ENTRY_CTLS,
- MSR_IA32_VMX_ENTRY_CTLS,
- MSR_IA32_VMX_MISC,
- MSR_IA32_VMX_CR0_FIXED0,
- MSR_IA32_VMX_CR0_FIXED1,
- MSR_IA32_VMX_CR4_FIXED0,
- MSR_IA32_VMX_CR4_FIXED1,
- MSR_IA32_VMX_VMCS_ENUM,
- MSR_IA32_VMX_PROCBASED_CTLS2,
- MSR_IA32_VMX_EPT_VPID_CAP,
- MSR_IA32_VMX_VMFUNC,
-
+static const u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
};
-static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+ (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
/*
+ * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ * patch, are immutable once the vCPU model is defined.
+ */
+static bool kvm_is_immutable_feature_msr(u32 msr)
+{
+ int i;
+
+ if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+ if (msr == msr_based_features_all_except_vmx[i])
+ return msr != MSR_IA32_UCODE_REV;
+ }
+
+ return false;
+}
+
+/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
* does not yet virtualize. These include:
* 10 - MISC_PACKAGE_CTRLS
@@ -2205,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
+ u64 val;
+
+ /*
+ * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
+ * not support modifying the guest vCPU model on the fly, e.g. changing
+ * the nVMX capabilities while L2 is running is nonsensical. Ignore
+ * writes of the same value, e.g. to allow userspace to blindly stuff
+ * all MSRs when emulating RESET.
+ */
+ if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
+ if (do_get_msr(vcpu, index, &val) || *data != val)
+ return -EINVAL;
+
+ return 0;
+ }
+
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
@@ -3627,9 +3645,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_perf_cap)
return 1;
+ /*
+ * Note, this is not just a performance optimization! KVM
+ * disallows changing feature MSRs after the vCPU has run; PMU
+ * refresh will bug the VM if called after the vCPU has run.
+ */
+ if (vcpu->arch.perf_capabilities == data)
+ break;
+
vcpu->arch.perf_capabilities = data;
kvm_pmu_refresh(vcpu);
- return 0;
+ break;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
return 1;
@@ -7048,6 +7074,18 @@ out:
return r;
}
+static void kvm_probe_feature_msr(u32 msr_index)
+{
+ struct kvm_msr_entry msr = {
+ .index = msr_index,
+ };
+
+ if (kvm_get_msr_feature(&msr))
+ return;
+
+ msr_based_features[num_msr_based_features++] = msr_index;
+}
+
static void kvm_probe_msr_to_save(u32 msr_index)
{
u32 dummy[2];
@@ -7123,7 +7161,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
msrs_to_save[num_msrs_to_save++] = msr_index;
}
-static void kvm_init_msr_list(void)
+static void kvm_init_msr_lists(void)
{
unsigned i;
@@ -7149,15 +7187,11 @@ static void kvm_init_msr_list(void)
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
}
- for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
- struct kvm_msr_entry msr;
+ for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
+ kvm_probe_feature_msr(i);
- msr.index = msr_based_features_all[i];
- if (kvm_get_msr_feature(&msr))
- continue;
-
- msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
- }
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
+ kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -9491,7 +9525,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.max_guest_tsc_khz = max;
}
kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
- kvm_init_msr_list();
+ kvm_init_msr_lists();
return 0;
out_unwind_ops:
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 203fb6640b5b..fbef05c0bdeb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -40,6 +40,14 @@ void kvm_spurious_fault(void);
failed; \
})
+/*
+ * The first...last VMX feature MSRs that are emulated by KVM. This may or may
+ * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
+ * associated feature that KVM supports for nested virtualization.
+ */
+#define KVM_FIRST_EMULATED_VMX_MSR MSR_IA32_VMX_BASIC
+#define KVM_LAST_EMULATED_VMX_MSR MSR_IA32_VMX_VMFUNC
+
#define KVM_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
#define KVM_DEFAULT_PLE_WINDOW_GROW 2
@@ -83,6 +91,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.last_vmentry_cpu != -1;
+}
+
static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending ||
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 3538fa6db72d..d73a3daa8d32 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -942,14 +942,45 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
-static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r = _vcpu_set_msr(vcpu, msr_index, msr_value);
-
- TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
-}
+/*
+ * Assert on an MSR access(es) and pretty print the MSR name when possible.
+ * Note, the caller provides the stringified name so that the name of macro is
+ * printed, not the value the macro resolves to (due to macro expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
+do { \
+ if (__builtin_constant_p(msr)) { \
+ TEST_ASSERT(cond, fmt, str, args); \
+ } else if (!(cond)) { \
+ char buf[16]; \
+ \
+ snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
+ TEST_ASSERT(cond, fmt, buf, args); \
+ } \
+} while (0)
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc. This is NOT an exhaustive list! The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
+{
+ return msr != MSR_IA32_TSC;
+}
+
+#define vcpu_set_msr(vcpu, msr, val) \
+do { \
+ uint64_t r, v = val; \
+ \
+ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
+ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
+ if (!is_durable_msr(msr)) \
+ break; \
+ r = vcpu_get_msr(vcpu, msr); \
+ TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index 2933b1bd754e..4c90f76930f9 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -14,12 +14,11 @@
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
#include "kvm_util.h"
#include "vmx.h"
-#define PMU_CAP_FW_WRITES (1ULL << 13)
-#define PMU_CAP_LBR_FMT 0x3f
-
union perf_capabilities {
struct {
u64 lbr_format:6;
@@ -36,60 +35,221 @@ union perf_capabilities {
u64 capabilities;
};
-static void guest_code(void)
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
+
+static void guest_code(uint64_t current_val)
{
- wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+ uint8_t vector;
+ int i;
+
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
+ GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
+
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
+ GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+
+ for (i = 0; i < 64; i++) {
+ vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
+ current_val ^ BIT_ULL(i));
+ GUEST_ASSERT_2(vector == GP_VECTOR,
+ current_val ^ BIT_ULL(i), vector);
+ }
+
+ GUEST_DONE();
}
-int main(int argc, char *argv[])
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
{
- struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- int ret;
- union perf_capabilities host_cap;
- uint64_t val;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ struct ucall uc;
+ int r, i;
- host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
- host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
- TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
- TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
- /* testcase 1, set capabilities when we have PDCM bit */
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+ ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
- /* check capabilities can be retrieved with KVM_GET_MSR */
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
- /* check whatever we write with KVM_SET_MSR is _not_ modified */
- vcpu_run(vcpu);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+static void test_basic_perf_capabilities(union perf_capabilities host_cap)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
- /* testcase 2, check valid LBR formats are accepted */
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ kvm_vm_free(vm);
+}
- vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
/*
- * Testcase 3, check that an "invalid" LBR format is rejected. Only an
- * exact match of the host's format (and 0/disabled) is allowed.
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
*/
- for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
- if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
+ }
+
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
continue;
- ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
- TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
}
- printf("Completed perf capability tests.\n");
kvm_vm_free(vm);
}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ union perf_capabilities host_cap;
+
+ TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ test_basic_perf_capabilities(host_cap);
+ test_fungible_perf_capabilities(host_cap);
+ test_immutable_perf_capabilities(host_cap);
+ test_guest_wrmsr_perf_capabilities(host_cap);
+ test_lbr_perf_capabilities(host_cap);
+}