Merge branch 'kvm-5.16-fixes' into kvm-master
* Fix misuse of gfn-to-pfn cache when recording guest steal time / preempted status

* Fix selftests on APICv machines

* Fix sparse warnings

* Fix detection of KVM features in CPUID

* Cleanups for bogus writes to MSR_KVM_PV_EOI_EN

* Fixes and cleanups for MSR bitmap handling

* Cleanups for INVPCID

* Make x86 KVM_SOFT_MAX_VCPUS consistent with other architectures
bonzini committed Nov 11, 2021
2 parents 1f05833 + da1bfd5 commit f5396f2
Showing 20 changed files with 317 additions and 264 deletions.
5 changes: 3 additions & 2 deletions arch/x86/include/asm/kvm_host.h
@@ -38,7 +38,6 @@
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

#define KVM_MAX_VCPUS 1024
- #define KVM_SOFT_MAX_VCPUS 710

/*
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@@ -725,6 +724,7 @@ struct kvm_vcpu_arch {

int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
+ u32 kvm_cpuid_base;

u64 reserved_gpa_bits;
int maxphyaddr;
@@ -748,7 +748,7 @@ struct kvm_vcpu_arch {
u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_pfn_cache cache;
+ struct gfn_to_hva_cache cache;
} st;

u64 l1_tsc_offset;
@@ -1034,6 +1034,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_IRQWIN 3
#define APICV_INHIBIT_REASON_PIT_REINJ 4
#define APICV_INHIBIT_REASON_X2APIC 5
+ #define APICV_INHIBIT_REASON_BLOCKIRQ 6

struct kvm_arch {
unsigned long n_used_mmu_pages;
5 changes: 4 additions & 1 deletion arch/x86/include/asm/processor.h
@@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
#endif

+ #define for_each_possible_hypervisor_cpuid_base(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)

static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
uint32_t base, eax, signature[3];

- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);

if (!memcmp(sig, signature, 12) &&
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/kvm_para.h
@@ -8,6 +8,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
+ #define KVM_SIGNATURE "KVMKVMKVM\0\0\0"

/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should
2 changes: 1 addition & 1 deletion arch/x86/kernel/kvm.c
@@ -809,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
return 0; /* So we don't blow up on old processors */

if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+ return hypervisor_cpuid_base(KVM_SIGNATURE, 0);

return 0;
}
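For context, the guest-side probe touched by the two hunks above can be reproduced from user space. A minimal sketch, assuming a GCC/Clang toolchain that provides <cpuid.h> with the __cpuid macro and assuming the program runs inside a KVM guest; it is illustrative only, not the kernel's implementation:

/* Scan the hypervisor CPUID range the same way
 * for_each_possible_hypervisor_cpuid_base() does and look for the
 * 12 signature bytes returned in EBX/ECX/EDX. Illustrative only. */
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t base, eax, sig[3];

	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
		__cpuid(base, eax, sig[0], sig[1], sig[2]);
		(void)eax;
		if (!memcmp(sig, "KVMKVMKVM\0\0\0", 12)) {
			printf("KVM signature at CPUID base 0x%x\n", base);
			return 0;
		}
	}
	printf("no KVM signature found\n");
	return 1;
}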
93 changes: 64 additions & 29 deletions arch/x86/kvm/cpuid.c
@@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}

- void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ u32 function;
+ struct kvm_cpuid_entry2 *entry;

vcpu->arch.kvm_cpuid_base = 0;

for_each_possible_hypervisor_cpuid_base(function) {
entry = kvm_find_cpuid_entry(vcpu, function, 0);

- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
if (entry) {
u32 signature[3];

signature[0] = entry->ebx;
signature[1] = entry->ecx;
signature[2] = entry->edx;

BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
vcpu->arch.kvm_cpuid_base = function;
break;
}
}
}
}

struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
u32 base = vcpu->arch.kvm_cpuid_base;

if (!base)
return NULL;

return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
}

void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);

/*
* save the feature bitmap to avoid cpuid lookup for every PV
@@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ best = kvm_find_kvm_cpuid_features(vcpu);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
}

static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
int r;

r = kvm_check_cpuid(e2, nent);
if (r)
return r;

kvfree(vcpu->arch.cpuid_entries);
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = nent;

kvm_update_kvm_cpuid_base(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);

return 0;
}

/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
e2[i].padding[2] = 0;
}

- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- goto out_free_cpuid;
- }

- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;

- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);

out_free_cpuid:
kvfree(e);
@@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return PTR_ERR(e2);
}

- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- return r;
- }

- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;

- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);

- return 0;
+ return r;
}

int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case KVM_CPUID_SIGNATURE: {
- static const char signature[12] = "KVMKVMKVM\0\0";
- const u32 *sigptr = (const u32 *)signature;
+ const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
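The point of caching kvm_cpuid_base above is that userspace can expose KVM's leaves at an offset base (for example 0x40000100 when Hyper-V identification occupies 0x40000000), so the features leaf has to be derived as base | KVM_CPUID_FEATURES instead of being assumed to sit at 0x40000001. A stand-alone sketch of that arithmetic, with the constants copied here purely for illustration:

/* Illustration of how a relocated KVM features leaf is computed;
 * mirrors the base | KVM_CPUID_FEATURES lookup in
 * kvm_find_kvm_cpuid_features(), but this is not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define KVM_CPUID_SIGNATURE 0x40000000u
#define KVM_CPUID_FEATURES  0x40000001u

static uint32_t kvm_features_leaf(uint32_t base)
{
	return base | KVM_CPUID_FEATURES;
}

int main(void)
{
	/* Default placement: 0x40000001. */
	printf("default base: 0x%x\n", kvm_features_leaf(KVM_CPUID_SIGNATURE));
	/* Relocated placement: 0x40000101. */
	printf("offset base:  0x%x\n", kvm_features_leaf(0x40000100u));
	return 0;
}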
4 changes: 2 additions & 2 deletions arch/x86/kvm/hyperv.c
@@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)

if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv_vcpu->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
+ if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
- if (kvm_lapic_enable_pv_eoi(vcpu,
+ if (kvm_lapic_set_pv_eoi(vcpu,
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
sizeof(struct hv_vp_assist_page)))
return 1;
23 changes: 14 additions & 9 deletions arch/x86/kvm/lapic.c
@@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}

- int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+ int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
+ int ret;

if (!IS_ALIGNED(addr, 4))
return 1;

- vcpu->arch.pv_eoi.msr_val = data;
- if (!pv_eoi_enabled(vcpu))
- return 0;
+ if (data & KVM_MSR_ENABLED) {
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;

- if (addr == ghc->gpa && len <= ghc->len)
- new_len = ghc->len;
- else
- new_len = len;
+ ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ if (ret)
+ return ret;
+ }

+ vcpu->arch.pv_eoi.msr_val = data;

- return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ return 0;
}

int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
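The behavioral change in the hunk above: the gfn-to-hva cache is only (re)initialized when the enable bit is set, and msr_val is committed only after that initialization succeeds, so a bogus write to MSR_KVM_PV_EOI_EN no longer leaves stale state behind. A stand-alone sketch of that ordering, using hypothetical stand-in types rather than the kernel's (the new_len reuse detail is omitted):

/* Hedged sketch of the kvm_lapic_set_pv_eoi() ordering; stand-in types
 * replace the kernel's, so this is illustrative, not KVM code. */
#include <stdint.h>

#define KVM_MSR_ENABLED 1ULL

struct hva_cache { uint64_t gpa; unsigned long len; };
struct pv_eoi { uint64_t msr_val; struct hva_cache data; };

/* Stand-in for kvm_gfn_to_hva_cache_init(); returns 0 on success. */
static int cache_init(struct hva_cache *c, uint64_t gpa, unsigned long len)
{
	c->gpa = gpa;
	c->len = len;
	return 0;
}

static int set_pv_eoi(struct pv_eoi *pv, uint64_t data, unsigned long len)
{
	uint64_t addr = data & ~KVM_MSR_ENABLED;

	if (addr & 3)			/* address must be 4-byte aligned */
		return 1;

	if (data & KVM_MSR_ENABLED) {	/* map the cache only when enabling */
		int ret = cache_init(&pv->data, addr, len);

		if (ret)
			return ret;	/* msr_val stays untouched on failure */
	}

	pv->msr_val = data;		/* commit the MSR value last */
	return 0;
}

int main(void)
{
	struct pv_eoi pv = { 0 };

	/* A misaligned enable attempt is rejected before any state changes. */
	return set_pv_eoi(&pv, 0x1002ULL | KVM_MSR_ENABLED, 8) == 1 ? 0 : 1;
}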
2 changes: 1 addition & 1 deletion arch/x86/kvm/lapic.h
@@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);

- int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
+ int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);

#define VEC_POS(v) ((v) & (32 - 1))
10 changes: 5 additions & 5 deletions arch/x86/kvm/mmu/mmu.c
@@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
new_spte |= PT_WRITABLE_MASK;

/*
- * Do not fix write-permission on the large spte. Since
- * we only dirty the first page into the dirty-bitmap in
+ * Do not fix write-permission on the large spte when
+ * dirty logging is enabled. Since we only dirty the
+ * first page into the dirty-bitmap in
* fast_pf_fix_direct_spte(), other pages are missed
* if its slot has dirty logging enabled.
*
* Instead, we let the slow page fault path create a
* normal spte to fix the access.
- *
- * See the comments in kvm_arch_commit_memory_region().
*/
- if (sp->role.level > PG_LEVEL_4K)
+ if (sp->role.level > PG_LEVEL_4K &&
+ kvm_slot_dirty_track_enabled(fault->slot))
break;
}

2 changes: 1 addition & 1 deletion arch/x86/kvm/mmu/tdp_mmu.c
@@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault,
struct tdp_iter *iter)
{
- struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
u64 new_spte;
int ret = RET_PF_FIXED;
bool wrprot = false;
2 changes: 1 addition & 1 deletion arch/x86/kvm/pmu.c
@@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
}

/* check if idx is a valid index to access PMU */
- int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+ bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
4 changes: 2 additions & 2 deletions arch/x86/kvm/pmu.h
@@ -32,7 +32,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
- int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+ bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
- int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+ bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
3 changes: 2 additions & 1 deletion arch/x86/kvm/svm/avic.c
@@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
- BIT(APICV_INHIBIT_REASON_X2APIC);
+ BIT(APICV_INHIBIT_REASON_X2APIC) |
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ);

return supported & BIT(bit);
}
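APICV_INHIBIT_REASON_BLOCKIRQ is the inhibit raised while a vCPU is under KVM_GUESTDBG_BLOCKIRQ debug control, which cannot be honored while AVIC delivers interrupts directly, so SVM now lists it as a supported reason. A hedged user-space sketch of the ioctl that triggers it; it assumes a vcpu_fd obtained elsewhere through KVM_CREATE_VM/KVM_CREATE_VCPU and headers new enough to define KVM_GUESTDBG_BLOCKIRQ:

/* Illustrative helper, not part of the commit: ask KVM to hold back
 * interrupt injection for a debugged vCPU. While this is in effect,
 * APICv/AVIC is inhibited (APICV_INHIBIT_REASON_BLOCKIRQ). */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int block_guest_irqs(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_BLOCKIRQ;

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}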
5 changes: 2 additions & 3 deletions arch/x86/kvm/svm/pmu.c
@@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}

- /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
- static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+ static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

idx &= ~(3u << 30);

- return (idx >= pmu->nr_arch_gp_counters);
+ return idx < pmu->nr_arch_gp_counters;
}

/* idx is the ECX register of RDPMC instruction */
5 changes: 0 additions & 5 deletions arch/x86/kvm/svm/svm.c
@@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;

- if (type > 3) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }

return kvm_handle_invpcid(vcpu, type, gva);
}

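The open-coded type check above is dropped because the common helper this handler already calls, kvm_handle_invpcid(), rejects unknown INVPCID types itself. A stand-alone mock of that kind of centralized validation (not the kernel's helper; the type values mirror the SDM's INVPCID types):

/* Illustrative mock of centralizing INVPCID type validation in one
 * helper, so per-vendor exit handlers need no duplicate check. */
#include <stdio.h>

enum invpcid_type {
	INVPCID_ADDR = 0,		/* individual-address invalidation */
	INVPCID_SINGLE_CTXT = 1,	/* single-context invalidation */
	INVPCID_ALL_INCL_GLOBAL = 2,	/* all contexts, including globals */
	INVPCID_ALL_NON_GLOBAL = 3,	/* all contexts, excluding globals */
};

static int handle_invpcid(unsigned long type)
{
	switch (type) {
	case INVPCID_ADDR:
	case INVPCID_SINGLE_CTXT:
	case INVPCID_ALL_INCL_GLOBAL:
	case INVPCID_ALL_NON_GLOBAL:
		printf("type %lu handled\n", type);
		return 0;
	default:
		/* The central handler raises #GP for unknown types. */
		printf("type %lu rejected\n", type);
		return 1;
	}
}

int main(void)
{
	handle_invpcid(1);	/* valid */
	handle_invpcid(7);	/* rejected centrally */
	return 0;
}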
