mirror of https://github.com/torvalds/linux.git
KVM x86 MMU changes for 6.18
- Recover possible NX huge pages within the TDP MMU under read lock to
reduce guest jitter when restoring NX huge pages.
- Return -EAGAIN during prefault if userspace concurrently deletes/moves the
relevant memslot to fix an issue where prefaulting could deadlock with the
memslot update.
- Don't retry in TDX's anti-zero-step mitigation if the target memslot is
invalid, i.e. is being deleted or moved, to fix a deadlock scenario similar
to the aforementioned prefaulting case.
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEKTobbabEP7vbhhN9OlYIJqCjN/0FAmjXHaEACgkQOlYIJqCj
N/1uDxAAxGMl1q1Hg0tpVPw7PdcourXlVYJjFzsrK6CdtZpL7n2GJPVhEFBDovud
oIM9IIiP5f2UDtWeRb6b/mm9INqwTB8lyswbJk/tO+CshBiBdE7PfDbzDzvj9lAv
Uecc6tQhv+CDpJcSf7t5OqgiRo5gEBTXZZj0l5GOdtiaOU09eq4ttZTME5S1jQgh
kBddFd3glWeMLv67cTNCxdHsOFnaVWIBoupfw7Fv7LVJ1k6cgKyHAhjfq8A9elEK
3CyDo8DZ8MG4aguhHzAUQuEM9ELMxOTyJG8xS2BWtFA/glbvUBnOfGeyTmHgo/nN
qKyjytlpmO0yIlehTd/5tLfpidL8l30VN7+nDpqwTjCDEz9bC39zC9zBmKni84Dt
wItfmELb6lbvprA+FOseiRwk7/2quLrgc4y21GI29Zqbf6wMoQEnRHF/moFZ3cqg
C/SP1Ev6N5ENM2BZG9mFSRWr8e2yyan8YWs+AUtsBEM82KaeJrMlZ4yqA1m33a5T
YK5eL3DablObdfvvz1YXCVxByQ7aIbVCpE3VVigeyHrqoR/EFwZMzYLouOI34jjN
Nj5+Qck6VMhI+OetUlcXS1D/DIHgpDgZFPcgeLURiwO0l62H/gYLHuoCek4YmkIi
30ZwVXubBWDg5TcxEi5oIbVfyZfHNi+MyeLMWLEy6hEdnFsTsZU=
=6qMx
-----END PGP SIGNATURE-----
Merge tag 'kvm-x86-mmu-6.18' of https://github.com/kvm-x86/linux into HEAD
commit 5b0d0d8542
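For context on the second bullet: with this merge, a KVM_PRE_FAULT_MEMORY call that races with a memslot deletion or move fails with -EAGAIN instead of retrying inside KVM. The sketch below is an illustrative userspace retry loop, not code from this merge; the helper name, the vCPU fd handling, and the retry policy are assumptions, and it relies only on the documented struct kvm_pre_fault_memory ABI (gpa and size are updated to reflect partial progress).

/*
 * Illustrative userspace sketch (not from this merge): retry
 * KVM_PRE_FAULT_MEMORY when it fails because the backing memslot is being
 * deleted or moved.  Assumes a <linux/kvm.h> that provides
 * KVM_PRE_FAULT_MEMORY and struct kvm_pre_fault_memory.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Prefault [gpa, gpa + size) through the given vCPU fd; returns 0 or -errno. */
static int prefault_range(int vcpu_fd, __u64 gpa, __u64 size)
{
	struct kvm_pre_fault_memory range = {
		.gpa = gpa,
		.size = size,
	};

	while (range.size) {
		/* KVM updates .gpa/.size to reflect partial progress. */
		if (!ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range))
			continue;

		/*
		 * -EAGAIN now indicates the memslot is being deleted or moved;
		 * a real VMM would wait for its own memslot update to finish
		 * instead of busy-retrying.  -EINTR just means a signal
		 * interrupted the ioctl partway through.
		 */
		if (errno == EAGAIN || errno == EINTR)
			continue;

		fprintf(stderr, "prefault failed at gpa 0x%llx: %s\n",
			(unsigned long long)range.gpa, strerror(errno));
		return -errno;
	}

	return 0;
}

The flags field is left zero, and availability of the ioctl should be checked via KVM_CAP_PRE_FAULT_MEMORY before relying on it at all.
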
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1348,6 +1348,30 @@ enum kvm_apicv_inhibit {
 	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED),		\
 	__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)

+struct kvm_possible_nx_huge_pages {
+	/*
+	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
+	 * replaced by an NX huge page.  A shadow page is on this list if its
+	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+	 * and there are no other conditions that prevent a huge page, e.g.
+	 * the backing host page is huge, dirty logging is not enabled for its
+	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
+	 * guarantee an NX huge page will be created in its stead, e.g. if the
+	 * guest attempts to execute from the region then KVM obviously can't
+	 * create an NX huge page (without hanging the guest).
+	 */
+	struct list_head pages;
+	u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+	KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+	KVM_TDP_MMU,
+#endif
+	KVM_NR_MMU_TYPES,
+};
+
 struct kvm_arch {
 	unsigned long n_used_mmu_pages;
 	unsigned long n_requested_mmu_pages;
@@ -1360,18 +1384,7 @@ struct kvm_arch {
 	bool pre_fault_allowed;
 	struct hlist_head *mmu_page_hash;
 	struct list_head active_mmu_pages;
-	/*
-	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
-	 * replaced by an NX huge page.  A shadow page is on this list if its
-	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
-	 * and there are no other conditions that prevent a huge page, e.g.
-	 * the backing host page is huge, dirty logging is not enabled for its
-	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
-	 * guarantee an NX huge page will be created in its stead, e.g. if the
-	 * guest attempts to execute from the region then KVM obviously can't
-	 * create an NX huge page (without hanging the guest).
-	 */
-	struct list_head possible_nx_huge_pages;
+	struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 	struct kvm_page_track_notifier_head track_notifier_head;
 #endif
@@ -1526,7 +1539,7 @@ struct kvm_arch {
 	 * is held in read mode:
 	 *  - tdp_mmu_roots (above)
 	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
-	 *  - possible_nx_huge_pages;
+	 *  - possible_nx_huge_pages[KVM_TDP_MMU];
 	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
 	 * Because the lock is only taken within the MMU lock, strictly
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
 }

-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type)
 {
 	/*
 	 * If it's possible to replace the shadow page with an NX huge page,
@@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 		return;

 	++kvm->stat.nx_lpage_splits;
+	++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_add_tail(&sp->possible_nx_huge_page_link,
-		      &kvm->arch.possible_nx_huge_pages);
+		      &kvm->arch.possible_nx_huge_pages[mmu_type].pages);
 }

 static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	sp->nx_huge_page_disallowed = true;

 	if (nx_huge_page_possible)
-		track_possible_nx_huge_page(kvm, sp);
+		track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }

 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,12 +821,14 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }

-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type)
 {
 	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;

 	--kvm->stat.nx_lpage_splits;
+	--kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_del_init(&sp->possible_nx_huge_page_link);
 }

@@ -832,7 +836,7 @@ static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	sp->nx_huge_page_disallowed = false;

-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }

 static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
@@ -4663,10 +4667,16 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
 	/*
 	 * Retry the page fault if the gfn hit a memslot that is being deleted
 	 * or moved.  This ensures any existing SPTEs for the old memslot will
-	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.
+	 * be zapped before KVM inserts a new MMIO SPTE for the gfn.  Punt the
+	 * error to userspace if this is a prefault, as KVM's prefaulting ABI
+	 * doesn't provide the same forward progress guarantees as KVM_RUN.
 	 */
-	if (slot->flags & KVM_MEMSLOT_INVALID)
+	if (slot->flags & KVM_MEMSLOT_INVALID) {
+		if (fault->prefetch)
+			return -EAGAIN;
+
 		return RET_PF_RETRY;
+	}

 	if (slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) {
 		/*
@@ -6751,11 +6761,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)

 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-	int r;
+	int r, i;

 	kvm->arch.shadow_mmio_value = shadow_mmio_value;
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);

 	if (tdp_mmu_enabled) {
@@ -7596,19 +7607,64 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 	return err;
 }

-static void kvm_recover_nx_huge_pages(struct kvm *kvm)
+static unsigned long nx_huge_pages_to_zap(struct kvm *kvm,
+					  enum kvm_mmu_type mmu_type)
 {
-	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
-	struct kvm_memory_slot *slot;
-	int rcu_idx;
+	unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages);
+	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
+}
+
+static bool kvm_mmu_sp_dirty_logging_enabled(struct kvm *kvm,
+					     struct kvm_mmu_page *sp)
+{
+	struct kvm_memory_slot *slot;
+
+	/*
+	 * Skip the memslot lookup if dirty tracking can't possibly be enabled,
+	 * as memslot lookups are relatively expensive.
+	 *
+	 * If a memslot update is in progress, reading an incorrect value of
+	 * kvm->nr_memslots_dirty_logging is not a problem: if it is becoming
+	 * zero, KVM will do an unnecessary memslot lookup; if it is becoming
+	 * nonzero, the page will be zapped unnecessarily.  Either way, this
+	 * only affects efficiency in racy situations, and not correctness.
+	 */
+	if (!atomic_read(&kvm->nr_memslots_dirty_logging))
+		return false;
+
+	slot = __gfn_to_memslot(kvm_memslots_for_spte_role(kvm, sp->role), sp->gfn);
+	if (WARN_ON_ONCE(!slot))
+		return false;
+
+	return kvm_slot_dirty_track_enabled(slot);
+}
+
+static void kvm_recover_nx_huge_pages(struct kvm *kvm,
+				      const enum kvm_mmu_type mmu_type)
+{
+#ifdef CONFIG_X86_64
+	const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU;
+	spinlock_t *tdp_mmu_pages_lock = &kvm->arch.tdp_mmu_pages_lock;
+#else
+	const bool is_tdp_mmu = false;
+	spinlock_t *tdp_mmu_pages_lock = NULL;
+#endif
+	unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type);
+	struct list_head *nx_huge_pages;
 	struct kvm_mmu_page *sp;
-	unsigned int ratio;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
-	ulong to_zap;
+	int rcu_idx;
+
+	nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages;

 	rcu_idx = srcu_read_lock(&kvm->srcu);
-	write_lock(&kvm->mmu_lock);
+	if (is_tdp_mmu)
+		read_lock(&kvm->mmu_lock);
+	else
+		write_lock(&kvm->mmu_lock);

 	/*
 	 * Zapping TDP MMU shadow pages, including the remote TLB flush, must
@@ -7617,11 +7673,15 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 	 */
 	rcu_read_lock();

-	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(&kvm->arch.possible_nx_huge_pages))
+		if (is_tdp_mmu)
+			spin_lock(tdp_mmu_pages_lock);
+
+		if (list_empty(nx_huge_pages)) {
+			if (is_tdp_mmu)
+				spin_unlock(tdp_mmu_pages_lock);
 			break;
+		}

 		/*
 		 * We use a separate list instead of just using active_mmu_pages
@@ -7630,56 +7690,44 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 		 * the total number of shadow pages.  And because the TDP MMU
 		 * doesn't use active_mmu_pages.
 		 */
-		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
+		sp = list_first_entry(nx_huge_pages,
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
 		WARN_ON_ONCE(!sp->role.direct);

-		/*
-		 * Unaccount and do not attempt to recover any NX Huge Pages
-		 * that are being dirty tracked, as they would just be faulted
-		 * back in as 4KiB pages. The NX Huge Pages in this slot will be
-		 * recovered, along with all the other huge pages in the slot,
-		 * when dirty logging is disabled.
-		 *
-		 * Since gfn_to_memslot() is relatively expensive, it helps to
-		 * skip it if it the test cannot possibly return true.  On the
-		 * other hand, if any memslot has logging enabled, chances are
-		 * good that all of them do, in which case unaccount_nx_huge_page()
-		 * is much cheaper than zapping the page.
-		 *
-		 * If a memslot update is in progress, reading an incorrect value
-		 * of kvm->nr_memslots_dirty_logging is not a problem: if it is
-		 * becoming zero, gfn_to_memslot() will be done unnecessarily; if
-		 * it is becoming nonzero, the page will be zapped unnecessarily.
-		 * Either way, this only affects efficiency in racy situations,
-		 * and not correctness.
-		 */
-		slot = NULL;
-		if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
-			struct kvm_memslots *slots;
+		unaccount_nx_huge_page(kvm, sp);

-			slots = kvm_memslots_for_spte_role(kvm, sp->role);
-			slot = __gfn_to_memslot(slots, sp->gfn);
-			WARN_ON_ONCE(!slot);
-		}
+		if (is_tdp_mmu)
+			spin_unlock(tdp_mmu_pages_lock);

-		if (slot && kvm_slot_dirty_track_enabled(slot))
-			unaccount_nx_huge_page(kvm, sp);
-		else if (is_tdp_mmu_page(sp))
-			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
-		else
-			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+		/*
+		 * Do not attempt to recover any NX Huge Pages that are being
+		 * dirty tracked, as they would just be faulted back in as 4KiB
+		 * pages.  The NX Huge Pages in this slot will be recovered,
+		 * along with all the other huge pages in the slot, when dirty
+		 * logging is disabled.
+		 */
+		if (!kvm_mmu_sp_dirty_logging_enabled(kvm, sp)) {
+			if (is_tdp_mmu)
+				flush |= kvm_tdp_mmu_zap_possible_nx_huge_page(kvm, sp);
+			else
+				kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+
+		}
+
 		WARN_ON_ONCE(sp->nx_huge_page_disallowed);

 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
 			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 			rcu_read_unlock();

-			cond_resched_rwlock_write(&kvm->mmu_lock);
-			flush = false;
+			if (is_tdp_mmu)
+				cond_resched_rwlock_read(&kvm->mmu_lock);
+			else
+				cond_resched_rwlock_write(&kvm->mmu_lock);

+			flush = false;
 			rcu_read_lock();
 		}
 	}
@@ -7687,7 +7735,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)

 	rcu_read_unlock();

-	write_unlock(&kvm->mmu_lock);
+	if (is_tdp_mmu)
+		read_unlock(&kvm->mmu_lock);
+	else
+		write_unlock(&kvm->mmu_lock);
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }

@@ -7698,9 +7749,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 static bool kvm_nx_huge_page_recovery_worker(void *data)
 {
 	struct kvm *kvm = data;
+	long remaining_time;
 	bool enabled;
 	uint period;
-	long remaining_time;
+	int i;

 	enabled = calc_nx_huge_pages_recovery_period(&period);
 	if (!enabled)
@@ -7715,7 +7767,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	}

 	__set_current_state(TASK_RUNNING);
-	kvm_recover_nx_huge_pages(kvm);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		kvm_recover_nx_huge_pages(kvm, i);
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	return true;
 }
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -416,7 +416,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_page_fault *fault,
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);

-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type);

 #endif /* __KVM_X86_MMU_INTERNAL_H */
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)

 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	sp->nx_huge_page_disallowed = false;
-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }

@@ -925,23 +925,52 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
 	rcu_read_unlock();
 }

-bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm,
+					   struct kvm_mmu_page *sp)
 {
-	u64 old_spte;
+	struct tdp_iter iter = {
+		.old_spte = sp->ptep ? kvm_tdp_mmu_read_spte(sp->ptep) : 0,
+		.sptep = sp->ptep,
+		.level = sp->role.level + 1,
+		.gfn = sp->gfn,
+		.as_id = kvm_mmu_page_as_id(sp),
+	};
+
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	if (WARN_ON_ONCE(!is_tdp_mmu_page(sp)))
+		return false;

 	/*
-	 * This helper intentionally doesn't allow zapping a root shadow page,
-	 * which doesn't have a parent page table and thus no associated entry.
+	 * Root shadow pages don't have a parent page table and thus no
+	 * associated entry, but they can never be possible NX huge pages.
 	 */
 	if (WARN_ON_ONCE(!sp->ptep))
 		return false;

-	old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
-	if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
+	/*
+	 * Since mmu_lock is held in read mode, it's possible another task has
+	 * already modified the SPTE.  Zap the SPTE if and only if the SPTE
+	 * points at the SP's page table, as checking shadow-present isn't
+	 * sufficient, e.g. the SPTE could be replaced by a leaf SPTE, or even
+	 * another SP.  Note, spte_to_child_pt() also checks that the SPTE is
+	 * shadow-present, i.e. guards against zapping a frozen SPTE.
+	 */
+	if ((tdp_ptep_t)sp->spt != spte_to_child_pt(iter.old_spte, iter.level))
 		return false;

-	tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
-			 SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
+	/*
+	 * If a different task modified the SPTE, then it should be impossible
+	 * for the SPTE to still be used for the to-be-zapped SP.  Non-leaf
+	 * SPTEs don't have Dirty bits, KVM always sets the Accessed bit when
+	 * creating non-leaf SPTEs, and all other bits are immutable for non-
+	 * leaf SPTEs, i.e. the only legal operations for non-leaf SPTEs are
+	 * zapping and replacement.
+	 */
+	if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) {
+		WARN_ON_ONCE((tdp_ptep_t)sp->spt == spte_to_child_pt(iter.old_spte, iter.level));
+		return false;
+	}

 	return true;
 }
@@ -1303,7 +1332,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		    fault->req_level >= iter.level) {
 			spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 			if (sp->nx_huge_page_disallowed)
-				track_possible_nx_huge_page(kvm, sp);
+				track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 			spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 		}
 	}
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -64,7 +64,8 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu,
 }

 bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
-bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
+bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm,
+					   struct kvm_mmu_page *sp);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm,
 				  enum kvm_tdp_mmu_root_types root_types);
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -2002,6 +2002,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * handle retries locally in their EPT violation handlers.
 	 */
 	while (1) {
+		struct kvm_memory_slot *slot;
+
 		ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual);

 		if (ret != RET_PF_RETRY || !local_retry)
@@ -2015,6 +2017,15 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 			break;
 		}

+		/*
+		 * Bail if the memslot is invalid, i.e. is being deleted, as
+		 * faulting in will never succeed and this task needs to drop
+		 * SRCU in order to let memslot deletion complete.
+		 */
+		slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(gpa));
+		if (slot && slot->flags & KVM_MEMSLOT_INVALID)
+			break;
+
 		cond_resched();
 	}
 	return ret;
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2670,6 +2670,7 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn

 	return NULL;
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot);

 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {