Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "These are mostly Oliver's Arm changes: lock ordering fixes for the
  vGIC, and reverts for a buggy attempt to avoid RCU stalls on large
  VMs.

  Arm:

   - Invalidate nested MMUs upon freeing the PGD to avoid WARNs when
     visiting from an MMU notifier

   - Fixes to the TLB match process and TLB invalidation range for
     managing the VNCR pseudo-TLB

   - Prevent SPE from erroneously profiling guests due to UNKNOWN reset
     values in PMSCR_EL1

   - Fix save/restore of host MDCR_EL2 to account for eagerly
     programming at vcpu_load() on VHE systems

   - Correct lock ordering when dealing with VGIC LPIs, avoiding
     scenarios where an xarray's spinlock was nested with a *raw*
     spinlock

   - Permit stage-2 read permission aborts which are possible in the
     case of NV depending on the guest hypervisor's stage-2 translation

   - Call raw_spin_unlock() instead of the internal spinlock API

   - Fix parameter ordering when assigning VBAR_EL1

   - Reverted a couple of fixes for RCU stalls when destroying a stage-2
     page table.

      There appear to be some nasty refcounting / UAF issues lurking in
      those patches and the band-aid we tried to apply didn't hold.

  s390:

   - mm fixes, including userfaultfd bug fix

  x86:

   - Sync the vTPR from the local APIC to the VMCB even when AVIC is
     active.

     This fixes a bug where host updates to the vTPR, e.g. via
     KVM_SET_LAPIC or emulation of a guest access, are lost and result
     in interrupt delivery issues in the guest"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Sync TPR from LAPIC into VMCB::V_TPR even if AVIC is active
  Revert "KVM: arm64: Split kvm_pgtable_stage2_destroy()"
  Revert "KVM: arm64: Reschedule as needed when destroying the stage-2 page-tables"
  KVM: arm64: vgic: fix incorrect spinlock API usage
  KVM: arm64: Remove stage 2 read fault check
  KVM: arm64: Fix parameter ordering for VBAR_EL1 assignment
  KVM: arm64: nv: Fix incorrect VNCR invalidation range calculation
  KVM: arm64: vgic-v3: Indicate vgic_put_irq() may take LPI xarray lock
  KVM: arm64: vgic-v3: Don't require IRQs be disabled for LPI xarray lock
  KVM: arm64: vgic-v3: Erase LPIs from xarray outside of raw spinlocks
  KVM: arm64: Spin off release helper from vgic_put_irq()
  KVM: arm64: vgic-v3: Use bare refcount for VGIC LPIs
  KVM: arm64: vgic: Drop stale comment on IRQ active state
  KVM: arm64: VHE: Save and restore host MDCR_EL2 value correctly
  KVM: arm64: Initialize PMSCR_EL1 when in VHE
  KVM: arm64: nv: fix VNCR TLB ASID match logic for non-Global entries
  KVM: s390: Fix FOLL_*/FAULT_FLAG_* confusion
  KVM: s390: Fix incorrect usage of mmu_notifier_register()
  KVM: s390: Fix access to unavailable adapter indicator pages during postcopy
  KVM: arm64: Mark freed S2 MMUs as invalid
Linus Torvalds 2025-09-18 09:42:55 -07:00
commit 86cc796e5e
23 changed files with 155 additions and 177 deletions

@@ -1369,6 +1369,7 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
 }
 
 void kvm_init_host_debug_data(void);
+void kvm_debug_init_vhe(void);
 void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
 void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);

@@ -355,11 +355,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return pteref;
 }
 
-static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
-{
-	return pteref;
-}
-
 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
 	/*
@@ -389,11 +384,6 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
 }
 
-static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
-{
-	return rcu_dereference_raw(pteref);
-}
-
 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
 	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -561,26 +551,6 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
  */
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
-/**
- * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
- * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:	Intermediate physical address at which to place the mapping.
- * @size:	Size of the mapping.
- *
- * The page-table is assumed to be unreachable by any hardware walkers prior
- * to freeing and therefore no TLB invalidation is performed.
- */
-void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
-				      u64 addr, u64 size);
-
-/**
- * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
- * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
- *
- * It is assumed that the rest of the page-table is freed before this operation.
- */
-void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
-
 /**
  * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
  * @mm_ops:	Memory management callbacks.

@@ -179,9 +179,7 @@ struct pkvm_mapping {
 
 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops);
-void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
-				       u64 addr, u64 size);
-void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			    enum kvm_pgtable_prot prot, void *mc,
			    enum kvm_pgtable_walk_flags flags);

@@ -2113,8 +2113,10 @@ static void cpu_hyp_init_features(void)
 {
	cpu_set_hyp_vector();
 
-	if (is_kernel_in_hyp_mode())
+	if (is_kernel_in_hyp_mode()) {
		kvm_timer_init_vhe();
+		kvm_debug_init_vhe();
+	}
 
	if (vgic_present)
		kvm_vgic_init_cpu_hardware();

@@ -96,6 +96,13 @@ void kvm_init_host_debug_data(void)
	}
 }
 
+void kvm_debug_init_vhe(void)
+{
+	/* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */
+	if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1)))
+		write_sysreg_el1(0, SYS_PMSCR);
+}
+
 /*
  * Configures the 'external' MDSCR_EL1 value for the guest, i.e. when the host
  * has taken over MDSCR_EL1.
@@ -138,6 +145,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
	/* Must be called before kvm_vcpu_load_vhe() */
	KVM_BUG_ON(vcpu_get_flag(vcpu, SYSREGS_ON_CPU), vcpu->kvm);
 
+	if (has_vhe())
+		*host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
+
	/*
	 * Determine which of the possible debug states we're in:
	 *
@@ -184,6 +194,9 @@ void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu)
 {
+	if (has_vhe())
+		write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
+
	if (likely(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
		return;

@@ -431,9 +431,6 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
		vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
	}
 
-	*host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
-	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-
	if (cpus_have_final_cap(ARM64_HAS_HCX)) {
		u64 hcrx = vcpu->arch.hcrx_el2;
 
		if (is_nested_ctxt(vcpu)) {
@@ -454,8 +451,6 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 {
	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
 
-	write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
-
	write_sysreg(0, hstr_el2);
	if (system_supports_pmuv3()) {
		write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);

@@ -50,6 +50,10 @@ extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
 static void __activate_traps(struct kvm_vcpu *vcpu)
 {
	___activate_traps(vcpu, vcpu->arch.hcr_el2);
+
+	*host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
+	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+
	__activate_traps_common(vcpu);
	__activate_cptr_traps(vcpu);
@@ -93,6 +97,8 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
		isb();
	}
 
+	write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
+
	__deactivate_traps_common(vcpu);
 
	write_sysreg_hcr(this_cpu_ptr(&kvm_init_params)->hcr_el2);

@@ -253,7 +253,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
	*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
	*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
 
-	__vcpu_assign_sys_reg(vcpu, read_sysreg_el1(SYS_VBAR), VBAR_EL1);
+	__vcpu_assign_sys_reg(vcpu, VBAR_EL1, read_sysreg_el1(SYS_VBAR));
 
	kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);

@@ -1551,38 +1551,21 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
	return 0;
 }
 
-void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
-				      u64 addr, u64 size)
+void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 {
+	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};
 
-	WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
-}
-
-void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
-{
-	size_t pgd_sz;
-
+	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
-
-	/*
-	 * Since the pgtable is unlinked at this point, and not shared with
-	 * other walkers, safely deference pgd with kvm_dereference_pteref_raw()
-	 */
-	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
+	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
	pgt->pgd = NULL;
 }
 
-void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
-{
-	kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
-	kvm_pgtable_stage2_destroy_pgd(pgt);
-}
-
 void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
 {
	kvm_pteref_t ptep = (kvm_pteref_t)pgtable;

@@ -904,38 +904,6 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
	return 0;
 }
 
-/*
- * Assume that @pgt is valid and unlinked from the KVM MMU to free the
- * page-table without taking the kvm_mmu_lock and without performing any
- * TLB invalidations.
- *
- * Also, the range of addresses can be large enough to cause need_resched
- * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
- * cond_resched() periodically to prevent hogging the CPU for a long time
- * and schedule something else, if required.
- */
-static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
-				 phys_addr_t end)
-{
-	u64 next;
-
-	do {
-		next = stage2_range_addr_end(addr, end);
-		KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
-							     next - addr);
-		if (next != end)
-			cond_resched();
-	} while (addr = next, addr != end);
-}
-
-static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
-{
-	unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
-
-	stage2_destroy_range(pgt, 0, BIT(ia_bits));
-	KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
-}
-
 /**
  * kvm_init_stage2_mmu - Initialise a S2 MMU structure
  * @kvm:	The pointer to the KVM structure
@@ -1012,7 +980,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
	return 0;
 
 out_destroy_pgtable:
-	kvm_stage2_destroy(pgt);
+	KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
 out_free_pgtable:
	kfree(pgt);
	return err;
@@ -1106,10 +1074,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
+
+	if (kvm_is_nested_s2_mmu(kvm, mmu))
+		kvm_init_nested_s2_mmu(mmu);
+
	write_unlock(&kvm->mmu_lock);
 
	if (pgt) {
-		kvm_stage2_destroy(pgt);
+		KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
		kfree(pgt);
	}
 }
@@ -1541,11 +1513,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);
 
-	if (fault_is_perm && !write_fault && !exec_fault) {
-		kvm_err("Unexpected L2 read permission error\n");
-		return -EFAULT;
-	}
-
	if (!is_protected_kvm_enabled())
		memcache = &vcpu->arch.mmu_page_cache;
	else

@@ -847,7 +847,7 @@ static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
		ipa_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
							    vt->wr.level));
-		ipa_start = vt->wr.pa & (ipa_size - 1);
+		ipa_start = vt->wr.pa & ~(ipa_size - 1);
		ipa_end = ipa_start + ipa_size;
 
		if (ipa_end <= start || ipa_start >= end)
@@ -887,7 +887,7 @@ static void invalidate_vncr_va(struct kvm *kvm,
		va_size = ttl_to_size(pgshift_level_to_ttl(vt->wi.pgshift,
							   vt->wr.level));
-		va_start = vt->gva & (va_size - 1);
+		va_start = vt->gva & ~(va_size - 1);
		va_end = va_start + va_size;
 
		switch (scope->type) {
@@ -1276,7 +1276,7 @@ static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
		    !(tcr & TCR_ASID16))
			asid &= GENMASK(7, 0);
 
-		return asid != vt->wr.asid;
+		return asid == vt->wr.asid;
	}
 
	return true;
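The first two hunks above are the classic base-address masking fix: addr & ~(size - 1) rounds an address down to the start of its naturally aligned block, whereas addr & (size - 1) keeps only the offset inside the block, so the old code computed the wrong invalidation base. A small standalone sketch with made-up values (illustration only, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ipa  = 0x40012345ULL;	/* hypothetical guest IPA */
	uint64_t size = 0x200000ULL;	/* hypothetical 2 MiB block size */

	uint64_t base   = ipa & ~(size - 1);	/* 0x40000000: start of the block */
	uint64_t offset = ipa &  (size - 1);	/* 0x00012345: offset only, the buggy value */

	printf("base=%#llx offset=%#llx\n",
	       (unsigned long long)base, (unsigned long long)offset);
	return 0;
}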

@@ -316,16 +316,9 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
	return 0;
 }
 
-void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
-				       u64 addr, u64 size)
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 {
-	__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
-}
-
-void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
-{
-	/* Expected to be called after all pKVM mappings have been released. */
-	WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
+	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
 }
 
 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,

@@ -69,7 +69,7 @@ static int iter_mark_lpis(struct kvm *kvm)
	int nr_lpis = 0;
 
	xa_for_each(&dist->lpi_xa, intid, irq) {
-		if (!vgic_try_get_irq_kref(irq))
+		if (!vgic_try_get_irq_ref(irq))
			continue;
 
		xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);

@@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
 {
	struct vgic_dist *dist = &kvm->arch.vgic;
 
-	xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
+	xa_init(&dist->lpi_xa);
 }
 
 /* CREATION */
@@ -208,7 +208,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
		raw_spin_lock_init(&irq->irq_lock);
		irq->vcpu = NULL;
		irq->target_vcpu = vcpu0;
-		kref_init(&irq->refcount);
+		refcount_set(&irq->refcount, 0);
		switch (dist->vgic_model) {
		case KVM_DEV_TYPE_ARM_VGIC_V2:
			irq->targets = 0;
@@ -277,7 +277,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
		irq->intid = i;
		irq->vcpu = NULL;
		irq->target_vcpu = vcpu;
-		kref_init(&irq->refcount);
+		refcount_set(&irq->refcount, 0);
		if (vgic_irq_is_sgi(i)) {
			/* SGIs */
			irq->enabled = 1;

@@ -78,7 +78,6 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 {
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq;
-	unsigned long flags;
	int ret;
 
	/* In this case there is no put, since we keep the reference. */
@@ -89,7 +88,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	if (!irq)
		return ERR_PTR(-ENOMEM);
 
-	ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+	ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
	if (ret) {
		kfree(irq);
		return ERR_PTR(ret);
@@ -99,19 +98,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	raw_spin_lock_init(&irq->irq_lock);
 
	irq->config = VGIC_CONFIG_EDGE;
-	kref_init(&irq->refcount);
+	refcount_set(&irq->refcount, 1);
	irq->intid = intid;
	irq->target_vcpu = vcpu;
	irq->group = 1;
 
-	xa_lock_irqsave(&dist->lpi_xa, flags);
+	xa_lock(&dist->lpi_xa);
 
	/*
	 * There could be a race with another vgic_add_lpi(), so we need to
	 * check that we don't add a second list entry with the same LPI.
	 */
	oldirq = xa_load(&dist->lpi_xa, intid);
-	if (vgic_try_get_irq_kref(oldirq)) {
+	if (vgic_try_get_irq_ref(oldirq)) {
		/* Someone was faster with adding this LPI, lets use that. */
		kfree(irq);
		irq = oldirq;
@@ -126,7 +125,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	}
 
 out_unlock:
-	xa_unlock_irqrestore(&dist->lpi_xa, flags);
+	xa_unlock(&dist->lpi_xa);
 
	if (ret)
		return ERR_PTR(ret);
@@ -547,7 +546,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
	rcu_read_lock();
 
	irq = xa_load(&its->translation_cache, cache_key);
-	if (!vgic_try_get_irq_kref(irq))
+	if (!vgic_try_get_irq_ref(irq))
		irq = NULL;
 
	rcu_read_unlock();
@@ -571,7 +570,7 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
	 * its_lock, as the ITE (and the reference it holds) cannot be freed.
	 */
	lockdep_assert_held(&its->its_lock);
-	vgic_get_irq_kref(irq);
+	vgic_get_irq_ref(irq);
 
	old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);

@@ -518,7 +518,7 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
		if (!irq->hw || irq->host_irq != host_irq)
			continue;
 
-		if (!vgic_try_get_irq_kref(irq))
+		if (!vgic_try_get_irq_ref(irq))
			return NULL;
 
		return irq;

@@ -28,8 +28,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  *   kvm->arch.config_lock (mutex)
  *     its->cmd_lock (mutex)
  *       its->its_lock (mutex)
- *         vgic_cpu->ap_list_lock		must be taken with IRQs disabled
- *           vgic_dist->lpi_xa.xa_lock		must be taken with IRQs disabled
+ *         vgic_dist->lpi_xa.xa_lock
+ *           vgic_cpu->ap_list_lock		must be taken with IRQs disabled
  *             vgic_irq->irq_lock		must be taken with IRQs disabled
  *
  * As the ap_list_lock might be taken from the timer interrupt handler,
@@ -71,7 +71,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
	rcu_read_lock();
 
	irq = xa_load(&dist->lpi_xa, intid);
-	if (!vgic_try_get_irq_kref(irq))
+	if (!vgic_try_get_irq_ref(irq))
		irq = NULL;
 
	rcu_read_unlock();
@@ -114,37 +114,66 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
	return vgic_get_irq(vcpu->kvm, intid);
 }
 
-/*
- * We can't do anything in here, because we lack the kvm pointer to
- * lock and remove the item from the lpi_list. So we keep this function
- * empty and use the return value of kref_put() to trigger the freeing.
- */
-static void vgic_irq_release(struct kref *ref)
+static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq)
 {
+	lockdep_assert_held(&dist->lpi_xa.xa_lock);
+	__xa_erase(&dist->lpi_xa, irq->intid);
+	kfree_rcu(irq, rcu);
+}
+
+static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
+{
+	if (irq->intid < VGIC_MIN_LPI)
+		return false;
+
+	return refcount_dec_and_test(&irq->refcount);
+}
+
+static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq)
+{
+	if (!__vgic_put_irq(kvm, irq))
+		return false;
+
+	irq->pending_release = true;
+	return true;
 }
 
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
	struct vgic_dist *dist = &kvm->arch.vgic;
-	unsigned long flags;
 
-	if (irq->intid < VGIC_MIN_LPI)
-		return;
+	if (irq->intid >= VGIC_MIN_LPI)
+		might_lock(&dist->lpi_xa.xa_lock);
 
-	if (!kref_put(&irq->refcount, vgic_irq_release))
+	if (!__vgic_put_irq(kvm, irq))
		return;
 
-	xa_lock_irqsave(&dist->lpi_xa, flags);
-	__xa_erase(&dist->lpi_xa, irq->intid);
-	xa_unlock_irqrestore(&dist->lpi_xa, flags);
-
-	kfree_rcu(irq, rcu);
+	xa_lock(&dist->lpi_xa);
+	vgic_release_lpi_locked(dist, irq);
+	xa_unlock(&dist->lpi_xa);
+}
+
+static void vgic_release_deleted_lpis(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	unsigned long intid;
+	struct vgic_irq *irq;
+
+	xa_lock(&dist->lpi_xa);
+
+	xa_for_each(&dist->lpi_xa, intid, irq) {
+		if (irq->pending_release)
+			vgic_release_lpi_locked(dist, irq);
+	}
+
+	xa_unlock(&dist->lpi_xa);
 }
 
 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
 {
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
+	bool deleted = false;
	unsigned long flags;
 
	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
@@ -155,11 +184,14 @@ void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);
-			vgic_put_irq(vcpu->kvm, irq);
+			deleted |= vgic_put_irq_norelease(vcpu->kvm, irq);
		}
	}
 
	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+
+	if (deleted)
+		vgic_release_deleted_lpis(vcpu->kvm);
 }
 
 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
@@ -399,7 +431,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
	 * now in the ap_list. This is safe as the caller must already hold a
	 * reference on the irq.
	 */
-	vgic_get_irq_kref(irq);
+	vgic_get_irq_ref(irq);
	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
	irq->vcpu = vcpu;
@@ -630,6 +662,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 {
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
+	bool deleted_lpis = false;
 
	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -657,12 +690,12 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
			/*
			 * This vgic_put_irq call matches the
-			 * vgic_get_irq_kref in vgic_queue_irq_unlock,
+			 * vgic_get_irq_ref in vgic_queue_irq_unlock,
			 * where we added the LPI to the ap_list. As
			 * we remove the irq from the list, we drop
			 * also drop the refcount.
			 */
-			vgic_put_irq(vcpu->kvm, irq);
+			deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq);
			continue;
		}
@@ -725,6 +758,9 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
	}
 
	raw_spin_unlock(&vgic_cpu->ap_list_lock);
+
+	if (unlikely(deleted_lpis))
+		vgic_release_deleted_lpis(vcpu->kvm);
 }
 
 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
@@ -818,7 +854,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
		 * the AP list has been sorted already.
		 */
		if (multi_sgi && irq->priority > prio) {
-			_raw_spin_unlock(&irq->irq_lock);
+			raw_spin_unlock(&irq->irq_lock);
			break;
		}

@@ -267,7 +267,7 @@ void vgic_v2_put(struct kvm_vcpu *vcpu);
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
 
-static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_ref(struct vgic_irq *irq)
 {
	if (!irq)
		return false;
@@ -275,12 +275,12 @@ static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
	if (irq->intid < VGIC_MIN_LPI)
		return true;
 
-	return kref_get_unless_zero(&irq->refcount);
+	return refcount_inc_not_zero(&irq->refcount);
 }
 
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline void vgic_get_irq_ref(struct vgic_irq *irq)
 {
-	WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
+	WARN_ON_ONCE(!vgic_try_get_irq_ref(irq));
 }
 
 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
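The try-get helper above succeeds only while the refcount is still non-zero, which is what keeps the RCU-protected LPI lookups safe against a concurrent final put. A minimal userspace analogue of the refcount_inc_not_zero() idiom using C11 atomics (an illustrative sketch, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the object has not already dropped to zero,
 * i.e. it is not already on its way to being freed. */
static bool try_get_ref(atomic_uint *refcount)
{
	unsigned int old = atomic_load(refcount);

	do {
		if (old == 0)
			return false;	/* lookup raced with the final put */
	} while (!atomic_compare_exchange_weak(refcount, &old, old + 1));

	return true;
}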

@@ -2778,12 +2778,19 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
 
 static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
 {
+	struct mm_struct *mm = kvm->mm;
	struct page *page = NULL;
+	int locked = 1;
 
-	mmap_read_lock(kvm->mm);
-	get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
-			      &page, NULL);
-	mmap_read_unlock(kvm->mm);
+	if (mmget_not_zero(mm)) {
+		mmap_read_lock(mm);
+		get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
+				      &page, &locked);
+		if (locked)
+			mmap_read_unlock(mm);
+		mmput(mm);
+	}
+
	return page;
 }

@@ -4864,12 +4864,12 @@ static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu)
  * @vcpu: the vCPU whose gmap is to be fixed up
  * @gfn: the guest frame number used for memslots (including fake memslots)
  * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
- * @flags: FOLL_* flags
+ * @foll: FOLL_* flags
  *
  * Return: 0 on success, < 0 in case of error.
  * Context: The mm lock must not be held before calling. May sleep.
  */
-int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags)
+int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int foll)
 {
	struct kvm_memory_slot *slot;
	unsigned int fault_flags;
@@ -4883,13 +4883,13 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
		return vcpu_post_run_addressing_exception(vcpu);
 
-	fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
+	fault_flags = foll & FOLL_WRITE ? FAULT_FLAG_WRITE : 0;
	if (vcpu->arch.gmap->pfault_enabled)
-		flags |= FOLL_NOWAIT;
+		foll |= FOLL_NOWAIT;
	vmaddr = __gfn_to_hva_memslot(slot, gfn);
 
 try_again:
-	pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page);
+	pfn = __kvm_faultin_pfn(slot, gfn, foll, &writable, &page);
 
	/* Access outside memory, inject addressing exception */
	if (is_noslot_pfn(pfn))
@@ -4905,7 +4905,7 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u
			return 0;
		vcpu->stat.pfault_sync++;
		/* Could not setup async pfault, try again synchronously */
-		flags &= ~FOLL_NOWAIT;
+		foll &= ~FOLL_NOWAIT;
		goto try_again;
	}
	/* Any other error */
@@ -4925,7 +4925,7 @@ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, u
	return rc;
 }
 
-static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags)
+static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int foll)
 {
	unsigned long gaddr_tmp;
	gfn_t gfn;
@@ -4950,18 +4950,18 @@ static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, un
		}
		gfn = gpa_to_gfn(gaddr_tmp);
	}
-	return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags);
+	return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll);
 }
 
 static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
 {
-	unsigned int flags = 0;
+	unsigned int foll = 0;
	unsigned long gaddr;
	int rc;
 
	gaddr = current->thread.gmap_teid.addr * PAGE_SIZE;
	if (kvm_s390_cur_gmap_fault_is_write())
-		flags = FAULT_FLAG_WRITE;
+		foll = FOLL_WRITE;
 
	switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) {
	case 0:
@@ -5003,7 +5003,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
		send_sig(SIGSEGV, current, 0);
		if (rc != -ENXIO)
			break;
-		flags = FAULT_FLAG_WRITE;
+		foll = FOLL_WRITE;
		fallthrough;
	case PGM_PROTECTION:
	case PGM_SEGMENT_TRANSLATION:
@@ -5013,7 +5013,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
		kvm_s390_assert_primary_as(vcpu);
-		return vcpu_dat_fault_handler(vcpu, gaddr, flags);
+		return vcpu_dat_fault_handler(vcpu, gaddr, foll);
	default:
		KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
			current->thread.gmap_int_code, current->thread.gmap_teid.val);

@@ -624,6 +624,17 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
	int cc, ret;
	u16 dummy;
 
+	/* Add the notifier only once. No races because we hold kvm->lock */
+	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
+		/* The notifier will be unregistered when the VM is destroyed */
+		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
+		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
+		if (ret) {
+			kvm->arch.pv.mmu_notifier.ops = NULL;
+			return ret;
+		}
+	}
+
	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;
@@ -659,11 +670,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
-	/* Add the notifier only once. No races because we hold kvm->lock */
-	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
-		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
-		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
-	}
	return 0;
 }

@@ -4046,8 +4046,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr8;
 
-	if (nested_svm_virtualize_tpr(vcpu) ||
-	    kvm_vcpu_apicv_active(vcpu))
+	if (nested_svm_virtualize_tpr(vcpu))
		return;
 
	cr8 = kvm_get_cr8(vcpu);

@@ -8,8 +8,8 @@
 #include <linux/bits.h>
 #include <linux/kvm.h>
 #include <linux/irqreturn.h>
-#include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/refcount.h>
 #include <linux/spinlock.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
@@ -139,10 +139,13 @@ struct vgic_irq {
	bool pending_latch;		/* The pending latch state used to calculate
					 * the pending state for both level
					 * and edge triggered IRQs. */
-	bool active;			/* not used for LPIs */
+	bool active;
+	bool pending_release;		/* Used for LPIs only, unreferenced IRQ
					 * pending a release */
	bool enabled;
	bool hw;			/* Tied to HW IRQ */
-	struct kref refcount;		/* Used for LPIs */
+	refcount_t refcount;		/* Used for LPIs */
	u32 hwintid;			/* HW INTID number */
	unsigned int host_irq;		/* linux irq corresponding to hwintid */
	union {