mirror of https://github.com/torvalds/linux.git
* Avoid direct HLT instruction execution in TDX guests
Merge tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 TDX updates from Dave Hansen:
 "Avoid direct HLT instruction execution in TDX guests.

  TDX guests aren't expected to use the HLT instruction directly. It
  causes a virtualization exception (#VE). While the #VE _can_ be
  handled, the current handling is slow and buggy and the easiest thing
  is just to avoid HLT in the first place. Plus, the kernel already has
  paravirt infrastructure that makes it relatively painless.

  Make TDX guests require paravirt and add some TDX-specific paravirt
  handlers which avoid HLT in the normal halt routines. Also add a
  warning in case another HLT sneaks in.

  There was a report that this leads to a "major performance
  improvement" on specjbb2015, probably because of the extra #VE
  overhead or missed wakeups from the buggy HLT handling"

* tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tdx: Emit warning if IRQs are enabled during HLT #VE handling
  x86/tdx: Fix arch_safe_halt() execution for TDX VMs
  x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT
commit 6cb094583a
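For readers unfamiliar with the paravirt infrastructure the message leans on: every halt path funnels through one function pointer that defaults to the native implementation and is swapped early in TDX guest boot. Below is a minimal, self-contained userspace sketch of that pattern; all names are illustrative stand-ins, and the kernel's real mechanism is the pv_ops patching visible in the tdx_early_init() hunk further down.

#include <stdio.h>

/* Stand-ins for the two halt implementations (names illustrative). */
static void native_safe_halt_impl(void) { puts("sti; hlt  (direct HLT, native path)"); }
static void tdx_safe_halt_impl(void)    { puts("HLT TDCALL, IRQs re-enabled after  (TDX path)"); }

/* Analogue of pv_ops.irq: one indirection point for every halt path. */
static struct {
	void (*safe_halt)(void);
} pv_irq = { .safe_halt = native_safe_halt_impl };

/* Analogue of arch_safe_halt(): the only entry point callers use. */
static void arch_safe_halt(void)
{
	pv_irq.safe_halt();
}

int main(void)
{
	arch_safe_halt();                      /* default: would execute HLT */
	pv_irq.safe_halt = tdx_safe_halt_impl; /* what tdx_early_init() effectively does */
	arch_safe_halt();                      /* HLT is never executed directly again */
	return 0;
}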
arch/x86/Kconfig
@@ -889,6 +889,7 @@ config INTEL_TDX_GUEST
 	depends on X86_64 && CPU_SUP_INTEL
 	depends on X86_X2APIC
 	depends on EFI_STUB
+	depends on PARAVIRT
 	select ARCH_HAS_CC_PLATFORM
 	select X86_MEM_ENCRYPT
 	select X86_MCE
arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
 #include <asm/ia32.h>
 #include <asm/insn.h>
 #include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/traps.h>
@@ -392,13 +393,21 @@ static int handle_halt(struct ve_info *ve)
 {
 	const bool irq_disabled = irqs_disabled();
 
+	/*
+	 * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a
+	 * wake event may be consumed before requesting HLT emulation, leaving
+	 * the vCPU blocking indefinitely.
+	 */
+	if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled"))
+		return -EIO;
+
 	if (__halt(irq_disabled))
 		return -EIO;
 
 	return ve_instr_len(ve);
 }
 
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
 {
 	const bool irq_disabled = false;
 
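The WARN_ONCE added above codifies a classic lost-wakeup rule: the decision to halt and the halt itself must be atomic with respect to the wake event, which only holds with IRQs masked. As a rough userspace analogy (illustrative only, nothing here is kernel API), the same discipline appears in condition-variable code, where the predicate check and the wait happen under one lock so the signal cannot slip in between:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static int wake_pending;

/* Fires once, like a wake IRQ aimed at a halted vCPU. */
static void *waker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	wake_pending = 1;
	pthread_cond_signal(&wake);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waker, NULL);

	/*
	 * The pending-wake check and the block happen atomically under the
	 * lock -- the analogue of halting with IRQs masked. Checking the
	 * flag without the lock and then waiting would be the bug the
	 * WARN_ONCE flags: the wake could fire in between and be lost.
	 */
	pthread_mutex_lock(&lock);
	while (!wake_pending)
		pthread_cond_wait(&wake, &lock);
	pthread_mutex_unlock(&lock);

	puts("woken up; no wake event lost");
	pthread_join(t, NULL);
	return 0;
}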
@@ -409,6 +418,16 @@ void __cpuidle tdx_safe_halt(void)
 	WARN_ONCE(1, "HLT instruction emulation failed\n");
 }
 
+static void __cpuidle tdx_safe_halt(void)
+{
+	tdx_halt();
+	/*
+	 * "__cpuidle" section doesn't support instrumentation, so stick
+	 * with raw_* variant that avoids tracing hooks.
+	 */
+	raw_local_irq_enable();
+}
+
 static int read_msr(struct pt_regs *regs, struct ve_info *ve)
 {
 	struct tdx_module_args args = {
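A note on the raw_local_irq_enable() above: code in the __cpuidle section must not call instrumentation (tracing, lockdep hooks), and the plain local_irq_enable() does exactly that before touching the hardware flag. A toy sketch of the distinction, with printouts standing in for the real hooks and the real STI instruction (nothing here is the kernel's actual definition):

#include <stdio.h>

/* Stand-in for the hardware operation; the kernel executes "sti" here. */
static void hw_enable_irqs(void) { puts("sti"); }

/* Traced variant: instrumentation runs first -- forbidden in __cpuidle. */
static void local_irq_enable_sketch(void)
{
	puts("trace_hardirqs_on()");   /* tracing/lockdep hook */
	hw_enable_irqs();
}

/* Raw variant: the bare operation, safe in noinstr/__cpuidle sections. */
static void raw_local_irq_enable_sketch(void)
{
	hw_enable_irqs();
}

int main(void)
{
	local_irq_enable_sketch();
	raw_local_irq_enable_sketch();
	return 0;
}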
@@ -1109,6 +1128,19 @@ void __init tdx_early_init(void)
 	x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
 	x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
 
+	/*
+	 * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+	 * will enable interrupts before HLT TDCALL invocation if executed
+	 * in STI-shadow, possibly resulting in missed wakeup events.
+	 *
+	 * Modify all possible HLT execution paths to use TDX specific routines
+	 * that directly execute TDCALL and toggle the interrupt state as
+	 * needed after TDCALL completion. This also reduces HLT related #VEs
+	 * in addition to having a reliable halt logic execution.
+	 */
+	pv_ops.irq.safe_halt = tdx_safe_halt;
+	pv_ops.irq.halt = tdx_halt;
+
 	/*
 	 * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
 	 * bringup low level code. That raises #VE which cannot be handled
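The "STI-shadow" in the comment is the one-instruction window after STI during which interrupts stay blocked; native_safe_halt() depends on it so that no wake IRQ can land between enabling interrupts and halting. Under TDX, HLT traps with a #VE instead of halting, the shadow ends when the exception is delivered, and the wake event can be consumed before the #VE handler ever issues the HLT TDCALL. A sketch of the instruction pair at issue (illustrative; the kernel's real native_safe_halt() also runs speculation-mitigation buffer clearing first):

/*
 * Roughly what native_safe_halt() executes. STI keeps IRQs blocked for
 * exactly one more instruction, so HLT is reached before any wake IRQ
 * can be delivered -- unless HLT traps, as it does in a TDX guest,
 * which ends the shadow early.
 */
static inline void native_safe_halt_sketch(void)
{
	asm volatile("sti; hlt" : : : "memory");
}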
arch/x86/include/asm/irqflags.h
@@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
 
 #endif
 
+#ifndef CONFIG_PARAVIRT
+#ifndef __ASSEMBLY__
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static __always_inline void arch_safe_halt(void)
+{
+	native_safe_halt();
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static __always_inline void halt(void)
+{
+	native_halt();
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/paravirt.h>
 #else
@@ -97,24 +119,6 @@ static __always_inline void arch_local_irq_enable(void)
 	native_irq_enable();
 }
 
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static __always_inline void arch_safe_halt(void)
-{
-	native_safe_halt();
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static __always_inline void halt(void)
-{
-	native_halt();
-}
-
 /*
  * For spinlocks, etc:
  */
arch/x86/include/asm/paravirt.h
@@ -102,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn,
 	PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
 }
 
+static __always_inline void arch_safe_halt(void)
+{
+	PVOP_VCALL0(irq.safe_halt);
+}
+
+static inline void halt(void)
+{
+	PVOP_VCALL0(irq.halt);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
@@ -165,16 +175,6 @@ static inline void __write_cr4(unsigned long x)
 	PVOP_VCALL1(cpu.write_cr4, x);
 }
 
-static __always_inline void arch_safe_halt(void)
-{
-	PVOP_VCALL0(irq.safe_halt);
-}
-
-static inline void halt(void)
-{
-	PVOP_VCALL0(irq.halt);
-}
-
 static inline u64 paravirt_read_msr(unsigned msr)
 {
 	return PVOP_CALL1(u64, cpu.read_msr, msr);
arch/x86/include/asm/paravirt_types.h
@@ -120,10 +120,9 @@ struct pv_irq_ops {
 	struct paravirt_callee_save save_fl;
 	struct paravirt_callee_save irq_disable;
 	struct paravirt_callee_save irq_enable;
-
+#endif
 	void (*safe_halt)(void);
 	void (*halt)(void);
-#endif
 } __no_randomize_layout;
 
 struct pv_mmu_ops {
arch/x86/include/asm/tdx.h
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
 
 bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
 
-void tdx_safe_halt(void);
+void tdx_halt(void);
 
 bool tdx_early_handle_ve(struct pt_regs *regs);
 
@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
 #else
 
 static inline void tdx_early_init(void) { };
-static inline void tdx_safe_halt(void) { };
+static inline void tdx_halt(void) { };
 
 static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
 
arch/x86/kernel/paravirt.c
@@ -75,6 +75,11 @@ void paravirt_set_sched_clock(u64 (*func)(void))
 	static_call_update(pv_sched_clock, func);
 }
 
+static noinstr void pv_native_safe_halt(void)
+{
+	native_safe_halt();
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static noinstr void pv_native_write_cr2(unsigned long val)
 {
@@ -100,11 +105,6 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
 {
 	native_set_debugreg(regno, val);
 }
 
-static noinstr void pv_native_safe_halt(void)
-{
-	native_safe_halt();
-}
-
 #endif
 
 struct pv_info pv_info = {
@@ -161,9 +161,11 @@ struct paravirt_patch_template pv_ops = {
 	.irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl),
 	.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
 	.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
+#endif /* CONFIG_PARAVIRT_XXL */
+
+	/* Irq HLT ops. */
 	.irq.safe_halt = pv_native_safe_halt,
 	.irq.halt = native_halt,
-#endif /* CONFIG_PARAVIRT_XXL */
 
 	/* Mmu ops. */
 	.mmu.flush_tlb_user = native_flush_tlb_local,
|
|||
|
|
@ -939,7 +939,7 @@ void __init select_idle_routine(void)
|
|||
static_call_update(x86_idle, mwait_idle);
|
||||
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
|
||||
pr_info("using TDX aware idle routine\n");
|
||||
static_call_update(x86_idle, tdx_safe_halt);
|
||||
static_call_update(x86_idle, tdx_halt);
|
||||
} else {
|
||||
static_call_update(x86_idle, default_idle);
|
||||
}
|
||||
|
|
|
|||