mirror of https://github.com/torvalds/linux.git
x86/mm/64: Make 5-level paging support unconditional
Both Intel and AMD CPUs support 5-level paging, which is expected to become more widely adopted in the future. All major x86 Linux distributions have the feature enabled. Remove CONFIG_X86_5LEVEL and the related #ifdeffery to make the code more readable. Suggested-by: Borislav Petkov <bp@alien8.de> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: https://lore.kernel.org/r/20250516123306.3812286-4-kirill.shutemov@linux.intel.com
This commit is contained in:
parent
cba5d9b3e9
commit
7212b58d6d
|
|
@ -173,10 +173,10 @@ For example, when an old kernel is running on new hardware.
|
|||
The kernel disabled support for it at compile-time
|
||||
--------------------------------------------------
|
||||
|
||||
For example, if 5-level-paging is not enabled when building (i.e.,
|
||||
CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_.
|
||||
For example, if Linear Address Masking (LAM) is not enabled when building (i.e.,
|
||||
CONFIG_ADDRESS_MASKING is not selected) the flag "lam" will not show up.
|
||||
Even though the feature will still be detected via CPUID, the kernel disables
|
||||
it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57).
|
||||
it by clearing via setup_clear_cpu_cap(X86_FEATURE_LAM).
|
||||
|
||||
The feature is disabled at boot-time
|
||||
------------------------------------
|
||||
|
|
@ -200,5 +200,3 @@ missing at runtime. For example, AVX flags will not show up if XSAVE feature
|
|||
is disabled since they depend on XSAVE feature. Another example would be broken
|
||||
CPUs that are missing microcode patches. Due to that, the kernel decides not to
|
||||
enable a feature.
|
||||
|
||||
.. [#f1] 5-level paging uses linear address of 57 bits.
|
||||
|
|
|
|||
|
|
@ -22,15 +22,6 @@ QEMU 2.9 and later support 5-level paging.
|
|||
Virtual memory layout for 5-level paging is described in
|
||||
Documentation/arch/x86/x86_64/mm.rst
|
||||
|
||||
|
||||
Enabling 5-level paging
|
||||
=======================
|
||||
CONFIG_X86_5LEVEL=y enables the feature.
|
||||
|
||||
Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
|
||||
In this case additional page table level -- p4d -- will be folded at
|
||||
runtime.
|
||||
|
||||
User-space and large virtual address space
|
||||
==========================================
|
||||
On x86, 5-level paging enables 56-bit userspace virtual address space.
|
||||
|
|
|
|||
|
|
@ -427,8 +427,7 @@ config DYNAMIC_PHYSICAL_MASK
|
|||
|
||||
config PGTABLE_LEVELS
|
||||
int
|
||||
default 5 if X86_5LEVEL
|
||||
default 4 if X86_64
|
||||
default 5 if X86_64
|
||||
default 3 if X86_PAE
|
||||
default 2
|
||||
|
||||
|
|
@ -1464,25 +1463,6 @@ config X86_PAE
|
|||
has the cost of more pagetable lookup overhead, and also
|
||||
consumes more pagetable space per process.
|
||||
|
||||
config X86_5LEVEL
|
||||
bool "Enable 5-level page tables support"
|
||||
default y
|
||||
depends on X86_64
|
||||
help
|
||||
5-level paging enables access to larger address space:
|
||||
up to 128 PiB of virtual address space and 4 PiB of
|
||||
physical address space.
|
||||
|
||||
It will be supported by future Intel CPUs.
|
||||
|
||||
A kernel with the option enabled can be booted on machines that
|
||||
support 4- or 5-level paging.
|
||||
|
||||
See Documentation/arch/x86/x86_64/5level-paging.rst for more
|
||||
information.
|
||||
|
||||
Say N if unsure.
|
||||
|
||||
config X86_DIRECT_GBPAGES
|
||||
def_bool y
|
||||
depends on X86_64
|
||||
|
|
|
|||
|
|
@ -132,10 +132,6 @@ config X86_DISABLED_FEATURE_OSPKE
|
|||
def_bool y
|
||||
depends on !X86_INTEL_MEMORY_PROTECTION_KEYS
|
||||
|
||||
config X86_DISABLED_FEATURE_LA57
|
||||
def_bool y
|
||||
depends on !X86_5LEVEL
|
||||
|
||||
config X86_DISABLED_FEATURE_PTI
|
||||
def_bool y
|
||||
depends on !MITIGATION_PAGE_TABLE_ISOLATION
|
||||
|
|
|
|||
|
|
@ -10,12 +10,10 @@
|
|||
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
|
||||
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
|
||||
unsigned int __section(".data") __pgtable_l5_enabled;
|
||||
unsigned int __section(".data") pgdir_shift = 39;
|
||||
unsigned int __section(".data") ptrs_per_p4d = 1;
|
||||
#endif
|
||||
|
||||
/* Buffer to preserve trampoline memory */
|
||||
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
|
||||
|
|
@ -114,18 +112,13 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
|
|||
* Check if LA57 is desired and supported.
|
||||
*
|
||||
* There are several parts to the check:
|
||||
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
|
||||
* - if user asked to disable 5-level paging: no5lvl in cmdline
|
||||
* - if the machine supports 5-level paging:
|
||||
* + CPUID leaf 7 is supported
|
||||
* + the leaf has the feature bit set
|
||||
*
|
||||
* That's a substitute for boot_cpu_has() in early boot code.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
|
||||
!cmdline_find_option_bool("no5lvl") &&
|
||||
native_cpuid_eax(0) >= 7 &&
|
||||
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
|
||||
if (!cmdline_find_option_bool("no5lvl") &&
|
||||
native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & BIT(16))) {
|
||||
l5_required = true;
|
||||
|
||||
/* Initialize variables for 5-level paging */
|
||||
|
|
|
|||
|
|
@ -361,12 +361,8 @@ xloadflags:
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED)
|
||||
#else
|
||||
#define XLF56 XLF_5LEVEL
|
||||
#endif
|
||||
#else
|
||||
#define XLF56 0
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -16,9 +16,6 @@ extern unsigned int next_early_pgt;
|
|||
|
||||
static inline bool check_la57_support(void)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* 5-level paging is detected and enabled at kernel decompression
|
||||
* stage. Only check if it has been enabled there.
|
||||
|
|
@ -129,7 +126,7 @@ unsigned long __head __startup_64(unsigned long p2v_offset,
|
|||
pgd = rip_rel_ptr(early_top_pgt);
|
||||
pgd[pgd_index(__START_KERNEL_map)] += load_delta;
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
|
||||
if (la57) {
|
||||
p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
|
||||
p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
|
||||
|
||||
|
|
|
|||
|
|
@ -341,9 +341,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
|
|||
pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
|
||||
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
|
||||
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
|
||||
#endif
|
||||
pud = pud_offset(p4d, VSYSCALL_ADDR);
|
||||
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
|
||||
pmd = pmd_offset(pud, VSYSCALL_ADDR);
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ static inline void clear_page(void *page)
|
|||
void copy_page(void *to, void *from);
|
||||
KCFI_REFERENCE(copy_page);
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
/*
|
||||
* User space process size. This is the first address outside the user range.
|
||||
* There are a few constraints that determine this:
|
||||
|
|
@ -93,7 +92,6 @@ static __always_inline unsigned long task_size_max(void)
|
|||
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_X86_5LEVEL */
|
||||
|
||||
#endif /* !__ASSEMBLER__ */
|
||||
|
||||
|
|
|
|||
|
|
@ -48,14 +48,7 @@
|
|||
/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */
|
||||
|
||||
#define __PHYSICAL_MASK_SHIFT 52
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
|
||||
/* See task_size_max() in <asm/page_64.h> */
|
||||
#else
|
||||
#define __VIRTUAL_MASK_SHIFT 47
|
||||
#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
#define TASK_SIZE_MAX task_size_max()
|
||||
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
|
||||
|
|
|
|||
|
|
@ -41,11 +41,9 @@ static inline void sync_initial_page_table(void) { }
|
|||
pr_err("%s:%d: bad pud %p(%016lx)\n", \
|
||||
__FILE__, __LINE__, &(e), pud_val(e))
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
#define p4d_ERROR(e) \
|
||||
pr_err("%s:%d: bad p4d %p(%016lx)\n", \
|
||||
__FILE__, __LINE__, &(e), p4d_val(e))
|
||||
#endif
|
||||
|
||||
#define pgd_ERROR(e) \
|
||||
pr_err("%s:%d: bad pgd %p(%016lx)\n", \
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ typedef struct { pmdval_t pmd; } pmd_t;
|
|||
|
||||
extern unsigned int __pgtable_l5_enabled;
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#ifdef USE_EARLY_PGTABLE_L5
|
||||
/*
|
||||
* cpu_feature_enabled() is not available in early boot code.
|
||||
|
|
@ -37,17 +36,11 @@ static inline bool pgtable_l5_enabled(void)
|
|||
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
|
||||
#endif /* USE_EARLY_PGTABLE_L5 */
|
||||
|
||||
#else
|
||||
#define pgtable_l5_enabled() 0
|
||||
#endif /* CONFIG_X86_5LEVEL */
|
||||
|
||||
extern unsigned int pgdir_shift;
|
||||
extern unsigned int ptrs_per_p4d;
|
||||
|
||||
#endif /* !__ASSEMBLER__ */
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
|
|
@ -65,17 +58,6 @@ extern unsigned int ptrs_per_p4d;
|
|||
|
||||
#define MAX_POSSIBLE_PHYSMEM_BITS 52
|
||||
|
||||
#else /* CONFIG_X86_5LEVEL */
|
||||
|
||||
/*
|
||||
* PGDIR_SHIFT determines what a top-level page table entry can map
|
||||
*/
|
||||
#define PGDIR_SHIFT 39
|
||||
#define PTRS_PER_PGD 512
|
||||
#define MAX_PTRS_PER_P4D 1
|
||||
|
||||
#endif /* CONFIG_X86_5LEVEL */
|
||||
|
||||
/*
|
||||
* 3rd level page
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -590,7 +590,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
|||
DPRINTK(ALT, "alt table %px, -> %px", start, end);
|
||||
|
||||
/*
|
||||
* In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
|
||||
* KASAN_SHADOW_START is defined using
|
||||
* cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
|
||||
* During the process, KASAN becomes confused seeing partial LA57
|
||||
* conversion and triggers a false-positive out-of-bound report.
|
||||
|
|
|
|||
|
|
@ -51,13 +51,11 @@ unsigned int __initdata next_early_pgt;
|
|||
SYM_PIC_ALIAS(next_early_pgt);
|
||||
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
unsigned int __pgtable_l5_enabled __ro_after_init;
|
||||
unsigned int pgdir_shift __ro_after_init = 39;
|
||||
EXPORT_SYMBOL(pgdir_shift);
|
||||
unsigned int ptrs_per_p4d __ro_after_init = 1;
|
||||
EXPORT_SYMBOL(ptrs_per_p4d);
|
||||
#endif
|
||||
|
||||
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
|
||||
EXPORT_SYMBOL(page_offset_base);
|
||||
|
|
|
|||
|
|
@ -649,13 +649,11 @@ SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
|
|||
SYM_DATA_END(init_top_pgt)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
|
||||
.fill 511,8,0
|
||||
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
|
||||
SYM_DATA_END(level4_kernel_pgt)
|
||||
SYM_PIC_ALIAS(level4_kernel_pgt)
|
||||
#endif
|
||||
|
||||
SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
|
||||
.fill L3_START_KERNEL,8,0
|
||||
|
|
|
|||
|
|
@ -174,11 +174,7 @@ __ref void *alloc_low_pages(unsigned int num)
|
|||
* randomization is enabled.
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_X86_5LEVEL
|
||||
#define INIT_PGD_PAGE_TABLES 3
|
||||
#else
|
||||
#define INIT_PGD_PAGE_TABLES 4
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_RANDOMIZE_MEMORY
|
||||
#define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES)
|
||||
|
|
|
|||
|
|
@ -592,7 +592,7 @@ void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
|
|||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
#if CONFIG_PGTABLE_LEVELS > 4
|
||||
/**
|
||||
* p4d_set_huge - Set up kernel P4D mapping
|
||||
* @p4d: Pointer to the P4D entry
|
||||
|
|
|
|||
|
|
@ -578,7 +578,6 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
|
|||
xen_mc_issue(XEN_LAZY_MMU);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
__visible p4dval_t xen_p4d_val(p4d_t p4d)
|
||||
{
|
||||
return pte_mfn_to_pfn(p4d.p4d);
|
||||
|
|
@ -592,7 +591,6 @@ __visible p4d_t xen_make_p4d(p4dval_t p4d)
|
|||
return native_make_p4d(p4d);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
|
||||
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
|
||||
|
||||
static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
|
||||
void (*func)(struct mm_struct *mm, struct page *,
|
||||
|
|
@ -2222,10 +2220,8 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = {
|
|||
.alloc_pud = xen_alloc_pmd_init,
|
||||
.release_pud = xen_release_pmd_init,
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS >= 5
|
||||
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
|
||||
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
|
||||
#endif
|
||||
|
||||
.enter_mmap = xen_enter_mmap,
|
||||
.exit_mmap = xen_exit_mmap,
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ efi_status_t efi_setup_5level_paging(void)
|
|||
|
||||
void efi_5level_switch(void)
|
||||
{
|
||||
bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
|
||||
bool want_la57 = !efi_no5lvl;
|
||||
bool have_la57 = native_read_cr4() & X86_CR4_LA57;
|
||||
bool need_toggle = want_la57 ^ have_la57;
|
||||
u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
|
||||
|
|
|
|||
Loading…
Reference in New Issue