From 7b5d4416964c07c902163822a30a622111172b01 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Sep 2025 11:32:13 +0200 Subject: [PATCH 01/33] um: init cpu_tasks[] earlier This is currently done in uml_finishsetup(), but e.g. with KCOV enabled we'll crash because some init code can call into e.g. memparse(), which has coverage annotations, and then the checks in check_kcov_mode() crash because current is NULL. Simply initialize the cpu_tasks[] array statically, which fixes the crash. For the later SMP work, it seems to have not really caused any problems yet, but initialize all of the entries anyway. Link: https://patch.msgid.link/20250924113214.c76cd74d0583.I974f691ebb1a2b47915bd2b04cc38e5263b9447f@changeid Signed-off-by: Johannes Berg --- arch/um/kernel/process.c | 4 +++- arch/um/kernel/um_arch.c | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 9c9c66dc45f0..13d461712c99 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -43,7 +43,9 @@ * cares about its entry, so it's OK if another processor is modifying its * entry. */ -struct task_struct *cpu_tasks[NR_CPUS]; +struct task_struct *cpu_tasks[NR_CPUS] = { + [0 ... 
NR_CPUS - 1] = &init_task, +}; EXPORT_SYMBOL(cpu_tasks); void free_stack(unsigned long stack, int order) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index cfbbbf8500c3..ed2f67848a50 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -239,8 +239,6 @@ static struct notifier_block panic_exit_notifier = { void uml_finishsetup(void) { - cpu_tasks[0] = &init_task; - atomic_notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); From f11839c16c3f03570097f0bda61fd90272a00cb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2025 09:14:52 +0200 Subject: [PATCH 02/33] um/hostfs: define HOSTFS_ATTR_* via asm-offsets The HOSTFS_ATTR_* values were meant to be standalone for communication between hostfs's kernel and user code parts. However, it's easy to forget that HOSTFS_ATTR_* should be used even on the kernel side, and that wasn't consistently done. As a result, the values need to match ATTR_* values, which is not useful to maintain by hand. Instead, generate them via asm-offsets like other constants that UML needs in user-side code that aren't otherwise available in any header files that can be included there. 
Signed-off-by: Johannes Berg Reviewed-by: Hongbo Li Link: https://patch.msgid.link/20251007071452.367989-3-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- arch/um/include/shared/common-offsets.h | 10 +++++++ arch/x86/um/shared/sysdep/kernel-offsets.h | 1 + fs/hostfs/hostfs.h | 34 +--------------------- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h index 8ca66a1918c3..fcec75a93e0c 100644 --- a/arch/um/include/shared/common-offsets.h +++ b/arch/um/include/shared/common-offsets.h @@ -18,3 +18,13 @@ DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); + +DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE); +DEFINE(HOSTFS_ATTR_UID, ATTR_UID); +DEFINE(HOSTFS_ATTR_GID, ATTR_GID); +DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE); +DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME); +DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME); +DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME); +DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET); +DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET); diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h index 6fd1ed400399..ee6b44ef2217 100644 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ b/arch/x86/um/shared/sysdep/kernel-offsets.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 15b2f094d36e..aa02599b770f 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -3,40 +3,8 @@ #define __UM_FS_HOSTFS #include +#include -/* - * These are exactly the same definitions as in fs.h, but the names are - * changed so that this file can be included in both kernel and user files. 
- */ - -#define HOSTFS_ATTR_MODE 1 -#define HOSTFS_ATTR_UID 2 -#define HOSTFS_ATTR_GID 4 -#define HOSTFS_ATTR_SIZE 8 -#define HOSTFS_ATTR_ATIME 16 -#define HOSTFS_ATTR_MTIME 32 -#define HOSTFS_ATTR_CTIME 64 -#define HOSTFS_ATTR_ATIME_SET 128 -#define HOSTFS_ATTR_MTIME_SET 256 - -/* This one is unused by hostfs. */ -#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ -#define HOSTFS_ATTR_ATTR_FLAG 1024 - -/* - * If you are very careful, you'll notice that these two are missing: - * - * #define ATTR_KILL_SUID 2048 - * #define ATTR_KILL_SGID 4096 - * - * and this is because they were added in 2.5 development. - * Actually, they are not needed by most ->setattr() methods - they are set by - * callers of notify_change() to notify that the setuid/setgid bits must be - * dropped. - * notify_change() will delete those flags, make sure attr->ia_valid & ATTR_MODE - * is on, and remove the appropriate bits from attr->ia_mode (attr is a - * "struct iattr *"). -BlaisorBlade - */ struct hostfs_timespec { long long tv_sec; long long tv_nsec; From 6e3fc802ab86e5e3dbb76053717858ccd7675a9b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2025 09:14:53 +0200 Subject: [PATCH 03/33] um: move asm-offsets generation into a single file There's nothing subarch dependent here, and it's odd that includes need to be done in the subarch, and then entries defined in the common file. Simplify the whole thing from three files into one. 
Link: https://patch.msgid.link/20251007071452.367989-4-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- arch/um/include/shared/common-offsets.h | 30 --------------- arch/um/kernel/asm-offsets.c | 44 +++++++++++++++++++++- arch/x86/um/shared/sysdep/kernel-offsets.h | 18 --------- 3 files changed, 43 insertions(+), 49 deletions(-) delete mode 100644 arch/um/include/shared/common-offsets.h delete mode 100644 arch/x86/um/shared/sysdep/kernel-offsets.h diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h deleted file mode 100644 index fcec75a93e0c..000000000000 --- a/arch/um/include/shared/common-offsets.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* for use by sys-$SUBARCH/kernel-offsets.c */ - -DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); - -DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); -DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); -DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); - -DEFINE(UM_GFP_KERNEL, GFP_KERNEL); -DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); - -DEFINE(UM_THREAD_SIZE, THREAD_SIZE); - -DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); -DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); - -DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); - -DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); - -DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE); -DEFINE(HOSTFS_ATTR_UID, ATTR_UID); -DEFINE(HOSTFS_ATTR_GID, ATTR_GID); -DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE); -DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME); -DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME); -DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME); -DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET); -DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET); diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c index a69873aa697f..d38447e39d5e 100644 --- a/arch/um/kernel/asm-offsets.c +++ b/arch/um/kernel/asm-offsets.c @@ -1,3 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define COMPILE_OFFSETS +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include +/* 
workaround for a warning with -Wmissing-prototypes */ +void foo(void); + +void foo(void) +{ + DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); + + DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE); + DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK); + DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT); + + DEFINE(UM_GFP_KERNEL, GFP_KERNEL); + DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC); + + DEFINE(UM_THREAD_SIZE, THREAD_SIZE); + + DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC); + DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC); + + DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES); + + DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE); + + DEFINE(HOSTFS_ATTR_MODE, ATTR_MODE); + DEFINE(HOSTFS_ATTR_UID, ATTR_UID); + DEFINE(HOSTFS_ATTR_GID, ATTR_GID); + DEFINE(HOSTFS_ATTR_SIZE, ATTR_SIZE); + DEFINE(HOSTFS_ATTR_ATIME, ATTR_ATIME); + DEFINE(HOSTFS_ATTR_MTIME, ATTR_MTIME); + DEFINE(HOSTFS_ATTR_CTIME, ATTR_CTIME); + DEFINE(HOSTFS_ATTR_ATIME_SET, ATTR_ATIME_SET); + DEFINE(HOSTFS_ATTR_MTIME_SET, ATTR_MTIME_SET); +} diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h deleted file mode 100644 index ee6b44ef2217..000000000000 --- a/arch/x86/um/shared/sysdep/kernel-offsets.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* workaround for a warning with -Wmissing-prototypes */ -void foo(void); - -void foo(void) -{ -#include -} From 019cde8fc9a7a7b509fd0dd52b14599dd884bc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 13 Oct 2025 12:40:18 +0200 Subject: [PATCH 04/33] x86/um/vdso: Fix prototype of clock_gettime() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clock_gettime() system call takes a pointer to 'struct __kernel_timespec', not 'struct __kernel_old_timespec'. Right now this is not an issue as the vDSO never works with the actual struct but only passes it through to the kernel. 
Fix the prototype for consistency with the system call. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251013-uml-vdso-cleanup-v1-1-a079c7adcc69@weissschuh.net Signed-off-by: Johannes Berg --- arch/x86/um/vdso/um_vdso.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index cbae2584124f..5cadcc04d422 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -14,12 +14,12 @@ #include /* workaround for -Wmissing-prototypes warnings */ -int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); __kernel_old_time_t __vdso_time(__kernel_old_time_t *t); long __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); -int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { long ret; @@ -30,7 +30,7 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_old_timespec *ts) return ret; } -int clock_gettime(clockid_t, struct __kernel_old_timespec *) +int clock_gettime(clockid_t, struct __kernel_timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) From 8c0fbd6ae408448a41f7ea6ad4872efc971349db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 13 Oct 2025 12:40:19 +0200 Subject: [PATCH 05/33] x86/um/vdso: Use prototypes from generic vDSO headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic vDSO library provides a convenient header for the vDSO function prototypes, use it. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251013-uml-vdso-cleanup-v1-2-a079c7adcc69@weissschuh.net Signed-off-by: Johannes Berg --- arch/x86/um/vdso/um_vdso.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index 5cadcc04d422..02d41fdb5655 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -9,14 +9,12 @@ /* Disable profiling for userspace code */ #define DISABLE_BRANCH_PROFILING +#include #include #include #include /* workaround for -Wmissing-prototypes warnings */ -int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); -int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); -__kernel_old_time_t __vdso_time(__kernel_old_time_t *t); long __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) From 12fe820fae37b95dd55c226346b7ed3fbebac79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 13 Oct 2025 12:40:20 +0200 Subject: [PATCH 06/33] x86/um/vdso: Panic when vDSO can not be allocated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vDSO address is added to the userspace auxiliary vectors even if the vDSO was not allocated. When accessing the page, userspace processes will crash. Enforce that the allocation works. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251013-uml-vdso-cleanup-v1-3-a079c7adcc69@weissschuh.net Signed-off-by: Johannes Berg --- arch/x86/um/vdso/vma.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c index dc8dfb2abd80..51a2b9f2eca9 100644 --- a/arch/x86/um/vdso/vma.c +++ b/arch/x86/um/vdso/vma.c @@ -10,7 +10,6 @@ #include #include -static unsigned int __read_mostly vdso_enabled = 1; unsigned long um_vdso_addr; static struct page *um_vdso; @@ -25,17 +24,11 @@ static int __init init_vdso(void) um_vdso = alloc_page(GFP_KERNEL); if (!um_vdso) - goto oom; + panic("Cannot allocate vdso\n"); copy_page(page_address(um_vdso), vdso_start); return 0; - -oom: - printk(KERN_ERR "Cannot allocate vdso\n"); - vdso_enabled = 0; - - return -ENOMEM; } subsys_initcall(init_vdso); @@ -48,9 +41,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) .pages = &um_vdso, }; - if (!vdso_enabled) - return 0; - if (mmap_write_lock_killable(mm)) return -EINTR; From 3c9b904f9033fb250db72d258bbdec791dc89405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 13 Oct 2025 12:40:21 +0200 Subject: [PATCH 07/33] x86/um/vdso: Drop VDSO64-y from Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This symbol is unnecessary, remove it. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251013-uml-vdso-cleanup-v1-4-a079c7adcc69@weissschuh.net Signed-off-by: Johannes Berg --- arch/x86/um/vdso/Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 7478d11dacb7..8a7c8b37cb6e 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -3,16 +3,13 @@ # Building vDSO images for x86. 
# -VDSO64-y := y - -vdso-install-$(VDSO64-y) += vdso.so - +vdso-install-y += vdso.so # files to link into the vdso vobjs-y := vdso-note.o um_vdso.o # files to link into kernel -obj-$(VDSO64-y) += vdso.o vma.o +obj-y += vdso.o vma.o vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) From 691ff5914835a65151f3793e8b4dea0b230d0b6b Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 13:45:16 +0800 Subject: [PATCH 08/33] um: Make host_task_size a local variable Currently, host_task_size is a global variable, but it is only used in linux_main() to compute stub_start and task_size. Make it a local variable to limit its scope to where it is actually needed. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027054519.1996090-2-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/include/shared/as-layout.h | 1 - arch/um/kernel/um_arch.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h index 7c7e17bce403..02ef258e3395 100644 --- a/arch/um/include/shared/as-layout.h +++ b/arch/um/include/shared/as-layout.h @@ -44,7 +44,6 @@ extern unsigned long start_vm; extern unsigned long brk_start; -extern unsigned long host_task_size; extern unsigned long stub_start; extern int linux_main(int argc, char **argv, char **envp); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index ed2f67848a50..4b14f1ea2690 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -252,8 +252,6 @@ unsigned long stub_start; unsigned long task_size; EXPORT_SYMBOL(task_size); -unsigned long host_task_size; - unsigned long brk_start; unsigned long end_iomem; EXPORT_SYMBOL(end_iomem); @@ -306,6 +304,7 @@ int __init linux_main(int argc, char **argv, char **envp) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; + unsigned long host_task_size; unsigned long stack; unsigned int i; int add; From de203267483de10bdfc3ec74fac246e879361819 Mon Sep 17 00:00:00 
2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 13:45:17 +0800 Subject: [PATCH 09/33] um: Use PAGE_ALIGN() for address alignment Use PAGE_ALIGN() instead of open-coded calculations. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027054519.1996090-3-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/kernel/um_arch.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 4b14f1ea2690..fcabef8c7224 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -294,10 +294,7 @@ static unsigned long __init get_top_address(char **envp) top_addr = (unsigned long) envp[i]; } - top_addr &= ~(UM_KERN_PAGE_SIZE - 1); - top_addr += UM_KERN_PAGE_SIZE; - - return top_addr; + return PAGE_ALIGN(top_addr + 1); } int __init linux_main(int argc, char **argv, char **envp) @@ -366,8 +363,8 @@ int __init linux_main(int argc, char **argv, char **envp) setup_machinename(init_utsname()->machine); - physmem_size = (physmem_size + PAGE_SIZE - 1) & PAGE_MASK; - iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; + physmem_size = PAGE_ALIGN(physmem_size); + iomem_size = PAGE_ALIGN(iomem_size); max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; if (physmem_size > max_physmem) { From 9c84022c1d1f0cfd8f02fa8e2b275ccd361891d2 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 13:45:18 +0800 Subject: [PATCH 10/33] um: Replace UML_ROUND_UP() with PAGE_ALIGN() Although UML_ROUND_UP() is defined in a shared header file, it depends on the PAGE_SIZE and PAGE_MASK macros, so it can only be used in kernel code. Considering its name is not very clear and its functionality is the same as PAGE_ALIGN(), replace its usages with a direct call to PAGE_ALIGN() and remove it. 
Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027054519.1996090-4-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/include/shared/kern_util.h | 3 --- arch/um/kernel/mem.c | 2 +- arch/um/kernel/um_arch.c | 5 ++--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 00ca3e12fd9a..949a03c7861e 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -15,9 +15,6 @@ extern int uml_exitcode; extern int kmalloc_ok; -#define UML_ROUND_UP(addr) \ - ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) - extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 32e3b1972dc1..19d40b58eac4 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -71,7 +71,7 @@ void __init arch_mm_preinit(void) /* Map in the area just after the brk now that kmalloc is about * to be turned on. */ - brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); + brk_end = PAGE_ALIGN((unsigned long) sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index fcabef8c7224..6f9a49e6c6a0 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -348,12 +348,11 @@ int __init linux_main(int argc, char **argv, char **envp) * so they actually get what they asked for. 
This should * add zero for non-exec shield users */ - - diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + diff = PAGE_ALIGN(brk_start) - PAGE_ALIGN((unsigned long) &_end); if (diff > 1024 * 1024) { os_info("Adding %ld bytes to physical memory to account for " "exec-shield gap\n", diff); - physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end); + physmem_size += diff; } uml_physmem = (unsigned long) __binary_start & PAGE_MASK; From a7f7dbae94a5ae5cfbf2375e0d952c54b069fd7f Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 13:45:19 +0800 Subject: [PATCH 11/33] um: Remove file-based iomem emulation support The file-based iomem emulation was introduced to support writing paravirtualized drivers based on emulated iomem regions. However, the only driver that makes use of it is an example driver called mmapper, which was written over two decades ago. We now have several modern device emulation mechanisms, such as vhost-user-based virtio-uml. Remove the file-based iomem emulation support to reduce the maintenance burden. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027054519.1996090-5-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/Kconfig | 6 -- arch/um/drivers/Makefile | 1 - arch/um/drivers/mmapper_kern.c | 135 ----------------------------- arch/um/include/asm/pgtable.h | 4 +- arch/um/include/shared/kern_util.h | 1 - arch/um/include/shared/mem_user.h | 13 --- arch/um/kernel/mem.c | 2 +- arch/um/kernel/physmem.c | 71 --------------- arch/um/kernel/um_arch.c | 7 +- arch/um/os-Linux/skas/process.c | 7 -- arch/um/os-Linux/start_up.c | 50 ----------- 11 files changed, 4 insertions(+), 293 deletions(-) delete mode 100644 arch/um/drivers/mmapper_kern.c diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 49781bee7905..0b4d00596a8c 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -200,12 +200,6 @@ config KERNEL_STACK_ORDER increase in the size of the state which needs to be saved when handling signals. 
-config MMAPPER - tristate "iomem emulation driver" - help - This driver allows a host file to be used as emulated IO memory inside - UML. - config PGTABLE_LEVELS int default 4 if 64BIT diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index 6bf8cbf71d3c..36dc57840084 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -29,7 +29,6 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_MCONSOLE) += mconsole.o -obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_BLK_DEV_UBD) += ubd.o obj-$(CONFIG_UML_SOUND) += hostaudio.o obj-$(CONFIG_NULL_CHAN) += null.o diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c deleted file mode 100644 index 807cd3358740..000000000000 --- a/arch/um/drivers/mmapper_kern.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * arch/um/drivers/mmapper_kern.c - * - * BRIEF MODULE DESCRIPTION - * - * Copyright (C) 2000 RidgeRun, Inc. - * Author: RidgeRun, Inc. 
- * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* These are set in mmapper_init, which is called at boot time */ -static unsigned long mmapper_size; -static unsigned long p_buf; -static char *v_buf; - -static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size); -} - -static ssize_t mmapper_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - if (*ppos > mmapper_size) - return -EINVAL; - - return simple_write_to_buffer(v_buf, mmapper_size, ppos, buf, count); -} - -static long mmapper_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - return -ENOIOCTLCMD; -} - -static int mmapper_mmap(struct file *file, struct vm_area_struct *vma) -{ - int ret = -EINVAL; - int size; - - if (vma->vm_pgoff != 0) - goto out; - - size = vma->vm_end - vma->vm_start; - if (size > mmapper_size) - return -EFAULT; - - /* - * XXX A comment above remap_pfn_range says it should only be - * called when the mm semaphore is held - */ - if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size, - vma->vm_page_prot)) - goto out; - ret = 0; -out: - return ret; -} - -static int mmapper_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static int mmapper_release(struct inode *inode, struct file *file) -{ - return 0; -} - -static const struct file_operations mmapper_fops = { - .owner = THIS_MODULE, - .read = mmapper_read, - .write = mmapper_write, - .unlocked_ioctl = mmapper_ioctl, - .mmap = mmapper_mmap, - .open = mmapper_open, - .release = mmapper_release, - .llseek = default_llseek, -}; - -/* - * No locking needed - only used (and modified) by below initcall and exitcall. 
- */ -static struct miscdevice mmapper_dev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "mmapper", - .fops = &mmapper_fops -}; - -static int __init mmapper_init(void) -{ - int err; - - printk(KERN_INFO "Mapper v0.1\n"); - - v_buf = (char *) find_iomem("mmapper", &mmapper_size); - if (mmapper_size == 0) { - printk(KERN_ERR "mmapper_init - find_iomem failed\n"); - return -ENODEV; - } - p_buf = __pa(v_buf); - - err = misc_register(&mmapper_dev); - if (err) { - printk(KERN_ERR "mmapper - misc_register failed, err = %d\n", - err); - return err; - } - return 0; -} - -static void __exit mmapper_exit(void) -{ - misc_deregister(&mmapper_dev); -} - -module_init(mmapper_init); -module_exit(mmapper_exit); - -MODULE_AUTHOR("Greg Lonnon "); -MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 24fdea6f88c3..6ca7583003cd 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -45,10 +45,10 @@ extern unsigned long *empty_zero_page; * area for the same reason. 
;) */ -extern unsigned long end_iomem; +#include /* for high_physmem */ #define VMALLOC_OFFSET (__va_space) -#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) +#define VMALLOC_START ((high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) #define VMALLOC_END (TASK_SIZE-2*PAGE_SIZE) #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 949a03c7861e..3ca589f3cd97 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -39,7 +39,6 @@ extern void uml_pm_wake(void); extern int start_uml(void); extern void paging_init(void); -extern int parse_iomem(char *str, int *add); extern void uml_cleanup(void); extern void do_uml_exitcalls(void); diff --git a/arch/um/include/shared/mem_user.h b/arch/um/include/shared/mem_user.h index d4727efcf23d..8a5b72872ff8 100644 --- a/arch/um/include/shared/mem_user.h +++ b/arch/um/include/shared/mem_user.h @@ -32,21 +32,8 @@ #ifndef _MEM_USER_H #define _MEM_USER_H -struct iomem_region { - struct iomem_region *next; - char *driver; - int fd; - int size; - unsigned long phys; - unsigned long virt; -}; - -extern struct iomem_region *iomem_regions; -extern int iomem_size; - #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -extern unsigned long find_iomem(char *driver, unsigned long *len_out); extern void setup_physmem(unsigned long start, unsigned long usable, unsigned long len); extern void map_memory(unsigned long virt, unsigned long phys, diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 19d40b58eac4..dc938715ec9d 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -197,7 +197,7 @@ void __init paging_init(void) panic("%s: Failed to allocate %lu bytes align=%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); - max_zone_pfn[ZONE_NORMAL] = end_iomem >> PAGE_SHIFT; + max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT; 
free_area_init(max_zone_pfn); #if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index af02b5f9911d..ae6ca373c261 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -105,19 +105,6 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out) fd = physmem_fd; *offset_out = phys; } - else if (phys < __pa(end_iomem)) { - struct iomem_region *region = iomem_regions; - - while (region != NULL) { - if ((phys >= region->phys) && - (phys < region->phys + region->size)) { - fd = region->fd; - *offset_out = phys - region->phys; - break; - } - region = region->next; - } - } return fd; } @@ -140,61 +127,3 @@ __uml_setup("mem=", uml_mem_setup, " be more, and the excess, if it's ever used, will just be swapped out.\n" " Example: mem=64M\n\n" ); - -__uml_setup("iomem=", parse_iomem, -"iomem=,\n" -" Configure as an IO memory region named .\n\n" -); - -/* - * This list is constructed in parse_iomem and addresses filled in - * setup_iomem, both of which run during early boot. Afterwards, it's - * unchanged. 
- */ -struct iomem_region *iomem_regions; - -/* Initialized in parse_iomem and unchanged thereafter */ -int iomem_size; - -unsigned long find_iomem(char *driver, unsigned long *len_out) -{ - struct iomem_region *region = iomem_regions; - - while (region != NULL) { - if (!strcmp(region->driver, driver)) { - *len_out = region->size; - return region->virt; - } - - region = region->next; - } - - return 0; -} -EXPORT_SYMBOL(find_iomem); - -static int setup_iomem(void) -{ - struct iomem_region *region = iomem_regions; - unsigned long iomem_start = high_physmem + PAGE_SIZE; - int err; - - while (region != NULL) { - err = os_map_memory((void *) iomem_start, region->fd, 0, - region->size, 1, 1, 0); - if (err) - printk(KERN_ERR "Mapping iomem region for driver '%s' " - "failed, errno = %d\n", region->driver, -err); - else { - region->virt = iomem_start; - region->phys = __pa(region->virt); - } - - iomem_start += region->size + PAGE_SIZE; - region = region->next; - } - - return 0; -} - -__initcall(setup_iomem); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 6f9a49e6c6a0..cf06bb732ed8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -253,8 +253,6 @@ unsigned long task_size; EXPORT_SYMBOL(task_size); unsigned long brk_start; -unsigned long end_iomem; -EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) @@ -363,9 +361,7 @@ int __init linux_main(int argc, char **argv, char **envp) setup_machinename(init_utsname()->machine); physmem_size = PAGE_ALIGN(physmem_size); - iomem_size = PAGE_ALIGN(iomem_size); - - max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; + max_physmem = TASK_SIZE - uml_physmem - MIN_VMALLOC; if (physmem_size > max_physmem) { physmem_size = max_physmem; os_info("Physical memory size shrunk to %llu bytes\n", @@ -373,7 +369,6 @@ int __init linux_main(int argc, char **argv, char **envp) } high_physmem = uml_physmem + physmem_size; - end_iomem = high_physmem + iomem_size; start_vm = 
VMALLOC_START; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 0bc10cd4cbed..820846ff7179 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -298,7 +298,6 @@ static int userspace_tramp(void *data) .seccomp = using_seccomp, .stub_start = STUB_START, }; - struct iomem_region *iomem; int ret; if (using_seccomp) { @@ -332,12 +331,6 @@ static int userspace_tramp(void *data) fcntl(init_data.stub_data_fd, F_SETFD, 0); - /* In SECCOMP mode, these FDs are passed when needed */ - if (!using_seccomp) { - for (iomem = iomem_regions; iomem; iomem = iomem->next) - fcntl(iomem->fd, F_SETFD, 0); - } - /* dup2 signaling FD/socket to STDIN */ if (dup2(tramp_data->sockpair[0], 0) < 0) exit(3); diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index a827c2e01aa5..8b19dca83f71 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -489,53 +489,3 @@ void __init os_early_checks(void) fatal("Failed to initialize default registers"); stop_ptraced_child(pid, 1); } - -int __init parse_iomem(char *str, int *add) -{ - struct iomem_region *new; - struct stat64 buf; - char *file, *driver; - int fd, size; - - driver = str; - file = strchr(str,','); - if (file == NULL) { - os_warn("parse_iomem : failed to parse iomem\n"); - goto out; - } - *file = '\0'; - file++; - fd = open(file, O_RDWR, 0); - if (fd < 0) { - perror("parse_iomem - Couldn't open io file"); - goto out; - } - - if (fstat64(fd, &buf) < 0) { - perror("parse_iomem - cannot stat_fd file"); - goto out_close; - } - - new = malloc(sizeof(*new)); - if (new == NULL) { - perror("Couldn't allocate iomem_region struct"); - goto out_close; - } - - size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); - - *new = ((struct iomem_region) { .next = iomem_regions, - .driver = driver, - .fd = fd, - .size = size, - .phys = 0, - .virt = 0 }); - iomem_regions = new; - iomem_size += new->size + UM_KERN_PAGE_SIZE; - - return 0; 
- out_close: - close(fd); - out: - return 1; -} From 6aaf00d14e6c80f4664932d261be7a03834f558b Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:08 +0800 Subject: [PATCH 12/33] um: Do not disable kmalloc in initial_thread_cb() Currently, initial_thread_cb() temporarily disables kmalloc when it invokes the callback, allowing the callback to bypass kmalloc. This is unnecessary for the current users of initial_thread_cb(), and we should avoid memory allocations that are not under the control of the UML kernel. Therefore, let's stop temporarily disabling kmalloc in initial_thread_cb(). Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-2-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 13d461712c99..0a9249b2b86b 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -187,11 +187,7 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) void initial_thread_cb(void (*proc)(void *), void *arg) { - int save_kmalloc_ok = kmalloc_ok; - - kmalloc_ok = 0; initial_thread_cb_skas(proc, arg); - kmalloc_ok = save_kmalloc_ok; } int arch_dup_task_struct(struct task_struct *dst, From 9e5a9f1c9b336871c8e76c1cefd85182c5b58541 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:09 +0800 Subject: [PATCH 13/33] um: Turn signals_* into thread-local variables Turn signals_enabled, signals_pending and signals_active into thread-local variables. This enables us to control and track signals independently on each CPU thread. This is a preparation for adding SMP support. 
Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-3-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/include/asm/irqflags.h | 4 ++-- arch/um/include/shared/longjmp.h | 3 +-- arch/um/include/shared/os.h | 1 + arch/um/kernel/ksyms.c | 2 +- arch/um/os-Linux/signal.c | 11 ++++++++--- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h index 1e69ef5bc35e..31e49e0894c5 100644 --- a/arch/um/include/asm/irqflags.h +++ b/arch/um/include/asm/irqflags.h @@ -2,7 +2,7 @@ #ifndef __UM_IRQFLAGS_H #define __UM_IRQFLAGS_H -extern int signals_enabled; +int um_get_signals(void); int um_set_signals(int enable); void block_signals(void); void unblock_signals(void); @@ -10,7 +10,7 @@ void unblock_signals(void); #define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { - return signals_enabled; + return um_get_signals(); } #define arch_local_irq_restore arch_local_irq_restore diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h index 8863319039f3..c53e43d980c8 100644 --- a/arch/um/include/shared/longjmp.h +++ b/arch/um/include/shared/longjmp.h @@ -5,7 +5,6 @@ #include #include -extern int signals_enabled; extern int setjmp(jmp_buf); extern void longjmp(jmp_buf, int); @@ -15,7 +14,7 @@ extern void longjmp(jmp_buf, int); #define UML_SETJMP(buf) ({ \ int n, enable; \ - enable = *(volatile int *)&signals_enabled; \ + enable = um_get_signals(); \ n = setjmp(*buf); \ if(n != 0) \ um_set_signals_trace(enable); \ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index b35cc8ce333b..324d4eed3385 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -243,6 +243,7 @@ extern void send_sigio_to_self(void); extern int change_sig(int signal, int on); extern void block_signals(void); extern void unblock_signals(void); +extern int um_get_signals(void); extern int 
um_set_signals(int enable); extern int um_set_signals_trace(int enable); extern void deliver_alarm(void); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index f2fb77da08cf..96314c31e61c 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -6,8 +6,8 @@ #include #include +EXPORT_SYMBOL(um_get_signals); EXPORT_SYMBOL(um_set_signals); -EXPORT_SYMBOL(signals_enabled); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 11f07f498270..58da8c6ece98 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -68,12 +68,12 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGCHLD_BIT 2 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) -int signals_enabled; +static __thread int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif -static unsigned int signals_pending; -static unsigned int signals_active = 0; +static __thread unsigned int signals_pending; +static __thread unsigned int signals_active; static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { @@ -342,6 +342,11 @@ void unblock_signals(void) } } +int um_get_signals(void) +{ + return signals_enabled; +} + int um_set_signals(int enable) { int ret; From 2670917c2fc8902558f3aba4f41e5cc5bf6e18fa Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:10 +0800 Subject: [PATCH 14/33] um: Determine sleep based on need_resched() With SMP and NO_HZ enabled, the CPU may still need to sleep even if the timer is disarmed. Switch to deciding whether to sleep based on pending resched. Additionally, because disabling IRQs does not block SIGALRM, it is also necessary to check for any pending timer alarms. This is a preparation for adding SMP support. 
Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-4-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/include/shared/kern_util.h | 1 + arch/um/kernel/process.c | 5 +++++ arch/um/os-Linux/internal.h | 5 +++++ arch/um/os-Linux/signal.c | 6 ++++++ arch/um/os-Linux/time.c | 15 +++++++++------ 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 3ca589f3cd97..38321188c04c 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -51,6 +51,7 @@ extern int __uml_cant_sleep(void); extern int get_current_pid(void); extern int copy_from_user_proc(void *to, void *from, int size); extern char *uml_strdup(const char *string); +int uml_need_resched(void); extern unsigned long to_irq_stack(unsigned long *mask_out); extern unsigned long from_irq_stack(int nested); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 0a9249b2b86b..3b28048f269c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -223,6 +223,11 @@ int __uml_cant_sleep(void) { /* Is in_interrupt() really needed? 
*/ } +int uml_need_resched(void) +{ + return need_resched(); +} + extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; void do_uml_exitcalls(void) diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index 5d8d3b0817a9..c2c7a0dc673c 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -15,6 +15,11 @@ void scan_elf_aux(char **envp); */ void check_tmpexec(void); +/* + * signal.c + */ +int timer_alarm_pending(void); + /* * skas/process.c */ diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 58da8c6ece98..554a87dd32cc 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include "internal.h" void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = { [SIGTRAP] = relay_signal, @@ -159,6 +160,11 @@ void timer_set_signal_handler(void) set_handler(SIGALRM); } +int timer_alarm_pending(void) +{ + return !!(signals_pending & SIGALRM_MASK); +} + void set_sigstack(void *sig_stack, int size) { stack_t stack = { diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 4d5591d96d8c..f3d4547e5227 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -15,6 +15,7 @@ #include #include #include +#include "internal.h" static timer_t event_high_res_timer = 0; @@ -98,18 +99,20 @@ long long os_nsecs(void) */ void os_idle_sleep(void) { - struct itimerspec its; sigset_t set, old; - /* block SIGALRM while we analyze the timer state */ + /* Block SIGALRM while performing the need_resched check. */ sigemptyset(&set); sigaddset(&set, SIGALRM); sigprocmask(SIG_BLOCK, &set, &old); - /* check the timer, and if it'll fire then wait for it */ - timer_gettime(event_high_res_timer, &its); - if (its.it_value.tv_sec || its.it_value.tv_nsec) + /* + * Because disabling IRQs does not block SIGALRM, it is also + * necessary to check for any pending timer alarms. 
+ */ + if (!uml_need_resched() && !timer_alarm_pending()) sigsuspend(&old); - /* either way, restore the signal mask */ + + /* Restore the signal mask. */ sigprocmask(SIG_UNBLOCK, &set, NULL); } From 9c82de55d4783e906f18219f833ad97fd8d9c5df Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:11 +0800 Subject: [PATCH 15/33] um: Define timers on a per-CPU basis Define timers on a per-CPU basis to enable each CPU to have its own timer. This is a preparation for adding SMP support. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-5-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/um/include/linux/time-internal.h | 3 ++ arch/um/include/shared/os.h | 6 +-- arch/um/kernel/irq.c | 2 +- arch/um/kernel/time.c | 58 +++++++++++++++++++-------- arch/um/os-Linux/main.c | 2 +- arch/um/os-Linux/time.c | 29 +++++++++----- 6 files changed, 69 insertions(+), 31 deletions(-) diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 138908b999d7..c274eb5ad55e 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -90,4 +90,7 @@ extern unsigned long tt_extra_sched_jiffies; * which is intentional since we really shouldn't link it in that case. */ void time_travel_ndelay(unsigned long nsec); + +int um_setup_timer(void); + #endif /* __TIMER_INTERNAL_H__ */ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 324d4eed3385..0ca6e4548671 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -269,9 +269,9 @@ extern void os_warn(const char *fmt, ...) 
/* time.c */ extern void os_idle_sleep(void); extern int os_timer_create(void); -extern int os_timer_set_interval(unsigned long long nsecs); -extern int os_timer_one_shot(unsigned long long nsecs); -extern void os_timer_disable(void); +extern int os_timer_set_interval(int cpu, unsigned long long nsecs); +extern int os_timer_one_shot(int cpu, unsigned long long nsecs); +extern void os_timer_disable(int cpu); extern long long os_persistent_clock_emulation(void); extern long long os_nsecs(void); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index d69d137a0334..e95f6c5a259d 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -683,7 +683,7 @@ void __init init_IRQ(void) { int i; - irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); + irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq); for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 17da0a870650..b344a36b44eb 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -625,9 +625,10 @@ void time_travel_sleep(void) * controller application. */ unsigned long long next = S64_MAX; + int cpu = raw_smp_processor_id(); if (time_travel_mode == TT_MODE_BASIC) - os_timer_disable(); + os_timer_disable(cpu); time_travel_update_time(next, true); @@ -638,9 +639,9 @@ void time_travel_sleep(void) * This is somewhat wrong - we should get the first * one sooner like the os_timer_one_shot() below... 
*/ - os_timer_set_interval(time_travel_timer_interval); + os_timer_set_interval(cpu, time_travel_timer_interval); } else { - os_timer_one_shot(time_travel_timer_event.time - next); + os_timer_one_shot(cpu, time_travel_timer_event.time - next); } } } @@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time); #define time_travel_del_event(e) do { } while (0) #endif +static struct clock_event_device timer_clockevent[NR_CPUS]; + void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { unsigned long flags; @@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) static int itimer_shutdown(struct clock_event_device *evt) { + int cpu = evt - &timer_clockevent[0]; + if (time_travel_mode != TT_MODE_OFF) time_travel_del_event(&time_travel_timer_event); if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - os_timer_disable(); + os_timer_disable(cpu); return 0; } @@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt) static int itimer_set_periodic(struct clock_event_device *evt) { unsigned long long interval = NSEC_PER_SEC / HZ; + int cpu = evt - &timer_clockevent[0]; if (time_travel_mode != TT_MODE_OFF) { time_travel_del_event(&time_travel_timer_event); @@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt) if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - os_timer_set_interval(interval); + os_timer_set_interval(cpu, interval); return 0; } @@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta, if (time_travel_mode != TT_MODE_INFCPU && time_travel_mode != TT_MODE_EXTERNAL) - return os_timer_one_shot(delta); + return os_timer_one_shot(raw_smp_processor_id(), delta); return 0; } @@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt) return itimer_next_event(0, evt); } -static struct clock_event_device timer_clockevent = { +static struct 
clock_event_device _timer_clockevent = { .name = "posix-timer", .rating = 250, - .cpumask = cpu_possible_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_state_shutdown = itimer_shutdown, @@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = { static irqreturn_t um_timer(int irq, void *dev) { + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; + /* * Interrupt the (possibly) running userspace process, technically this * should only happen if userspace is currently executing. @@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev) get_current()->mm) os_alarm_process(get_current()->mm->context.id.pid); - (*timer_clockevent.event_handler)(&timer_clockevent); + evt->event_handler(evt); return IRQ_HANDLED; } @@ -904,7 +912,24 @@ static struct clocksource timer_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init um_timer_setup(void) +int um_setup_timer(void) +{ + int cpu = raw_smp_processor_id(); + struct clock_event_device *evt = &timer_clockevent[cpu]; + int err; + + err = os_timer_create(); + if (err) + return err; + + memcpy(evt, &_timer_clockevent, sizeof(*evt)); + evt->cpumask = cpumask_of(cpu); + clockevents_register_device(evt); + + return 0; +} + +static void __init um_timer_init(void) { int err; @@ -913,8 +938,8 @@ static void __init um_timer_setup(void) printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); - err = os_timer_create(); - if (err != 0) { + err = um_setup_timer(); + if (err) { printk(KERN_ERR "creation of timer failed - errno = %d\n", -err); return; } @@ -924,7 +949,6 @@ static void __init um_timer_setup(void) printk(KERN_ERR "clocksource_register_hz returned %d\n", err); return; } - clockevents_register_device(&timer_clockevent); } void read_persistent_clock64(struct timespec64 *ts) @@ -945,7 +969,7 @@ void read_persistent_clock64(struct timespec64 *ts) void __init time_init(void) { 
timer_set_signal_handler(); - late_time_init = um_timer_setup; + late_time_init = um_timer_init; } #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT @@ -961,21 +985,21 @@ static int setup_time_travel(char *str) { if (strcmp(str, "=inf-cpu") == 0) { time_travel_mode = TT_MODE_INFCPU; - timer_clockevent.name = "time-travel-timer-infcpu"; + _timer_clockevent.name = "time-travel-timer-infcpu"; timer_clocksource.name = "time-travel-clock"; return 1; } if (strncmp(str, "=ext:", 5) == 0) { time_travel_mode = TT_MODE_EXTERNAL; - timer_clockevent.name = "time-travel-timer-external"; + _timer_clockevent.name = "time-travel-timer-external"; timer_clocksource.name = "time-travel-clock-external"; return time_travel_connect_external(str + 5); } if (!*str) { time_travel_mode = TT_MODE_BASIC; - timer_clockevent.name = "time-travel-timer"; + _timer_clockevent.name = "time-travel-timer"; timer_clocksource.name = "time-travel-clock"; return 1; } diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 3c63ce19e3bf..730723106228 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -171,7 +171,7 @@ int __init main(int argc, char **argv, char **envp) */ /* stop timers and set timer signal to be ignored */ - os_timer_disable(); + os_timer_disable(0); /* disable SIGIO for the fds and set SIGIO to be ignored */ err = deactivate_all_fds(); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index f3d4547e5227..e0197bfe4ac9 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -17,7 +17,7 @@ #include #include "internal.h" -static timer_t event_high_res_timer = 0; +static timer_t event_high_res_timer[CONFIG_NR_CPUS] = { 0 }; static inline long long timespec_to_ns(const struct timespec *ts) { @@ -32,20 +32,30 @@ long long os_persistent_clock_emulation(void) return timespec_to_ns(&realtime_tp); } +#ifndef sigev_notify_thread_id +#define sigev_notify_thread_id _sigev_un._tid +#endif + /** * os_timer_create() - create an new posix (interval) timer */ 
int os_timer_create(void) { - timer_t *t = &event_high_res_timer; + timer_t *t = &event_high_res_timer[0]; + struct sigevent sev = { + .sigev_notify = SIGEV_THREAD_ID, + .sigev_signo = SIGALRM, + .sigev_value.sival_ptr = t, + .sigev_notify_thread_id = gettid(), + }; - if (timer_create(CLOCK_MONOTONIC, NULL, t) == -1) + if (timer_create(CLOCK_MONOTONIC, &sev, t) == -1) return -1; return 0; } -int os_timer_set_interval(unsigned long long nsecs) +int os_timer_set_interval(int cpu, unsigned long long nsecs) { struct itimerspec its; @@ -55,13 +65,13 @@ int os_timer_set_interval(unsigned long long nsecs) its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC; its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC; - if (timer_settime(event_high_res_timer, 0, &its, NULL) == -1) + if (timer_settime(event_high_res_timer[cpu], 0, &its, NULL) == -1) return -errno; return 0; } -int os_timer_one_shot(unsigned long long nsecs) +int os_timer_one_shot(int cpu, unsigned long long nsecs) { struct itimerspec its = { .it_value.tv_sec = nsecs / UM_NSEC_PER_SEC, @@ -71,19 +81,20 @@ int os_timer_one_shot(unsigned long long nsecs) .it_interval.tv_nsec = 0, // we cheat here }; - timer_settime(event_high_res_timer, 0, &its, NULL); + timer_settime(event_high_res_timer[cpu], 0, &its, NULL); return 0; } /** * os_timer_disable() - disable the posix (interval) timer + * @cpu: the CPU for which the timer is to be disabled */ -void os_timer_disable(void) +void os_timer_disable(int cpu) { struct itimerspec its; memset(&its, 0, sizeof(struct itimerspec)); - timer_settime(event_high_res_timer, 0, &its, NULL); + timer_settime(event_high_res_timer[cpu], 0, &its, NULL); } long long os_nsecs(void) From 1e4ee5135d814fe4785890790cec81c3132888fb Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:12 +0800 Subject: [PATCH 16/33] um: Add initial SMP support Add initial symmetric multi-processing (SMP) support to UML. 
With this support enabled, users can tell UML to start multiple virtual processors, each represented as a separate host thread. In UML, kthreads and normal threads (when running in kernel mode) can be scheduled and executed simultaneously on different virtual processors. However, the userspace code of normal threads still runs within their respective single-threaded stubs. That is, SMP support is currently available both within the kernel and across different processes, but still remains limited within threads of the same process in userspace. Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-6-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- .../core/generic-idle-thread/arch-support.txt | 2 +- arch/um/Kconfig | 46 +++- arch/um/include/asm/current.h | 5 +- arch/um/include/asm/hardirq.h | 24 +- arch/um/include/asm/mmu.h | 10 + arch/um/include/asm/pgtable.h | 2 + arch/um/include/asm/smp.h | 15 +- arch/um/include/linux/smp-internal.h | 17 ++ arch/um/include/shared/os.h | 17 ++ arch/um/include/shared/skas/mm_id.h | 5 + arch/um/include/shared/skas/skas.h | 2 + arch/um/include/shared/smp.h | 20 ++ arch/um/kernel/Makefile | 1 + arch/um/kernel/irq.c | 25 ++ arch/um/kernel/process.c | 5 + arch/um/kernel/skas/mmu.c | 33 ++- arch/um/kernel/skas/process.c | 19 +- arch/um/kernel/smp.c | 242 ++++++++++++++++++ arch/um/kernel/tlb.c | 5 +- arch/um/kernel/trap.c | 2 +- arch/um/kernel/um_arch.c | 25 +- arch/um/os-Linux/Makefile | 4 +- arch/um/os-Linux/internal.h | 8 + arch/um/os-Linux/process.c | 20 ++ arch/um/os-Linux/signal.c | 31 ++- arch/um/os-Linux/skas/process.c | 39 ++- arch/um/os-Linux/smp.c | 148 +++++++++++ arch/um/os-Linux/start_up.c | 4 + arch/um/os-Linux/time.c | 38 ++- 29 files changed, 766 insertions(+), 48 deletions(-) create mode 100644 arch/um/include/linux/smp-internal.h create mode 100644 arch/um/include/shared/smp.h create mode 100644 arch/um/kernel/smp.c create mode 100644 arch/um/os-Linux/smp.c diff --git 
a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt index 0735cb5367b4..425442e31fa2 100644 --- a/Documentation/features/core/generic-idle-thread/arch-support.txt +++ b/Documentation/features/core/generic-idle-thread/arch-support.txt @@ -24,7 +24,7 @@ | s390: | ok | | sh: | ok | | sparc: | ok | - | um: | TODO | + | um: | ok | | x86: | ok | | xtensa: | ok | ----------------------- diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0b4d00596a8c..097c6a6265ef 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -28,6 +28,7 @@ config UML select OF_EARLY_FLATTREE if OF select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES + select GENERIC_SMP_IDLE_THREAD select HAVE_GCC_PLUGINS select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN @@ -81,10 +82,48 @@ config HZ int default 100 -config NR_CPUS +config UML_SUBARCH_SUPPORTS_SMP + bool + +config SMP + bool "Symmetric multi-processing support" + default n + depends on UML_SUBARCH_SUPPORTS_SMP + help + This option enables UML SMP support. + + With this enabled, users can tell UML to start multiple virtual + processors. Each virtual processor is represented as a separate + host thread. + + In UML, kthreads and normal threads (when running in kernel mode) + can be scheduled and executed simultaneously on different virtual + processors. However, the userspace code of normal threads still + runs within their respective single-threaded stubs. + + That is, SMP support is available both within the kernel and + across different processes, but remains limited within threads + of the same process in userspace. 
+ +config NR_CPUS_RANGE_BEGIN int - range 1 1 - default 1 + default 1 if !SMP + default 2 + +config NR_CPUS_RANGE_END + int + default 1 if !SMP + default 64 + +config NR_CPUS_DEFAULT + int + default 1 if !SMP + default 2 + +config NR_CPUS + int "Maximum number of CPUs" if SMP + range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END + default NR_CPUS_DEFAULT source "arch/$(HEADER_ARCH)/um/Kconfig" @@ -254,6 +293,7 @@ source "arch/um/drivers/Kconfig" config ARCH_SUSPEND_POSSIBLE def_bool y + depends on !SMP menu "Power management options" diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h index 8accc6d6f502..159a29b3d4cc 100644 --- a/arch/um/include/asm/current.h +++ b/arch/um/include/asm/current.h @@ -7,15 +7,16 @@ #ifndef __ASSEMBLER__ +#include + struct task_struct; extern struct task_struct *cpu_tasks[NR_CPUS]; static __always_inline struct task_struct *get_current(void) { - return cpu_tasks[0]; + return cpu_tasks[uml_curr_cpu()]; } - #define current get_current() #endif /* __ASSEMBLER__ */ diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h index 52e2c36267a9..8de71752a9b8 100644 --- a/arch/um/include/asm/hardirq.h +++ b/arch/um/include/asm/hardirq.h @@ -2,8 +2,30 @@ #ifndef __ASM_UM_HARDIRQ_H #define __ASM_UM_HARDIRQ_H -#include +#include +#include #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 +typedef struct { + unsigned int __softirq_pending; +#if IS_ENABLED(CONFIG_SMP) + unsigned int irq_resched_count; + unsigned int irq_call_count; +#endif +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) this_cpu_inc(irq_stat.member) + +#include + +static inline void ack_bad_irq(unsigned int irq) +{ + pr_crit("unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_UM_HARDIRQ_H */ diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h index 4d0e4239f3cc..07d48738b402 100644 --- a/arch/um/include/asm/mmu.h +++ 
b/arch/um/include/asm/mmu.h @@ -7,16 +7,26 @@ #define __ARCH_UM_MMU_H #include "linux/types.h" +#include +#include #include typedef struct mm_context { struct mm_id id; + struct mutex turnstile; struct list_head list; /* Address range in need of a TLB sync */ + spinlock_t sync_tlb_lock; unsigned long sync_tlb_range_from; unsigned long sync_tlb_range_to; } mm_context_t; +#define INIT_MM_CONTEXT(mm) \ + .context = { \ + .turnstile = __MUTEX_INITIALIZER(mm.context.turnstile), \ + .sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock), \ + } + #endif diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 6ca7583003cd..1a0d7405e97c 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -225,6 +225,8 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start, unsigned long end) { + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); + if (!mm->context.sync_tlb_range_to) { mm->context.sync_tlb_range_from = start; mm->context.sync_tlb_range_to = end; diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h index a8cc1d46ddcb..be1743a6ff3c 100644 --- a/arch/um/include/asm/smp.h +++ b/arch/um/include/asm/smp.h @@ -2,6 +2,19 @@ #ifndef __UM_SMP_H #define __UM_SMP_H -#define hard_smp_processor_id() 0 +#if IS_ENABLED(CONFIG_SMP) + +#include +#include + +#define raw_smp_processor_id() uml_curr_cpu() + +void arch_smp_send_reschedule(int cpu); + +void arch_send_call_function_single_ipi(int cpu); + +void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#endif /* CONFIG_SMP */ #endif diff --git a/arch/um/include/linux/smp-internal.h b/arch/um/include/linux/smp-internal.h new file mode 100644 index 000000000000..1dbcbc23f9c9 --- /dev/null +++ b/arch/um/include/linux/smp-internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SMP_INTERNAL_H +#define __UM_SMP_INTERNAL_H + +#if 
IS_ENABLED(CONFIG_SMP) + +void prefill_possible_map(void); + +#else /* !CONFIG_SMP */ + +static inline void prefill_possible_map(void) { } + +#endif /* CONFIG_SMP */ + +extern char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); + +#endif /* __UM_SMP_INTERNAL_H */ diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 0ca6e4548671..b26e94292fc1 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -216,6 +216,9 @@ extern int can_drop_memory(void); void os_set_pdeathsig(void); +int os_futex_wait(void *uaddr, unsigned int val); +int os_futex_wake(void *uaddr); + /* execvp.c */ extern int execvp_noalloc(char *buf, const char *file, char *const argv[]); /* helper.c */ @@ -267,6 +270,7 @@ extern void os_warn(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); /* time.c */ +void os_idle_prepare(void); extern void os_idle_sleep(void); extern int os_timer_create(void); extern int os_timer_set_interval(int cpu, unsigned long long nsecs); @@ -339,4 +343,17 @@ extern void um_trace_signals_off(void); /* time-travel */ extern void deliver_time_travel_irqs(void); +/* smp.c */ +#if IS_ENABLED(CONFIG_SMP) +void os_init_smp(void); +int os_start_cpu_thread(int cpu); +void os_start_secondary(void *arg, jmp_buf *switch_buf); +int os_send_ipi(int cpu, int vector); +void os_local_ipi_enable(void); +void os_local_ipi_disable(void); +#else /* !CONFIG_SMP */ +static inline void os_local_ipi_enable(void) { } +static inline void os_local_ipi_disable(void) { } +#endif /* CONFIG_SMP */ + #endif diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 4f977ef5dda5..fb96c0bd8222 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -6,6 +6,8 @@ #ifndef __MM_ID_H #define __MM_ID_H +#include + #define STUB_MAX_FDS 4 struct mm_id { @@ -19,6 +21,9 @@ struct mm_id { int syscall_fd_map[STUB_MAX_FDS]; }; +void enter_turnstile(struct mm_id *mm_id) 
__acquires(turnstile); +void exit_turnstile(struct mm_id *mm_id) __releases(turnstile); + void notify_mm_kill(int pid); #endif diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h index 807514e10538..2237ffedec75 100644 --- a/arch/um/include/shared/skas/skas.h +++ b/arch/um/include/shared/skas/skas.h @@ -15,5 +15,7 @@ extern void handle_syscall(struct uml_pt_regs *regs); extern unsigned long current_stub_stack(void); extern struct mm_id *current_mm_id(void); extern void current_mm_sync(void); +void initial_jmpbuf_lock(void); +void initial_jmpbuf_unlock(void); #endif diff --git a/arch/um/include/shared/smp.h b/arch/um/include/shared/smp.h new file mode 100644 index 000000000000..06e3faa95091 --- /dev/null +++ b/arch/um/include/shared/smp.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __UM_SHARED_SMP_H +#define __UM_SHARED_SMP_H + +#if IS_ENABLED(CONFIG_SMP) + +extern int uml_ncpus; + +int uml_curr_cpu(void); +void uml_start_secondary(void *opaque); +void uml_ipi_handler(int vector); + +#else /* !CONFIG_SMP */ + +#define uml_ncpus 1 +#define uml_curr_cpu() 0 + +#endif /* CONFIG_SMP */ + +#endif /* __UM_SHARED_SMP_H */ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index b8f4e9281599..be60bc451b3f 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_OF) += dtb.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_SMP) += smp.o USER_OBJS := config.o diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index e95f6c5a259d..f4b13f15a9c1 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -22,6 +22,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); + +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* When epoll triggers we do not know why it did so * we can also have different IRQs for read and write. 
@@ -701,3 +704,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si, { do_IRQ(SIGCHLD_IRQ, regs); } + +/* + * /proc/interrupts printing for arch specific interrupts + */ +int arch_show_interrupts(struct seq_file *p, int prec) +{ +#if IS_ENABLED(CONFIG_SMP) + int cpu; + + seq_printf(p, "%*s: ", prec, "RES"); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count); + seq_puts(p, " Rescheduling interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CAL"); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count); + seq_puts(p, " Function call interrupts\n"); +#endif + + return 0; +} diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3b28048f269c..63b38a3f73f7 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -218,6 +218,11 @@ void arch_cpu_idle(void) um_idle_sleep(); } +void arch_cpu_idle_prepare(void) +{ + os_idle_prepare(); +} + int __uml_cant_sleep(void) { return in_atomic() || irqs_disabled() || in_interrupt(); /* Is in_interrupt() really needed? 
*/ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index afe9a2f251ef..00957788591b 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -23,17 +23,36 @@ static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE); static spinlock_t mm_list_lock; static struct list_head mm_list; +void enter_turnstile(struct mm_id *mm_id) __acquires(turnstile) +{ + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); + + mutex_lock(&ctx->turnstile); +} + +void exit_turnstile(struct mm_id *mm_id) __releases(turnstile) +{ + struct mm_context *ctx = container_of(mm_id, struct mm_context, id); + + mutex_unlock(&ctx->turnstile); +} + int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_id *new_id = &mm->context.id; unsigned long stack = 0; int ret = -ENOMEM; + mutex_init(&mm->context.turnstile); + spin_lock_init(&mm->context.sync_tlb_lock); + stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES)); if (stack == 0) goto out; new_id->stack = stack; + new_id->syscall_data_len = 0; + new_id->syscall_fd_num = 0; scoped_guard(spinlock_irqsave, &mm_list_lock) { /* Insert into list, used for lookups when the child dies */ @@ -73,6 +92,9 @@ void destroy_context(struct mm_struct *mm) return; } + scoped_guard(spinlock_irqsave, &mm_list_lock) + list_del(&mm->context.list); + if (mmu->id.pid > 0) { os_kill_ptraced_process(mmu->id.pid, 1); mmu->id.pid = -1; @@ -82,10 +104,6 @@ void destroy_context(struct mm_struct *mm) os_close_file(mmu->id.sock); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); - - guard(spinlock_irqsave)(&mm_list_lock); - - list_del(&mm->context.list); } static irqreturn_t mm_sigchld_irq(int irq, void* dev) @@ -110,12 +128,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev) /* Marks the MM as dead */ mm_context->id.pid = -1; - /* - * NOTE: If SMP is implemented, a futex_wake - * needs to be added here. 
- */ stub_data = (void *)mm_context->id.stack; stub_data->futex = FUTEX_IN_KERN; +#if IS_ENABLED(CONFIG_SMP) + os_futex_wake(&stub_data->futex); +#endif /* * NOTE: Currently executing syscalls by diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 5881b17eb987..4a7673b0261a 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -26,12 +27,12 @@ static int __init start_kernel_proc(void *unused) return 0; } -static char cpu0_irqstack[THREAD_SIZE] __aligned(THREAD_SIZE); +char cpu_irqstacks[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE); int __init start_uml(void) { - stack_protections((unsigned long) &cpu0_irqstack); - set_sigstack(cpu0_irqstack, THREAD_SIZE); + stack_protections((unsigned long) &cpu_irqstacks[0]); + set_sigstack(cpu_irqstacks[0], THREAD_SIZE); init_new_thread_signals(); @@ -64,3 +65,15 @@ void current_mm_sync(void) um_tlb_sync(current->mm); } + +static DEFINE_SPINLOCK(initial_jmpbuf_spinlock); + +void initial_jmpbuf_lock(void) +{ + spin_lock_irq(&initial_jmpbuf_spinlock); +} + +void initial_jmpbuf_unlock(void) +{ + spin_unlock_irq(&initial_jmpbuf_spinlock); +} diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c new file mode 100644 index 000000000000..f1e52b7348fb --- /dev/null +++ b/arch/um/kernel/smp.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie + * + * Based on the previous implementation in TT mode + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + UML_IPI_RES = 0, + UML_IPI_CALL_SINGLE, + UML_IPI_CALL, + UML_IPI_STOP, +}; + +void arch_smp_send_reschedule(int cpu) +{ + os_send_ipi(cpu, UML_IPI_RES); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + os_send_ipi(cpu, 
UML_IPI_CALL_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + for_each_cpu(cpu, mask) + os_send_ipi(cpu, UML_IPI_CALL); +} + +void smp_send_stop(void) +{ + int cpu, me = smp_processor_id(); + + for_each_online_cpu(cpu) { + if (cpu == me) + continue; + os_send_ipi(cpu, UML_IPI_STOP); + } +} + +static void ipi_handler(int vector, struct uml_pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); + int cpu = raw_smp_processor_id(); + + irq_enter(); + + if (current->mm) + os_alarm_process(current->mm->context.id.pid); + + switch (vector) { + case UML_IPI_RES: + inc_irq_stat(irq_resched_count); + scheduler_ipi(); + break; + + case UML_IPI_CALL_SINGLE: + inc_irq_stat(irq_call_count); + generic_smp_call_function_single_interrupt(); + break; + + case UML_IPI_CALL: + inc_irq_stat(irq_call_count); + generic_smp_call_function_interrupt(); + break; + + case UML_IPI_STOP: + set_cpu_online(cpu, false); + while (1) + pause(); + break; + + default: + pr_err("CPU#%d received unknown IPI (vector=%d)!\n", cpu, vector); + break; + } + + irq_exit(); + set_irq_regs(old_regs); +} + +void uml_ipi_handler(int vector) +{ + struct uml_pt_regs r = { .is_user = 0 }; + + preempt_disable(); + ipi_handler(vector, &r); + preempt_enable(); +} + +/* AP states used only during CPU startup */ +enum { + UML_CPU_PAUSED = 0, + UML_CPU_RUNNING, +}; + +static int cpu_states[NR_CPUS]; + +static int start_secondary(void *unused) +{ + int err, cpu = raw_smp_processor_id(); + + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + + err = um_setup_timer(); + if (err) + panic("CPU#%d failed to setup timer, err = %d", cpu, err); + + local_irq_enable(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + return 0; +} + +void uml_start_secondary(void *opaque) +{ + int cpu = raw_smp_processor_id(); + struct mm_struct *mm = &init_mm; + struct task_struct *idle; + + stack_protections((unsigned long) &cpu_irqstacks[cpu]); + 
set_sigstack(&cpu_irqstacks[cpu], THREAD_SIZE); + + set_cpu_present(cpu, true); + os_futex_wait(&cpu_states[cpu], UML_CPU_PAUSED); + + smp_rmb(); /* paired with smp_wmb() in __cpu_up() */ + + idle = cpu_tasks[cpu]; + idle->thread_info.cpu = cpu; + + mmgrab(mm); + idle->active_mm = mm; + + idle->thread.request.thread.proc = start_secondary; + idle->thread.request.thread.arg = NULL; + + new_thread(task_stack_page(idle), &idle->thread.switch_buf, + new_thread_handler); + os_start_secondary(opaque, &idle->thread.switch_buf); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int err, cpu, me = smp_processor_id(); + unsigned long deadline; + + os_init_smp(); + + for_each_possible_cpu(cpu) { + if (cpu == me) + continue; + + pr_debug("Booting processor %d...\n", cpu); + err = os_start_cpu_thread(cpu); + if (err) { + pr_crit("CPU#%d failed to start cpu thread, err = %d", + cpu, err); + continue; + } + + deadline = jiffies + msecs_to_jiffies(1000); + spin_until_cond(cpu_present(cpu) || + time_is_before_jiffies(deadline)); + + if (!cpu_present(cpu)) + pr_crit("CPU#%d failed to boot\n", cpu); + } +} + +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + cpu_tasks[cpu] = tidle; + smp_wmb(); /* paired with smp_rmb() in uml_start_secondary() */ + cpu_states[cpu] = UML_CPU_RUNNING; + os_futex_wake(&cpu_states[cpu]); + spin_until_cond(cpu_online(cpu)); + + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* Set in uml_ncpus_setup */ +int uml_ncpus = 1; + +void __init prefill_possible_map(void) +{ + int cpu; + + for (cpu = 0; cpu < uml_ncpus; cpu++) + set_cpu_possible(cpu, true); + for (; cpu < NR_CPUS; cpu++) + set_cpu_possible(cpu, false); +} + +static int __init uml_ncpus_setup(char *line, int *add) +{ + *add = 0; + + if (kstrtoint(line, 10, &uml_ncpus)) { + os_warn("%s: Couldn't parse '%s'\n", __func__, line); + return -1; + } + + uml_ncpus = clamp(uml_ncpus, 1, NR_CPUS); + + return 0; +} + +__uml_setup("ncpus=", uml_ncpus_setup, 
+"ncpus=<# of desired CPUs>\n" +" This tells UML how many virtual processors to start. The maximum\n" +" number of supported virtual processors can be obtained by querying\n" +" the CONFIG_NR_CPUS option using --showconfig.\n\n" +); + +EXPORT_SYMBOL(uml_curr_cpu); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index cf7e0d4407f2..39608cccf2c6 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm) { pgd_t *pgd; struct vm_ops ops; - unsigned long addr = mm->context.sync_tlb_range_from, next; + unsigned long addr, next; int ret = 0; + guard(spinlock_irqsave)(&mm->context.sync_tlb_lock); + if (mm->context.sync_tlb_range_to == 0) return 0; @@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm) ops.unmap = unmap; } + addr = mm->context.sync_tlb_range_from; pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, mm->context.sync_tlb_range_to); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 5b80a3a89c20..177615820a4c 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); - if (!is_user && init_mm.context.sync_tlb_range_to) { + if (!is_user && address >= start_vm && address < end_vm) { /* * Kernel has pending updates from set_ptes that were not * flushed yet. 
Syncing them should fix the pagefault (if not diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index cf06bb732ed8..e2b24e1ecfa6 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) { int i = 0; +#if IS_ENABLED(CONFIG_SMP) + i = (uintptr_t) v - 1; + if (!cpu_online(i)) + return 0; +#endif + seq_printf(m, "processor\t: %d\n", i); seq_printf(m, "vendor_id\t: User Mode Linux\n"); seq_printf(m, "model name\t: UML\n"); @@ -87,13 +94,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); - return 0; } static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL; + if (*pos < nr_cpu_ids) + return (void *)(uintptr_t)(*pos + 1); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) @@ -409,6 +417,7 @@ void __init setup_arch(char **cmdline_p) strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(host_info, sizeof host_info); + prefill_possible_map(); if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) { add_bootloader_randomness(rng_seed, sizeof(rng_seed)); @@ -443,6 +452,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } +#if IS_ENABLED(CONFIG_SMP) +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ +} + +void alternatives_smp_module_del(struct module *mod) +{ +} +#endif + void *text_poke(void *addr, const void *opcode, size_t len) { /* diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index fae836713487..70c73c22f715 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than 
obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o +obj-$(CONFIG_SMP) += smp.o + USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ - tty.o umid.o util.o + tty.o umid.o util.o smp.o include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index c2c7a0dc673c..bac9fcc8c14c 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -4,6 +4,7 @@ #include #include +#include /* * elf_aux.c @@ -18,6 +19,7 @@ void check_tmpexec(void); /* * signal.c */ +extern __thread int signals_enabled; int timer_alarm_pending(void); /* @@ -25,4 +27,10 @@ int timer_alarm_pending(void); */ void wait_stub_done(int pid); void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); + +/* + * smp.c + */ +#define IPI_SIGNAL SIGRTMIN + #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 00b49e90d05f..3a2a84ab9325 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -189,3 +191,21 @@ void os_set_pdeathsig(void) { prctl(PR_SET_PDEATHSIG, SIGKILL); } + +int os_futex_wait(void *uaddr, unsigned int val) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAIT, val, + NULL, NULL, 0)); + return r < 0 ? -errno : r; +} + +int os_futex_wake(void *uaddr) +{ + int r; + + CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, INT_MAX, + NULL, NULL, 0)); + return r < 0 ? 
-errno : r; +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 554a87dd32cc..327fb3c52fc7 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -69,7 +69,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGCHLD_BIT 2 #define SIGCHLD_MASK (1 << SIGCHLD_BIT) -static __thread int signals_enabled; +__thread int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; #endif @@ -259,9 +259,29 @@ int change_sig(int signal, int on) return 0; } +static inline void __block_signals(void) +{ + if (!signals_enabled) + return; + + os_local_ipi_disable(); + barrier(); + signals_enabled = 0; +} + +static inline void __unblock_signals(void) +{ + if (signals_enabled) + return; + + signals_enabled = 1; + barrier(); + os_local_ipi_enable(); +} + void block_signals(void) { - signals_enabled = 0; + __block_signals(); /* * This must return with signals disabled, so this barrier * ensures that writes are flushed out before the return. @@ -278,7 +298,8 @@ void unblock_signals(void) if (signals_enabled == 1) return; - signals_enabled = 1; + __unblock_signals(); + #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) deliver_time_travel_irqs(); #endif @@ -312,7 +333,7 @@ void unblock_signals(void) * tracing that happens inside the handlers we call for the * pending signals will mess up the tracing state. */ - signals_enabled = 0; + __block_signals(); um_trace_signals_off(); /* @@ -344,7 +365,7 @@ void unblock_signals(void) /* Re-enable signals and trace that we're doing so. 
*/ um_trace_signals_on(); - signals_enabled = 1; + __unblock_signals(); } } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 820846ff7179..d6c22f8aa06d 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -546,7 +546,7 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op; - siginfo_t si_ptrace; + siginfo_t si_local; siginfo_t *si; int sig; @@ -556,6 +556,13 @@ void userspace(struct uml_pt_regs *regs) while (1) { struct mm_id *mm_id = current_mm_id(); + /* + * At any given time, only one CPU thread can enter the + * turnstile to operate on the same stub process, including + * executing stub system calls (mmap and munmap). + */ + enter_turnstile(mm_id); + /* * When we are in time-travel mode, userspace can theoretically * do a *lot* of work without being scheduled. The problem with @@ -623,9 +630,10 @@ void userspace(struct uml_pt_regs *regs) } if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) - panic("%s - Invalid siginfo offset from child", - __func__); - si = (void *)&proc_data->sigstack[proc_data->si_offset]; + panic("%s - Invalid siginfo offset from child", __func__); + + si = &si_local; + memcpy(si, &proc_data->sigstack[proc_data->si_offset], sizeof(*si)); regs->is_user = 1; @@ -721,8 +729,8 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: ptrace(PTRACE_GETSIGINFO, pid, 0, - (struct siginfo *)&si_ptrace); - si = &si_ptrace; + (struct siginfo *)&si_local); + si = &si_local; break; default: si = NULL; @@ -733,6 +741,8 @@ void userspace(struct uml_pt_regs *regs) } } + exit_turnstile(mm_id); + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ if (sig) { @@ -802,10 +812,9 @@ void switch_threads(jmp_buf *me, jmp_buf *you) static jmp_buf initial_jmpbuf; -/* XXX Make these percpu */ -static void (*cb_proc)(void *arg); -static void *cb_arg; -static jmp_buf *cb_back; +static __thread void 
(*cb_proc)(void *arg); +static __thread void *cb_arg; +static __thread jmp_buf *cb_back; int start_idle_thread(void *stack, jmp_buf *switch_buf) { @@ -859,10 +868,10 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) cb_arg = arg; cb_back = &here; - block_signals_trace(); + initial_jmpbuf_lock(); if (UML_SETJMP(&here) == 0) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); - unblock_signals_trace(); + initial_jmpbuf_unlock(); cb_proc = NULL; cb_arg = NULL; @@ -871,8 +880,9 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg) void halt_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); + /* unreachable */ } static bool noreboot; @@ -892,6 +902,7 @@ __uml_setup("noreboot", noreboot_cmd_param, void reboot_skas(void) { - block_signals_trace(); + initial_jmpbuf_lock(); UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); + /* unreachable */ } diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c new file mode 100644 index 000000000000..18d3858a7cd2 --- /dev/null +++ b/arch/um/os-Linux/smp.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2025 Ant Group + * Author: Tiwei Bie + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +struct cpu_thread_data { + int cpu; + sigset_t sigset; +}; + +static __thread int __curr_cpu; + +int uml_curr_cpu(void) +{ + return __curr_cpu; +} + +static pthread_t cpu_threads[CONFIG_NR_CPUS]; + +static void *cpu_thread(void *arg) +{ + struct cpu_thread_data *data = arg; + + __curr_cpu = data->cpu; + + uml_start_secondary(data); + + return NULL; +} + +int os_start_cpu_thread(int cpu) +{ + struct cpu_thread_data *data; + sigset_t sigset, oset; + int err; + + data = uml_kmalloc(sizeof(*data), UM_GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sigfillset(&sigset); + if (sigprocmask(SIG_SETMASK, &sigset, &oset) < 0) { + err = errno; + goto err; + } + + 
data->cpu = cpu; + data->sigset = oset; + + err = pthread_create(&cpu_threads[cpu], NULL, cpu_thread, data); + if (sigprocmask(SIG_SETMASK, &oset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + if (err != 0) + goto err; + + return 0; + +err: + kfree(data); + return -err; +} + +void os_start_secondary(void *arg, jmp_buf *switch_buf) +{ + struct cpu_thread_data *data = arg; + + sigaddset(&data->sigset, IPI_SIGNAL); + sigaddset(&data->sigset, SIGIO); + + if (sigprocmask(SIG_SETMASK, &data->sigset, NULL) < 0) + panic("Failed to restore the signal mask, errno = %d", errno); + + kfree(data); + longjmp(*switch_buf, 1); + + /* unreachable */ + printk(UM_KERN_ERR "impossible long jump!"); + fatal_sigsegv(); +} + +int os_send_ipi(int cpu, int vector) +{ + union sigval value = { .sival_int = vector }; + + return pthread_sigqueue(cpu_threads[cpu], IPI_SIGNAL, value); +} + +static void __local_ipi_set(int enable) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, IPI_SIGNAL); + + if (sigprocmask(enable ? 
SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + panic("%s: sigprocmask failed, errno = %d", __func__, errno); +} + +void os_local_ipi_enable(void) +{ + __local_ipi_set(1); +} + +void os_local_ipi_disable(void) +{ + __local_ipi_set(0); +} + +static void ipi_sig_handler(int sig, siginfo_t *si, void *uc) +{ + int save_errno = errno; + + signals_enabled = 0; + um_trace_signals_off(); + + uml_ipi_handler(si->si_value.sival_int); + + um_trace_signals_on(); + signals_enabled = 1; + + errno = save_errno; +} + +void __init os_init_smp(void) +{ + struct sigaction action = { + .sa_sigaction = ipi_sig_handler, + .sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART, + }; + + sigfillset(&action.sa_mask); + + if (sigaction(IPI_SIGNAL, &action, NULL) < 0) + panic("%s: sigaction failed, errno = %d", __func__, errno); + + cpu_threads[0] = pthread_self(); +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 8b19dca83f71..054ac03bbf5e 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -481,6 +482,9 @@ void __init os_early_checks(void) fatal("SECCOMP userspace requested but not functional!\n"); } + if (uml_ncpus > 1) + fatal("SMP is not supported with PTRACE userspace.\n"); + using_seccomp = 0; check_ptrace(); diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index e0197bfe4ac9..13ebc86918d4 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -11,9 +11,11 @@ #include #include #include +#include #include #include #include +#include #include #include "internal.h" @@ -41,7 +43,8 @@ long long os_persistent_clock_emulation(void) */ int os_timer_create(void) { - timer_t *t = &event_high_res_timer[0]; + int cpu = uml_curr_cpu(); + timer_t *t = &event_high_res_timer[cpu]; struct sigevent sev = { .sigev_notify = SIGEV_THREAD_ID, .sigev_signo = SIGALRM, @@ -105,24 +108,49 @@ long long os_nsecs(void) return timespec_to_ns(&ts); } +static 
__thread int wake_signals; + +void os_idle_prepare(void) +{ + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigaddset(&set, IPI_SIGNAL); + + /* + * We need to use signalfd rather than sigsuspend in idle sleep + * because the IPI signal is a real-time signal that carries data, + * and unlike handling SIGALRM, we cannot simply flag it in + * signals_pending. + */ + wake_signals = signalfd(-1, &set, SFD_CLOEXEC); + if (wake_signals < 0) + panic("Failed to create signal FD, errno = %d", errno); +} + /** * os_idle_sleep() - sleep until interrupted */ void os_idle_sleep(void) { - sigset_t set, old; + sigset_t set; - /* Block SIGALRM while performing the need_resched check. */ + /* + * Block SIGALRM while performing the need_resched check. + * Note that, because IRQs are disabled, the IPI signal is + * already blocked. + */ sigemptyset(&set); sigaddset(&set, SIGALRM); - sigprocmask(SIG_BLOCK, &set, &old); + sigprocmask(SIG_BLOCK, &set, NULL); /* * Because disabling IRQs does not block SIGALRM, it is also * necessary to check for any pending timer alarms. */ if (!uml_need_resched() && !timer_alarm_pending()) - sigsuspend(&old); + os_poll(1, &wake_signals); /* Restore the signal mask. */ sigprocmask(SIG_UNBLOCK, &set, NULL); From 37f847b7949605e7803d72d6275beeaaf60099e6 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:13 +0800 Subject: [PATCH 17/33] um: vdso: Remove getcpu support on x86 We are going to support SMP on UML/x86, so we can't hard code the CPU and NUMA node in __vdso_getcpu() anymore. Let's just remove it and let applications fall back to the syscall. 
Suggested-by: Johannes Berg Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-7-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/x86/um/vdso/um_vdso.c | 22 ---------------------- arch/x86/um/vdso/vdso.lds.S | 2 -- 2 files changed, 24 deletions(-) diff --git a/arch/x86/um/vdso/um_vdso.c b/arch/x86/um/vdso/um_vdso.c index 02d41fdb5655..ca1468865b14 100644 --- a/arch/x86/um/vdso/um_vdso.c +++ b/arch/x86/um/vdso/um_vdso.c @@ -11,12 +11,8 @@ #include #include -#include #include -/* workaround for -Wmissing-prototypes warnings */ -long __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); - int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { long ret; @@ -56,21 +52,3 @@ __kernel_old_time_t __vdso_time(__kernel_old_time_t *t) return secs; } __kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time"))); - -long -__vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) -{ - /* - * UML does not support SMP, we can cheat here. :) - */ - - if (cpu) - *cpu = 0; - if (node) - *node = 0; - - return 0; -} - -long getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *tcache) - __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86/um/vdso/vdso.lds.S b/arch/x86/um/vdso/vdso.lds.S index 73c508587a98..401600effc0a 100644 --- a/arch/x86/um/vdso/vdso.lds.S +++ b/arch/x86/um/vdso/vdso.lds.S @@ -22,8 +22,6 @@ VERSION { __vdso_clock_gettime; gettimeofday; __vdso_gettimeofday; - getcpu; - __vdso_getcpu; time; __vdso_time; local: *; From 8d748955279cfe1996e51ac51a4f746468614a10 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:14 +0800 Subject: [PATCH 18/33] asm-generic: percpu: Add assembly guard Currently, asm/percpu.h is directly or indirectly included by some assembly files on x86. Some of them (e.g., checksum_32.S) are also used on um. 
But x86 and um provide different versions of asm/percpu.h -- um uses asm-generic/percpu.h directly. When SMP is enabled, asm-generic/percpu.h will introduce C code that cannot be assembled. Since asm-generic/percpu.h currently is not designed for use in assembly, and these assembly files do not actually need asm/percpu.h on um, let's add the assembly guard in asm-generic/percpu.h to fix this issue. Cc: Arnd Bergmann Cc: linux-arch@vger.kernel.org Signed-off-by: Tiwei Bie Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251027001815.1666872-8-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- include/asm-generic/percpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 02aeca21479a..6628670bcb90 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -2,6 +2,8 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ +#ifndef __ASSEMBLER__ + #include #include #include @@ -557,4 +559,5 @@ do { \ this_cpu_generic_cmpxchg(pcp, oval, nval) #endif +#endif /* __ASSEMBLER__ */ #endif /* _ASM_GENERIC_PERCPU_H_ */ From aa3e6faf620b1091ae85b07c0106918d48fcce05 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Mon, 27 Oct 2025 08:18:15 +0800 Subject: [PATCH 19/33] um: Enable SMP support on x86 Implement spinlock support for SMP on UML/x86, leveraging x86's spinlock implementation. In addition, to support SMP on CPUs that do not support CX8, some additional work is required. However, considering that such CPUs are already very outdated, and x86 is also removing support for them [1], let's enable SMP support only on CPUs that support CX8. 
[1] https://lore.kernel.org/lkml/20250515085708.2510123-1-mingo@kernel.org/ Signed-off-by: Tiwei Bie Link: https://patch.msgid.link/20251027001815.1666872-9-tiwei.bie@linux.dev Signed-off-by: Johannes Berg --- arch/x86/um/Kconfig | 3 +++ arch/x86/um/asm/spinlock.h | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 arch/x86/um/asm/spinlock.h diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 986045d5e638..c52fb5cb8d21 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -9,8 +9,11 @@ endmenu config UML_X86 def_bool y select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS select DCACHE_WORD_ACCESS select HAVE_EFFICIENT_UNALIGNED_ACCESS + select UML_SUBARCH_SUPPORTS_SMP if X86_CX8 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/arch/x86/um/asm/spinlock.h b/arch/x86/um/asm/spinlock.h new file mode 100644 index 000000000000..20fc77514214 --- /dev/null +++ b/arch/x86/um/asm/spinlock.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_UM_SPINLOCK_H +#define __ASM_UM_SPINLOCK_H + +#include +#include + +#endif /* __ASM_UM_SPINLOCK_H */ From 8e03c195cc4d82100291500f772f85c686653748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:02:55 +0100 Subject: [PATCH 20/33] um: Avoid circular dependency on asm-offsets in pgtable.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent changes have added an include of as-layout.h to pgtable.h. However this introduces a circular dependency during asm-offsets generation as as-layout.h depends on asm-offsets and pgtable.h is an input for asm-offsets. 
Building from a clean state results in the following error: CC arch/um/kernel/asm-offsets.s In file included from arch/um/include/asm/pgtable.h:48, from include/linux/pgtable.h:6, from include/linux/mm.h:31, from include/linux/pid_namespace.h:7, from include/linux/ptrace.h:10, from include/linux/audit.h:13, from arch/um/kernel/asm-offsets.c:8: arch/um/include/shared/as-layout.h:9:10: fatal error: generated/asm-offsets.h: No such file or directory 9 | #include | ^~~~~~~~~~~~~~~~~~~~~~~~~ compilation terminated. make[4]: *** [scripts/Makefile.build:182: arch/um/kernel/asm-offsets.s] Error 1 As the inclusion of as-layout.h in pgtable.h is not yet needed while asm-offsets are generated, break the dependency here. Fixes: a7f7dbae94a5 ("um: Remove file-based iomem emulation support") Signed-off-by: Thomas Weißschuh Reviewed-by: Tiwei Bie Link: https://patch.msgid.link/20251028-uml-offsets-circular-v1-1-601c363cfaaa@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 1a0d7405e97c..3b42b0f45bf6 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -45,7 +45,9 @@ extern unsigned long *empty_zero_page; * area for the same reason. ;) */ +#ifndef COMPILE_OFFSETS #include /* for high_physmem */ +#endif #define VMALLOC_OFFSET (__va_space) #define VMALLOC_START ((high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) From 74d438872dae44abfcfffad4daccd7f22cdf7bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:36 +0100 Subject: [PATCH 21/33] um: Split out default elf_aux_platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting all auxiliary vector values to default values if one of them was not provided by the host will discard perfectly fine values. Move the elf_aux_platform fallback to its own conditional. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-1-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/os-Linux/elf_aux.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 0a0f91cf4d6d..a62fe39e85c9 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -62,14 +62,16 @@ __init void scan_elf_aux( char **envp) } } if ( ! __kernel_vsyscall || ! vsyscall_ehdr || - ! elf_aux_hwcap || ! elf_aux_platform || + ! elf_aux_hwcap || ! page_size || (vsyscall_ehdr % page_size) ) { __kernel_vsyscall = 0; vsyscall_ehdr = 0; elf_aux_hwcap = 0; - elf_aux_platform = "i586"; } else { vsyscall_end = vsyscall_ehdr + page_size; } + + if (!elf_aux_platform) + elf_aux_platform = "i586"; } From c1b077515116dc7916dbf72d8803a682c5989aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:37 +0100 Subject: [PATCH 22/33] x86/um: Move ELF_PLATFORM fallback to x86-specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic UM code should not have references to x86-specific value. Move the fallback into the x86-specific header. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-2-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/os-Linux/elf_aux.c | 3 --- arch/x86/um/asm/elf.h | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index a62fe39e85c9..4aadb9ea5ae3 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -71,7 +71,4 @@ __init void scan_elf_aux( char **envp) else { vsyscall_end = vsyscall_ehdr + page_size; } - - if (!elf_aux_platform) - elf_aux_platform = "i586"; } diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 62ed5d68a978..e7a045e01471 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -69,7 +69,8 @@ } while (0); extern char * elf_aux_platform; -#define ELF_PLATFORM (elf_aux_platform) +#define ELF_PLATFORM_FALLBACK "i586" +#define ELF_PLATFORM (elf_aux_platform ?: ELF_PLATFORM_FALLBACK) extern unsigned long vsyscall_ehdr; extern unsigned long vsyscall_end; From 83b4b44a2b05330d13a4432caae0b036f9621ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:38 +0100 Subject: [PATCH 23/33] um: Split out default elf_aux_hwcap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting all auxiliary vector values to default values if one of them was not provided by the host will discard perfectly fine values. Remove the elf_aux_hwcap fallback from the vDSO ones. As zero is the correct fallback anyways, don't create a new conditional.
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-3-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/os-Linux/elf_aux.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 4aadb9ea5ae3..9ee0e3199790 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -62,11 +62,9 @@ __init void scan_elf_aux( char **envp) } } if ( ! __kernel_vsyscall || ! vsyscall_ehdr || - ! elf_aux_hwcap || ! page_size || (vsyscall_ehdr % page_size) ) { __kernel_vsyscall = 0; vsyscall_ehdr = 0; - elf_aux_hwcap = 0; } else { vsyscall_end = vsyscall_ehdr + page_size; From 70d52694b6a67ace517da44ce4071594fcccd1ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:39 +0100 Subject: [PATCH 24/33] x86/um: Do not inherit vDSO from host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inheriting the vDSO from the host is problematic. The values read from the time functions will not be correct for the UML kernel. Furthermore the start and end of the vDSO are not stable or detectable by userspace. Specifically the vDSO datapages start before AT_SYSINFO_EHDR and the vDSO itself is larger than a single page. This codepath is only used on 32bit x86 UML. In my testing with both 32bit and 64bit hosts the passthrough functionality has always been disabled anyways due to the checks against envp in scan_elf_aux(). 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-4-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/os-Linux/elf_aux.c | 27 ------------- arch/um/os-Linux/user_syms.c | 6 --- arch/x86/um/Kconfig | 1 - arch/x86/um/Makefile | 1 - arch/x86/um/asm/elf.h | 29 ++------------ arch/x86/um/elfcore.c | 78 ------------------------------------ 6 files changed, 3 insertions(+), 139 deletions(-) delete mode 100644 arch/x86/um/elfcore.c diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 9ee0e3199790..f8927a5959d8 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -20,31 +20,15 @@ typedef Elf32_auxv_t elf_auxv_t; /* These are initialized very early in boot and never changed */ char * elf_aux_platform; extern long elf_aux_hwcap; -unsigned long vsyscall_ehdr; -unsigned long vsyscall_end; -unsigned long __kernel_vsyscall; __init void scan_elf_aux( char **envp) { - long page_size = 0; elf_auxv_t * auxv; while ( *envp++ != NULL) ; for ( auxv = (elf_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { switch ( auxv->a_type ) { - case AT_SYSINFO: - __kernel_vsyscall = auxv->a_un.a_val; - /* See if the page is under TASK_SIZE */ - if (__kernel_vsyscall < (unsigned long) envp) - __kernel_vsyscall = 0; - break; - case AT_SYSINFO_EHDR: - vsyscall_ehdr = auxv->a_un.a_val; - /* See if the page is under TASK_SIZE */ - if (vsyscall_ehdr < (unsigned long) envp) - vsyscall_ehdr = 0; - break; case AT_HWCAP: elf_aux_hwcap = auxv->a_un.a_val; break; @@ -56,17 +40,6 @@ __init void scan_elf_aux( char **envp) elf_aux_platform = (char *) (long) auxv->a_un.a_val; break; - case AT_PAGESZ: - page_size = auxv->a_un.a_val; - break; } } - if ( ! __kernel_vsyscall || ! vsyscall_ehdr || - ! 
page_size || (vsyscall_ehdr % page_size) ) { - __kernel_vsyscall = 0; - vsyscall_ehdr = 0; - } - else { - vsyscall_end = vsyscall_ehdr + page_size; - } } diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index a310ae27b479..67f6112318b6 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -31,12 +31,6 @@ extern void *memset(void *, int, size_t); EXPORT_SYMBOL(memset); #endif -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA -/* needed for __access_ok() */ -EXPORT_SYMBOL(vsyscall_ehdr); -EXPORT_SYMBOL(vsyscall_end); -#endif - #ifdef _FORTIFY_SOURCE extern int __sprintf_chk(char *str, int flag, size_t len, const char *format); EXPORT_SYMBOL(__sprintf_chk); diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index c52fb5cb8d21..798c6cc53e82 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,7 +8,6 @@ endmenu config UML_X86 def_bool y - select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select DCACHE_WORD_ACCESS diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index b42c31cd2390..1767e6061b4d 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -18,7 +18,6 @@ obj-y = bugs_$(BITS).o delay.o fault.o \ ifeq ($(CONFIG_X86_32),y) obj-y += syscalls_32.o -obj-$(CONFIG_ELF_CORE) += elfcore.o subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o subarch-y += ../lib/cmpxchg8b_emu.o ../lib/atomic64_386_32.o diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index e7a045e01471..8d7df4684c38 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -72,32 +72,9 @@ extern char * elf_aux_platform; #define ELF_PLATFORM_FALLBACK "i586" #define ELF_PLATFORM (elf_aux_platform ?: ELF_PLATFORM_FALLBACK) -extern unsigned long vsyscall_ehdr; -extern unsigned long vsyscall_end; -extern unsigned long __kernel_vsyscall; - -/* - * This is the range that is readable by user mode, and things - * acting like user mode such 
as get_user_pages. - */ -#define FIXADDR_USER_START vsyscall_ehdr -#define FIXADDR_USER_END vsyscall_end - - -/* - * Architecture-neutral AT_ values in 0-17, leave some room - * for more of them, start the x86-specific ones at 32. - */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - -#define ARCH_DLINFO \ -do { \ - if ( vsyscall_ehdr ) { \ - NEW_AUX_ENT(AT_SYSINFO, __kernel_vsyscall); \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, vsyscall_ehdr); \ - } \ -} while (0) +/* No user-accessible fixmap addresses, i.e. vsyscall */ +#define FIXADDR_USER_START 0 +#define FIXADDR_USER_END 0 #else diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c deleted file mode 100644 index ef50662fc40d..000000000000 --- a/arch/x86/um/elfcore.c +++ /dev/null @@ -1,78 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include - -#include - - -Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) -{ - return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; -} - -int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) -{ - if ( vsyscall_ehdr ) { - const struct elfhdr *const ehdrp = - (struct elfhdr *) vsyscall_ehdr; - const struct elf_phdr *const phdrp = - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); - int i; - Elf32_Off ofs = 0; - - for (i = 0; i < ehdrp->e_phnum; ++i) { - struct elf_phdr phdr = phdrp[i]; - - if (phdr.p_type == PT_LOAD) { - ofs = phdr.p_offset = offset; - offset += phdr.p_filesz; - } else { - phdr.p_offset += ofs; - } - phdr.p_paddr = 0; /* match other core phdrs */ - if (!dump_emit(cprm, &phdr, sizeof(phdr))) - return 0; - } - } - return 1; -} - -int elf_core_write_extra_data(struct coredump_params *cprm) -{ - if ( vsyscall_ehdr ) { - const struct elfhdr *const ehdrp = - (struct elfhdr *) vsyscall_ehdr; - const struct elf_phdr *const phdrp = - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); - int i; - - for (i = 0; i < ehdrp->e_phnum; ++i) { - if (phdrp[i].p_type == 
PT_LOAD) { - void *addr = (void *) phdrp[i].p_vaddr; - size_t filesz = phdrp[i].p_filesz; - if (!dump_emit(cprm, addr, filesz)) - return 0; - } - } - } - return 1; -} - -size_t elf_core_extra_data_size(struct coredump_params *cprm) -{ - if ( vsyscall_ehdr ) { - const struct elfhdr *const ehdrp = - (struct elfhdr *)vsyscall_ehdr; - const struct elf_phdr *const phdrp = - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); - int i; - - for (i = 0; i < ehdrp->e_phnum; ++i) - if (phdrp[i].p_type == PT_LOAD) - return (size_t) phdrp[i].p_filesz; - } - return 0; -} From dbd7cf408ab74abb62ae483a81094abb45c9111b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:40 +0100 Subject: [PATCH 25/33] x86/um: Drop gate area handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the removal of the vDSO passthrough from the host, FIXADDR_USER_START is always 0 and the gate area setup code is dead. Remove it. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-5-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/include/asm/page.h | 4 --- arch/x86/um/Makefile | 4 +-- arch/x86/um/mem_32.c | 50 -------------------------------------- 3 files changed, 2 insertions(+), 56 deletions(-) delete mode 100644 arch/x86/um/mem_32.c diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 6f54254aaf44..2d363460d896 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -96,8 +96,4 @@ extern unsigned long uml_physmem; #endif /* __ASSEMBLER__ */ -#ifdef CONFIG_X86_32 -#define __HAVE_ARCH_GATE_AREA 1 -#endif - #endif /* __UM_PAGE_H */ diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 1767e6061b4d..f9ea75bf43ac 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -13,7 +13,7 @@ obj-y = bugs_$(BITS).o delay.o fault.o \ ptrace.o ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ - mem_$(BITS).o subarch.o os-Linux/ + subarch.o os-Linux/ ifeq ($(CONFIG_X86_32),y) @@ -26,7 +26,7 @@ subarch-y += ../kernel/sys_ia32.o else -obj-y += syscalls_64.o vdso/ +obj-y += mem_64.o syscalls_64.o vdso/ subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \ ../lib/memmove_64.o ../lib/memset_64.o diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c deleted file mode 100644 index 29b2203bc82c..000000000000 --- a/arch/x86/um/mem_32.c +++ /dev/null @@ -1,50 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2011 Richard Weinberger - */ - -#include -#include - -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ - if (!FIXADDR_USER_START) - return 0; - - vma_init(&gate_vma, NULL); - gate_vma.vm_start = FIXADDR_USER_START; - gate_vma.vm_end = FIXADDR_USER_END; - vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC); - 
gate_vma.vm_page_prot = PAGE_READONLY; - - return 0; -} -__initcall(gate_vma_init); - -struct vm_area_struct *get_gate_vma(struct mm_struct *mm) -{ - return FIXADDR_USER_START ? &gate_vma : NULL; -} - -int in_gate_area_no_mm(unsigned long addr) -{ - if (!FIXADDR_USER_START) - return 0; - - if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) - return 1; - - return 0; -} - -int in_gate_area(struct mm_struct *mm, unsigned long addr) -{ - struct vm_area_struct *vma = get_gate_vma(mm); - - if (!vma) - return 0; - - return (addr >= vma->vm_start) && (addr < vma->vm_end); -} From 78fdfc9fc4215add97fe331aff7c64e4a423a104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:41 +0100 Subject: [PATCH 26/33] um: Remove fixaddr_user_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the removal of the vDSO passthrough from the host, FIXADDR_USER_START is always 0 and fixaddr_user_init() is dead code. Remove it. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-6-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/kernel/mem.c | 107 ------------------------------------------- 1 file changed, 107 deletions(-) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index dc938715ec9d..39c4a7e21c6f 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -84,109 +84,6 @@ void __init mem_init(void) kmalloc_ok = 1; } -#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. 
- */ -static void __init one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, - PAGE_SIZE); - if (!pte) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_pmd(pmd, __pmd(_KERNPG_TABLE + - (unsigned long) __pa(pte))); - BUG_ON(pte != pte_offset_kernel(pmd, 0)); - } -} - -static void __init one_md_table_init(pud_t *pud) -{ -#if CONFIG_PGTABLE_LEVELS > 2 - pmd_t *pmd_table = (pmd_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - if (!pmd_table) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_pud(pud, __pud(_KERNPG_TABLE + (unsigned long) __pa(pmd_table))); - BUG_ON(pmd_table != pmd_offset(pud, 0)); -#endif -} - -static void __init one_ud_table_init(p4d_t *p4d) -{ -#if CONFIG_PGTABLE_LEVELS > 3 - pud_t *pud_table = (pud_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - if (!pud_table) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE); - - set_p4d(p4d, __p4d(_KERNPG_TABLE + (unsigned long) __pa(pud_table))); - BUG_ON(pud_table != pud_offset(p4d, 0)); -#endif -} - -static void __init fixrange_init(unsigned long start, unsigned long end, - pgd_t *pgd_base) -{ - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - int i, j; - unsigned long vaddr; - - vaddr = start; - i = pgd_index(vaddr); - j = pmd_index(vaddr); - pgd = pgd_base + i; - - for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { - p4d = p4d_offset(pgd, vaddr); - if (p4d_none(*p4d)) - one_ud_table_init(p4d); - pud = pud_offset(p4d, vaddr); - if (pud_none(*pud)) - one_md_table_init(pud); - pmd = pmd_offset(pud, vaddr); - for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { - one_page_table_init(pmd); - vaddr += PMD_SIZE; - } - j = 0; - } -} - -static void __init fixaddr_user_init( void) -{ - long size = FIXADDR_USER_END - FIXADDR_USER_START; - pte_t *pte; - phys_t p; - unsigned long v, vaddr = 
FIXADDR_USER_START; - - if (!size) - return; - - fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); - v = (unsigned long) memblock_alloc_low(size, PAGE_SIZE); - if (!v) - panic("%s: Failed to allocate %lu bytes align=%lx\n", - __func__, size, PAGE_SIZE); - - memcpy((void *) v , (void *) FIXADDR_USER_START, size); - p = __pa(v); - for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, - p += PAGE_SIZE) { - pte = virt_to_kpte(vaddr); - pte_set_val(*pte, p, PAGE_READONLY); - } -} -#endif - void __init paging_init(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; @@ -199,10 +96,6 @@ void __init paging_init(void) max_zone_pfn[ZONE_NORMAL] = high_physmem >> PAGE_SHIFT; free_area_init(max_zone_pfn); - -#if IS_ENABLED(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) - fixaddr_user_init(); -#endif } /* From 880f615bf96eff89eab88b4f9aacf527cf55a714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:42 +0100 Subject: [PATCH 27/33] um: Remove redundant range check from __access_ok_vsyscall() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only caller __access_ok() is already doing the same check through __addr_range_nowrap(). Remove the redundant check. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-7-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/include/asm/uaccess.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 1c6e0ae41b0c..3770bdeee100 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -17,8 +17,7 @@ #define __access_ok_vsyscall(addr, size) \ (((unsigned long) (addr) >= FIXADDR_USER_START) && \ - ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ - ((unsigned long) (addr) + (size) >= (unsigned long)(addr))) + ((unsigned long) (addr) + (size) <= FIXADDR_USER_END)) #define __addr_range_nowrap(addr, size) \ ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) From f20e32ffda93e002ec4ce9fa15d71175a5445a96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:43 +0100 Subject: [PATCH 28/33] um: Remove __access_ok_vsyscall() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FIXADDR_USER_START and FIXADDR_USER_END are now always zero. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-8-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/include/asm/uaccess.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3770bdeee100..0df9ea4abda8 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -15,10 +15,6 @@ (((unsigned long) (addr) < TASK_SIZE) && \ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -#define __access_ok_vsyscall(addr, size) \ - (((unsigned long) (addr) >= FIXADDR_USER_START) && \ - ((unsigned long) (addr) + (size) <= FIXADDR_USER_END)) - #define __addr_range_nowrap(addr, size) \ ((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) @@ -39,9 +35,7 @@ static inline int __access_ok(const void __user *ptr, unsigned long size); static inline int __access_ok(const void __user *ptr, unsigned long size) { unsigned long addr = (unsigned long)ptr; - return __addr_range_nowrap(addr, size) && - (__under_task_size(addr, size) || - __access_ok_vsyscall(addr, size)); + return __addr_range_nowrap(addr, size) && __under_task_size(addr, size); } #define __get_kernel_nofault(dst, src, type, err_label) \ From 117e796fc5bfe84f18eec7e8f3b4f4090f605322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:44 +0100 Subject: [PATCH 29/33] x86/um: Remove FIXADDR_USER_START and FIXADDR_USER_END MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users left.
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-9-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/x86/um/asm/elf.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 8d7df4684c38..fdd5a612f678 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -72,10 +72,6 @@ extern char * elf_aux_platform; #define ELF_PLATFORM_FALLBACK "i586" #define ELF_PLATFORM (elf_aux_platform ?: ELF_PLATFORM_FALLBACK) -/* No user-accessible fixmap addresses, i.e. vsyscall */ -#define FIXADDR_USER_START 0 -#define FIXADDR_USER_END 0 - #else /* x86-64 relocation types, taken from asm-x86_64/elf.h */ @@ -157,10 +153,6 @@ extern char * elf_aux_platform; #define ELF_PLATFORM "x86_64" -/* No user-accessible fixmap addresses, i.e. vsyscall */ -#define FIXADDR_USER_START 0 -#define FIXADDR_USER_END 0 - #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, From 293f71435d14f5b5c46fc3398695fa265c69363d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 28 Oct 2025 10:15:45 +0100 Subject: [PATCH 30/33] um: Always set up AT_HWCAP and AT_PLATFORM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically the code to set up AT_HWCAP and AT_PLATFORM was only built for 32bit x86 as it was intermingled with the vDSO passthrough code. Now that vDSO passthrough has been removed, always pass through AT_HWCAP and AT_PLATFORM. 
Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20251028-uml-remove-32bit-pseudo-vdso-v1-10-e930063eff5f@weissschuh.net Signed-off-by: Johannes Berg --- arch/um/os-Linux/Makefile | 4 +--- arch/um/os-Linux/elf_aux.c | 7 ++++++- arch/um/os-Linux/main.c | 4 ---- arch/x86/um/Kconfig | 3 --- arch/x86/um/asm/elf.h | 7 ++++--- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 70c73c22f715..f8d672d570d9 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -6,7 +6,7 @@ # Don't instrument UML-specific code KCOV_INSTRUMENT := n -obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ +obj-y = elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ umid.o user_syms.o util.o skas/ @@ -14,8 +14,6 @@ CFLAGS_signal.o += -Wframe-larger-than=4096 CFLAGS_main.o += -Wno-frame-larger-than -obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o - obj-$(CONFIG_SMP) += smp.o USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \ diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index f8927a5959d8..72f416edf252 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -14,12 +14,17 @@ #include #include #include "internal.h" +#include +#if __BITS_PER_LONG == 64 +typedef Elf64_auxv_t elf_auxv_t; +#else typedef Elf32_auxv_t elf_auxv_t; +#endif /* These are initialized very early in boot and never changed */ char * elf_aux_platform; -extern long elf_aux_hwcap; +long elf_aux_hwcap; __init void scan_elf_aux( char **envp) { diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 730723106228..7e114862a723 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -21,8 +21,6 @@ #define STACKSIZE (8 * 1024 * 1024) -long elf_aux_hwcap; - static void __init set_stklim(void) { struct rlimit lim; @@ -149,9 +147,7 @@ int __init main(int argc, 
char **argv, char **envp) install_fatal_handler(SIGINT); install_fatal_handler(SIGTERM); -#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA scan_elf_aux(envp); -#endif change_sig(SIGPIPE, 0); ret = linux_main(argc, argv, envp); diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 798c6cc53e82..bdd7c8e39b01 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -34,8 +34,5 @@ config X86_64 config ARCH_HAS_SC_SIGNALS def_bool !64BIT -config ARCH_REUSE_HOST_VSYSCALL_AREA - def_bool !64BIT - config GENERIC_HWEIGHT def_bool y diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index fdd5a612f678..22d0111b543b 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -68,9 +68,7 @@ pr_reg[16] = PT_REGS_SS(regs); \ } while (0); -extern char * elf_aux_platform; #define ELF_PLATFORM_FALLBACK "i586" -#define ELF_PLATFORM (elf_aux_platform ?: ELF_PLATFORM_FALLBACK) #else @@ -151,7 +149,7 @@ extern char * elf_aux_platform; (pr_reg)[25] = 0; \ (pr_reg)[26] = 0; -#define ELF_PLATFORM "x86_64" +#define ELF_PLATFORM_FALLBACK "x86_64" #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; @@ -180,6 +178,9 @@ struct task_struct; extern long elf_aux_hwcap; #define ELF_HWCAP (elf_aux_hwcap) +extern char *elf_aux_platform; +#define ELF_PLATFORM (elf_aux_platform ?: ELF_PLATFORM_FALLBACK) + #define SET_PERSONALITY(ex) do {} while(0) #endif From 54618003a145aeadc2381159bde80f9761cce16c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 13 Nov 2025 02:25:26 +0000 Subject: [PATCH 31/33] um: drivers: virtio: use string choices helper Remove hard-coded strings by using the string helper functions Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87h5uywtwp.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Johannes Berg --- arch/um/drivers/virtio_uml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index de7867ae220d..6cf1152a1a4e 100644 --- 
a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1151,8 +1152,7 @@ void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, return; vu_dev->no_vq_suspend = no_vq_suspend; - dev_info(&vdev->dev, "%sabled VQ suspend\n", - no_vq_suspend ? "dis" : "en"); + dev_info(&vdev->dev, "%s VQ suspend\n", str_disabled_enabled(no_vq_suspend)); } static void vu_of_conn_broken(struct work_struct *wk) From a74b6c0e53a6df8e8a096b50c06c4f872906368a Mon Sep 17 00:00:00 2001 From: David Gow Date: Sat, 22 Nov 2025 16:32:12 +0800 Subject: [PATCH 32/33] um: Don't rename vmap to kernel_vmap In order to work around the existence of a vmap symbol in libpcap, the UML makefile unconditionally redefines vmap to kernel_vmap. However, this not only affects the actual vmap symbol, but also anything else named vmap, including a number of struct members in DRM. This would not be too much of a problem, since all uses are also updated, except we now have Rust DRM bindings, which expect the corresponding Rust structs to have 'vmap' names. Since the redefinition applies in bindgen, but not to Rust code, we end up with errors such as: error[E0560]: struct `drm_gem_object_funcs` has no fields named `vmap` --> rust/kernel/drm/gem/mod.rs:210:9 Since libpcap support was removed in commit 12b8e7e69aa7 ("um: Remove obsolete pcap driver"), remove the, now unnecessary, define as well. We also take this opportunity to update the comment. 
Signed-off-by: David Gow Acked-by: Miguel Ojeda Link: https://patch.msgid.link/20251122083213.3996586-1-davidgow@google.com Fixes: 12b8e7e69aa7 ("um: Remove obsolete pcap driver") [adjust commit message a bit] Signed-off-by: Johannes Berg --- arch/um/Makefile | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 7be0143b5ba3..721b652ffb65 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -46,19 +46,17 @@ ARCH_INCLUDE := -I$(srctree)/$(SHARED_HEADERS) ARCH_INCLUDE += -I$(srctree)/$(HOST_DIR)/um/shared KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um -# -Dvmap=kernel_vmap prevents anything from referencing the libpcap.o symbol so -# named - it's a common symbol in libpcap, so we get a binary which crashes. -# -# Same things for in6addr_loopback and mktime - found in libc. For these two we -# only get link-time error, luckily. +# -Dstrrchr=kernel_strrchr (as well as the various in6addr symbols) prevents +# anything from referencing +# libc symbols with the same name, which can cause a linker error. # # -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a # embedded copy of longjmp, same thing for setjmp. # -# These apply to USER_CFLAGS to. +# These apply to USER_CFLAGS too. KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ - $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + $(ARCH_INCLUDE) $(MODE_INCLUDE) \ -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr \ From a3209bb94b36351f11e0d9e72ac44e5dd777a069 Mon Sep 17 00:00:00 2001 From: "Christophe Leroy (CS GROUP)" Date: Sat, 29 Nov 2025 10:56:02 +0100 Subject: [PATCH 33/33] um: Disable KASAN_INLINE when STATIC_LINK is selected um doesn't support KASAN_INLINE together with STATIC_LINK. Instead of failing the build, disable KASAN_INLINE when STATIC_LINK is selected.
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202511290451.x9GZVJ1l-lkp@intel.com/ Fixes: 1e338f4d99e6 ("kasan: introduce ARCH_DEFER_KASAN and unify static key across modes") Signed-off-by: Christophe Leroy (CS GROUP) Link: https://patch.msgid.link/2620ab0bbba640b6237c50b9c0dca1c7d1142f5d.1764410067.git.chleroy@kernel.org Signed-off-by: Johannes Berg --- arch/um/Kconfig | 1 + arch/um/include/asm/kasan.h | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 097c6a6265ef..8415d39b0d43 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,6 +5,7 @@ menu "UML-specific options" config UML bool default y + select ARCH_DISABLE_KASAN_INLINE if STATIC_LINK select ARCH_NEEDS_DEFER_KASAN if STATIC_LINK select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_HAS_CACHE_LINE_SIZE diff --git a/arch/um/include/asm/kasan.h b/arch/um/include/asm/kasan.h index b54a4e937fd1..81bcdc0f962e 100644 --- a/arch/um/include/asm/kasan.h +++ b/arch/um/include/asm/kasan.h @@ -24,10 +24,6 @@ #ifdef CONFIG_KASAN void kasan_init(void); - -#if defined(CONFIG_STATIC_LINK) && defined(CONFIG_KASAN_INLINE) -#error UML does not work in KASAN_INLINE mode with STATIC_LINK enabled! -#endif #else static inline void kasan_init(void) { } #endif /* CONFIG_KASAN */