mirror of https://github.com/torvalds/linux.git
When freeing page tables, we try to batch them. If batch allocation fails (GFP_NOWAIT), __tlb_remove_table_one() immediately frees the single table without batching. On !CONFIG_PT_RECLAIM, the fallback sends an IPI to all CPUs via tlb_remove_table_sync_one(). This disrupts all CPUs even when only a single process is unmapping memory. IPI broadcast was reported to hurt RT workloads[1]. tlb_remove_table_sync_one() synchronizes with lockless page-table walkers (e.g. GUP-fast) that rely on IRQ disabling. These walkers use local_irq_disable(), which is also an RCU read-side critical section. This patch introduces tlb_remove_table_sync_rcu(), which uses an RCU grace period (synchronize_rcu()) instead of an IPI broadcast. This provides the same guarantee as the IPI but without disrupting all CPUs. Since batch allocation has already failed, we are in a slow path where sleeping is acceptable - we are in process context (unmap_region, exit_mmap) with only mmap_lock held. tlb_remove_table_sync_one() is retained for other callers (e.g., khugepaged after pmdp_collapse_flush(), tlb_finish_mmu() when tlb->fully_unshared_tables is set) that are not slow paths. Converting those may require different approaches such as targeted IPIs. 
Link: https://lore.kernel.org/linux-mm/1b27a3fa-359a-43d0-bdeb-c31341749367@kernel.org/ [1] Link: https://lore.kernel.org/linux-mm/20260202150957.GD1282955@noisy.programming.kicks-ass.net/ Link: https://lore.kernel.org/linux-mm/dfdfeac9-5cd5-46fc-a5c1-9ccf9bd3502a@intel.com/ Link: https://lore.kernel.org/linux-mm/bc489455-bb18-44dc-8518-ae75abda6bec@kernel.org/ Link: https://lkml.kernel.org/r/20260224142101.20500-1-lance.yang@linux.dev Signed-off-by: Lance Yang <lance.yang@linux.dev> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Suggested-by: Dave Hansen <dave.hansen@intel.com> Suggested-by: David Hildenbrand (Arm) <david@kernel.org> Acked-by: David Hildenbrand (Arm) <david@kernel.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Nick Piggin <npiggin@gmail.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
||
|---|---|---|
| .. | ||
| bitops | ||
| vdso | ||
| Kbuild | ||
| access_ok.h | ||
| agp.h | ||
| archrandom.h | ||
| asm-offsets.h | ||
| asm-prototypes.h | ||
| atomic.h | ||
| atomic64.h | ||
| audit_change_attr.h | ||
| audit_dir_write.h | ||
| audit_read.h | ||
| audit_signal.h | ||
| audit_write.h | ||
| barrier.h | ||
| bitops.h | ||
| bitsperlong.h | ||
| bug.h | ||
| cache.h | ||
| cacheflush.h | ||
| cfi.h | ||
| checksum.h | ||
| cmpxchg-local.h | ||
| cmpxchg.h | ||
| codetag.lds.h | ||
| compat.h | ||
| current.h | ||
| delay.h | ||
| device.h | ||
| div64.h | ||
| dma-mapping.h | ||
| dma.h | ||
| early_ioremap.h | ||
| emergency-restart.h | ||
| error-injection.h | ||
| exec.h | ||
| extable.h | ||
| fixmap.h | ||
| flat.h | ||
| fprobe.h | ||
| ftrace.h | ||
| futex.h | ||
| getorder.h | ||
| hardirq.h | ||
| hugetlb.h | ||
| hw_irq.h | ||
| int-ll64.h | ||
| io.h | ||
| ioctl.h | ||
| iomap.h | ||
| irq.h | ||
| irq_regs.h | ||
| irq_work.h | ||
| irqflags.h | ||
| kdebug.h | ||
| kmap_size.h | ||
| kprobes.h | ||
| kvm_para.h | ||
| kvm_types.h | ||
| linkage.h | ||
| local.h | ||
| local64.h | ||
| logic_io.h | ||
| mcs_spinlock.h | ||
| memory_model.h | ||
| mm_hooks.h | ||
| mmiowb.h | ||
| mmiowb_types.h | ||
| mmu.h | ||
| mmu_context.h | ||
| mmzone.h | ||
| module.h | ||
| module.lds.h | ||
| mshyperv.h | ||
| msi.h | ||
| nommu_context.h | ||
| numa.h | ||
| param.h | ||
| parport.h | ||
| pci.h | ||
| pci_iomap.h | ||
| percpu.h | ||
| pgalloc.h | ||
| pgtable-nop4d.h | ||
| pgtable-nopmd.h | ||
| pgtable-nopud.h | ||
| pgtable_uffd.h | ||
| preempt.h | ||
| qrwlock.h | ||
| qrwlock_types.h | ||
| qspinlock.h | ||
| qspinlock_types.h | ||
| resource.h | ||
| rqspinlock.h | ||
| runtime-const.h | ||
| rwonce.h | ||
| seccomp.h | ||
| sections.h | ||
| serial.h | ||
| set_memory.h | ||
| shmparam.h | ||
| signal.h | ||
| simd.h | ||
| softirq_stack.h | ||
| spinlock.h | ||
| spinlock_types.h | ||
| statfs.h | ||
| string.h | ||
| switch_to.h | ||
| syscall.h | ||
| syscalls.h | ||
| text-patching.h | ||
| thread_info_tif.h | ||
| ticket_spinlock.h | ||
| timex.h | ||
| tlb.h | ||
| tlbflush.h | ||
| topology.h | ||
| trace_clock.h | ||
| uaccess.h | ||
| unwind_user.h | ||
| user.h | ||
| vermagic.h | ||
| vga.h | ||
| video.h | ||
| vmlinux.lds.h | ||
| word-at-a-time.h | ||
| xor.h | ||