fork: define a local GFP_VMAP_STACK

The current allocation of VMAP stack memory is using (THREADINFO_GFP &
~__GFP_ACCOUNT) which is a complicated way of saying (GFP_KERNEL |
__GFP_ZERO):

<linux/thread_info.h>:
#define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_ZERO)
<linux/gfp_types.h>:
#define GFP_KERNEL_ACCOUNT	(GFP_KERNEL | __GFP_ACCOUNT)
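
Expanding the masks with the definitions quoted above (an illustration
only, not part of the patch):

  THREADINFO_GFP & ~__GFP_ACCOUNT
    == (GFP_KERNEL_ACCOUNT | __GFP_ZERO) & ~__GFP_ACCOUNT
    == ((GFP_KERNEL | __GFP_ACCOUNT) | __GFP_ZERO) & ~__GFP_ACCOUNT
    == (GFP_KERNEL | __GFP_ZERO)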

This is an unfortunate side-effect of independent changes blurring the
picture:

commit 19809c2da2 changed (THREADINFO_GFP |
__GFP_HIGHMEM) to just THREADINFO_GFP since highmem became implicit.

commit 9b6f7e163c then added stack caching
and rewrote the allocation to (THREADINFO_GFP & ~__GFP_ACCOUNT) as cached
stacks need to be accounted separately.  However that code, when it
eventually accounts the memory, does this:

  ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0)

so the memory is charged as a GFP_KERNEL allocation.

Define a unique GFP_VMAP_STACK to use (GFP_KERNEL | __GFP_ZERO) and move
the comment there.
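
Roughly, this amounts to the sketch below; the comment wording here is
illustrative (not quoted from the patch), and the call site follows the
existing __vmalloc_node_range() allocation in alloc_thread_stack_node():

  /*
   * Cached stacks are charged to memcg explicitly when they are handed
   * out to a task, so allocate the backing pages without __GFP_ACCOUNT.
   */
  #define GFP_VMAP_STACK	(GFP_KERNEL | __GFP_ZERO)

  stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
                               VMALLOC_START, VMALLOC_END,
                               GFP_VMAP_STACK, PAGE_KERNEL,
                               0, node, __builtin_return_address(0));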

Link: https://lkml.kernel.org/r/20250509-gfp-stack-v1-1-82f6f7efc210@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reported-by: Mateusz Guzik <mjguzik@gmail.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
commit 8e02b1b7fc (parent d82893c52a)
Author:    Linus Walleij <linus.walleij@linaro.org>
Date:      2025-05-09 09:25:09 +02:00
Committer: Andrew Morton <akpm@linux-foundation.org>

1 changed file with 45 additions and 43 deletions


--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -185,7 +185,13 @@ static inline void free_task_struct(struct task_struct *tsk)
         kmem_cache_free(task_struct_cachep, tsk);
 }
 
-#ifdef CONFIG_VMAP_STACK
+/*
+ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+ * kmemcache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+
+# ifdef CONFIG_VMAP_STACK
 /*
  * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
  * flush. Try to minimize the number of calls by caching stacks.
@@ -198,14 +204,14 @@ struct vm_stack {
         struct vm_struct *stack_vm_area;
 };
 
-static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
 {
         unsigned int i;
 
         for (i = 0; i < NR_CACHED_STACKS; i++) {
                 struct vm_struct *tmp = NULL;
 
-                if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm_area))
+                if (this_cpu_try_cmpxchg(cached_stacks[i], &tmp, vm))
                         return true;
         }
         return false;
@@ -214,12 +220,11 @@ static bool try_release_thread_stack_to_cache(struct vm_struct *vm_area)
 static void thread_stack_free_rcu(struct rcu_head *rh)
 {
         struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
-        struct vm_struct *vm_area = vm_stack->stack_vm_area;
 
         if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
                 return;
 
-        vfree(vm_area->addr);
+        vfree(vm_stack);
 }
 
 static void thread_stack_delayed_free(struct task_struct *tsk)
@@ -232,32 +237,32 @@ static void thread_stack_delayed_free(struct task_struct *tsk)
 
 static int free_vm_stack_cache(unsigned int cpu)
 {
-        struct vm_struct **cached_vm_stack_areas = per_cpu_ptr(cached_stacks, cpu);
+        struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
         int i;
 
         for (i = 0; i < NR_CACHED_STACKS; i++) {
-                struct vm_struct *vm_area = cached_vm_stack_areas[i];
+                struct vm_struct *vm_stack = cached_vm_stacks[i];
 
-                if (!vm_area)
+                if (!vm_stack)
                         continue;
 
-                vfree(vm_area->addr);
-                cached_vm_stack_areas[i] = NULL;
+                vfree(vm_stack->addr);
+                cached_vm_stacks[i] = NULL;
         }
 
         return 0;
 }
 
-static int memcg_charge_kernel_stack(struct vm_struct *vm_area)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
 {
         int i;
         int ret;
         int nr_charged = 0;
 
-        BUG_ON(vm_area->nr_pages != THREAD_SIZE / PAGE_SIZE);
+        BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
 
         for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-                ret = memcg_kmem_charge_page(vm_area->pages[i], GFP_KERNEL, 0);
+                ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0);
                 if (ret)
                         goto err;
                 nr_charged++;
@@ -265,35 +270,38 @@ static int memcg_charge_kernel_stack(struct vm_struct *vm_area)
         return 0;
 err:
         for (i = 0; i < nr_charged; i++)
-                memcg_kmem_uncharge_page(vm_area->pages[i], 0);
+                memcg_kmem_uncharge_page(vm->pages[i], 0);
         return ret;
 }
 
 static int alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
-        struct vm_struct *vm_area;
+        struct vm_struct *vm;
         void *stack;
         int i;
 
         for (i = 0; i < NR_CACHED_STACKS; i++) {
-                vm_area = this_cpu_xchg(cached_stacks[i], NULL);
-                if (!vm_area)
+                struct vm_struct *s;
+
+                s = this_cpu_xchg(cached_stacks[i], NULL);
+
+                if (!s)
                         continue;
 
-                if (memcg_charge_kernel_stack(vm_area)) {
-                        vfree(vm_area->addr);
-                        return -ENOMEM;
-                }
-
                 /* Reset stack metadata. */
-                kasan_unpoison_range(vm_area->addr, THREAD_SIZE);
+                kasan_unpoison_range(s->addr, THREAD_SIZE);
 
-                stack = kasan_reset_tag(vm_area->addr);
+                stack = kasan_reset_tag(s->addr);
 
                 /* Clear stale pointers from reused stack. */
                 memset(stack, 0, THREAD_SIZE);
 
-                tsk->stack_vm_area = vm_area;
+                if (memcg_charge_kernel_stack(s)) {
+                        vfree(s->addr);
+                        return -ENOMEM;
+                }
+
+                tsk->stack_vm_area = s;
                 tsk->stack = stack;
                 return 0;
         }
@@ -309,8 +317,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
         if (!stack)
                 return -ENOMEM;
 
-        vm_area = find_vm_area(stack);
-        if (memcg_charge_kernel_stack(vm_area)) {
+        vm = find_vm_area(stack);
+        if (memcg_charge_kernel_stack(vm)) {
                 vfree(stack);
                 return -ENOMEM;
         }
@@ -319,7 +327,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
          * free_thread_stack() can be called in interrupt context,
          * so cache the vm_struct.
          */
-        tsk->stack_vm_area = vm_area;
+        tsk->stack_vm_area = vm;
         stack = kasan_reset_tag(stack);
         tsk->stack = stack;
         return 0;
@@ -334,13 +342,7 @@ static void free_thread_stack(struct task_struct *tsk)
         tsk->stack_vm_area = NULL;
 }
 
-#else /* !CONFIG_VMAP_STACK */
-
-/*
- * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
- * kmemcache based allocator.
- */
-#if THREAD_SIZE >= PAGE_SIZE
+# else /* !CONFIG_VMAP_STACK */
 
 static void thread_stack_free_rcu(struct rcu_head *rh)
 {
@@ -372,7 +374,8 @@ static void free_thread_stack(struct task_struct *tsk)
         tsk->stack = NULL;
 }
 
-#else /* !(THREAD_SIZE >= PAGE_SIZE) */
+# endif /* CONFIG_VMAP_STACK */
+# else /* !(THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)) */
 
 static struct kmem_cache *thread_stack_cache;
 
@@ -411,8 +414,7 @@ void thread_stack_cache_init(void)
         BUG_ON(thread_stack_cache == NULL);
 }
 
-#endif /* THREAD_SIZE >= PAGE_SIZE */
-#endif /* CONFIG_VMAP_STACK */
+# endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
 static struct kmem_cache *signal_cachep;
@@ -515,11 +517,11 @@ void vm_area_free(struct vm_area_struct *vma)
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
         if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-                struct vm_struct *vm_area = task_stack_vm_area(tsk);
+                struct vm_struct *vm = task_stack_vm_area(tsk);
                 int i;
 
                 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-                        mod_lruvec_page_state(vm_area->pages[i], NR_KERNEL_STACK_KB,
+                        mod_lruvec_page_state(vm->pages[i], NR_KERNEL_STACK_KB,
                                               account * (PAGE_SIZE / 1024));
         } else {
                 void *stack = task_stack_page(tsk);
@@ -535,12 +537,12 @@ void exit_task_stack_account(struct task_struct *tsk)
         account_kernel_stack(tsk, -1);
 
         if (IS_ENABLED(CONFIG_VMAP_STACK)) {
-                struct vm_struct *vm_area;
+                struct vm_struct *vm;
                 int i;
 
-                vm_area = task_stack_vm_area(tsk);
+                vm = task_stack_vm_area(tsk);
                 for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
-                        memcg_kmem_uncharge_page(vm_area->pages[i], 0);
+                        memcg_kmem_uncharge_page(vm->pages[i], 0);
         }
 }