mirror of https://github.com/torvalds/linux.git
cgroup: Changes for v6.19
- Defer task cgroup unlink until after the dying task's final context
  switch so that controllers see the cgroup properly populated until the
  task is truly gone.

- cpuset cleanups and simplifications. Enforce that domain isolated CPUs
  stay in root or isolated partitions and fail if isolated+nohz_full
  would leave no housekeeping CPU. Fix sched/deadline root domain
  handling during CPU hot-unplug and race for tasks in attaching
  cpusets.

- Misc fixes including memory reclaim protection documentation and
  selftest KTAP conformance.
-----BEGIN PGP SIGNATURE-----

iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCaS3pEQ4cdGpAa2VybmVs
Lm9yZwAKCRCxYfJx3gVYGYbrAP9H0kVyWH5tK9VhjSZyqidic8NuvtmNOyhIRrg0
8S8K0wD/YG9xlh2JUyRmS4B23ggc59+9y5xM2/sctrho51Pvsgg=
=0MB+
-----END PGP SIGNATURE-----

Merge tag 'cgroup-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - Defer task cgroup unlink until after the dying task's final context
   switch so that controllers see the cgroup properly populated until
   the task is truly gone

 - cpuset cleanups and simplifications. Enforce that domain isolated
   CPUs stay in root or isolated partitions and fail if isolated +
   nohz_full would leave no housekeeping CPU. Fix sched/deadline root
   domain handling during CPU hot-unplug and race for tasks in
   attaching cpusets

 - Misc fixes including memory reclaim protection documentation and
   selftest KTAP conformance

* tag 'cgroup-for-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
  cpuset: Treat cpusets in attaching as populated
  sched/deadline: Walk up cpuset hierarchy to decide root domain when hot-unplug
  cgroup/cpuset: Introduce cpuset_cpus_allowed_locked()
  docs: cgroup: No special handling of unpopulated memcgs
  docs: cgroup: Note about sibling relative reclaim protection
  docs: cgroup: Explain reclaim protection target
  selftests/cgroup: conform test to KTAP format output
  cpuset: remove need_rebuild_sched_domains
  cpuset: remove global remote_children list
  cpuset: simplify node setting on error
  cgroup: include missing header for struct irq_work
  cgroup: Fix sleeping from invalid context warning on PREEMPT_RT
  cgroup/cpuset: Globally track isolated_cpus update
  cgroup/cpuset: Ensure domain isolated CPUs stay in root or isolated partition
  cgroup/cpuset: Move up prstate_housekeeping_conflict() helper
  cgroup/cpuset: Fail if isolated and nohz_full don't leave any housekeeping
  cgroup/cpuset: Rename update_unbound_workqueue_cpumask() to update_isolation_cpumasks()
  cgroup: Defer task cgroup unlink until after the task is done switching out
  cgroup: Move dying_tasks cleanup from cgroup_task_release() to cgroup_task_free()
  cgroup: Rename cgroup lifecycle hooks to cgroup_task_*()
  ...
commit 8449d3252c
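For orientation, the cpuset interface that the partition/housekeeping checks in this series police can be exercised from userspace roughly as follows. This sketch is illustrative only and not part of the commit; the cgroup mount point, the cgroup name "isol-demo" and the CPU list "2-3" are assumptions:

    /*
     * Hypothetical example: create an isolated cpuset partition on cgroup v2.
     * Assumes cgroup2 is mounted at /sys/fs/cgroup, the cpuset controller is
     * enabled in the parent's cgroup.subtree_control, and CPUs 2-3 exist.
     */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static int write_file(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, val, strlen(val)) < 0) {
                    perror(path);
                    if (fd >= 0)
                            close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }

    int main(void)
    {
            if (mkdir("/sys/fs/cgroup/isol-demo", 0755) && errno != EEXIST) {
                    perror("mkdir");
                    return 1;
            }
            /* Exclusive CPUs for the would-be partition. */
            if (write_file("/sys/fs/cgroup/isol-demo/cpuset.cpus", "2-3"))
                    return 1;
            /*
             * Request an isolated partition.  With this series the kernel
             * additionally refuses the setup if the isolated CPUs together
             * with nohz_full would leave no housekeeping CPU.
             */
            if (write_file("/sys/fs/cgroup/isol-demo/cpuset.cpus.partition", "isolated"))
                    return 1;
            return 0;
    }

Reading cpuset.cpus.partition back reports whether the partition ended up valid or invalid and, if invalid, the reason.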
@@ -53,7 +53,8 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
      5-2. Memory
        5-2-1. Memory Interface Files
        5-2-2. Usage Guidelines
-       5-2-3. Memory Ownership
+       5-2-3. Reclaim Protection
+       5-2-4. Memory Ownership
      5-3. IO
        5-3-1. IO Interface Files
        5-3-2. Writeback

@@ -1317,7 +1318,7 @@ PAGE_SIZE multiple when read back.
 	smaller overages.

 	Effective min boundary is limited by memory.min values of
-	all ancestor cgroups. If there is memory.min overcommitment
+	ancestor cgroups. If there is memory.min overcommitment
 	(child cgroup or cgroups are requiring more protected memory
 	than parent will allow), then each child cgroup will get
 	the part of parent's protection proportional to its

@@ -1326,9 +1327,6 @@ PAGE_SIZE multiple when read back.
 	Putting more memory than generally available under this
 	protection is discouraged and may lead to constant OOMs.

-	If a memory cgroup is not populated with processes,
-	its memory.min is ignored.
-
   memory.low
 	A read-write single value file which exists on non-root
 	cgroups. The default is "0".

@@ -1343,7 +1341,7 @@ PAGE_SIZE multiple when read back.
 	smaller overages.

 	Effective low boundary is limited by memory.low values of
-	all ancestor cgroups. If there is memory.low overcommitment
+	ancestor cgroups. If there is memory.low overcommitment
 	(child cgroup or cgroups are requiring more protected memory
 	than parent will allow), then each child cgroup will get
 	the part of parent's protection proportional to its

@@ -1934,6 +1932,27 @@ memory - is necessary to determine whether a workload needs more
 memory; unfortunately, memory pressure monitoring mechanism isn't
 implemented yet.

+Reclaim Protection
+~~~~~~~~~~~~~~~~~~
+
+The protection configured with "memory.low" or "memory.min" applies relatively
+to the target of the reclaim (i.e. any of memory cgroup limits, proactive
+memory.reclaim or global reclaim apparently located in the root cgroup).
+The protection value configured for B applies unchanged to the reclaim
+targeting A (i.e. caused by competition with the sibling E)::
+
+	root - ... - A - B - C
+	              \ ` D
+	               ` E
+
+When the reclaim targets ancestors of A, the effective protection of B is
+capped by the protection value configured for A (and any other intermediate
+ancestors between A and the target).
+
+To express indifference about relative sibling protection, it is suggested to
+use memory_recursiveprot. Configuring all descendants of a parent with finite
+protection to "max" works but it may unnecessarily skew memory.events:low
+field.
+
 Memory Ownership
 ~~~~~~~~~~~~~~~~
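A minimal userspace sketch of the configuration the new "Reclaim Protection" text describes, not part of the patch; the A/B names mirror the documentation's example, while the mount point and the byte amounts are assumptions:

    /*
     * Minimal sketch: assumes cgroup2 at /sys/fs/cgroup with an existing
     * hierarchy A/B and the memory controller enabled at both levels.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static int write_file(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, val, strlen(val)) < 0) {
                    perror(path);
                    if (fd >= 0)
                            close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }

    int main(void)
    {
            /* B's protection applies unchanged to reclaim that targets A ... */
            if (write_file("/sys/fs/cgroup/A/B/memory.min", "64M"))
                    return 1;
            /* ... such as proactive reclaim of A (competition with sibling E). */
            if (write_file("/sys/fs/cgroup/A/memory.reclaim", "128M"))
                    return 1;
            /*
             * For reclaim targeting ancestors of A, B's effective protection
             * is additionally capped by A's own memory.min/memory.low.
             */
            return 0;
    }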
@@ -137,9 +137,10 @@ extern void cgroup_cancel_fork(struct task_struct *p,
			       struct kernel_clone_args *kargs);
 extern void cgroup_post_fork(struct task_struct *p,
			     struct kernel_clone_args *kargs);
-void cgroup_exit(struct task_struct *p);
-void cgroup_release(struct task_struct *p);
-void cgroup_free(struct task_struct *p);
+void cgroup_task_exit(struct task_struct *p);
+void cgroup_task_dead(struct task_struct *p);
+void cgroup_task_release(struct task_struct *p);
+void cgroup_task_free(struct task_struct *p);

 int cgroup_init_early(void);
 int cgroup_init(void);

@@ -680,9 +681,10 @@ static inline void cgroup_cancel_fork(struct task_struct *p,
				      struct kernel_clone_args *kargs) {}
 static inline void cgroup_post_fork(struct task_struct *p,
				    struct kernel_clone_args *kargs) {}
-static inline void cgroup_exit(struct task_struct *p) {}
-static inline void cgroup_release(struct task_struct *p) {}
-static inline void cgroup_free(struct task_struct *p) {}
+static inline void cgroup_task_exit(struct task_struct *p) {}
+static inline void cgroup_task_dead(struct task_struct *p) {}
+static inline void cgroup_task_release(struct task_struct *p) {}
+static inline void cgroup_task_free(struct task_struct *p) {}

 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
@@ -74,6 +74,7 @@ extern void inc_dl_tasks_cs(struct task_struct *task);
 extern void dec_dl_tasks_cs(struct task_struct *task);
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
+extern void cpuset_cpus_allowed_locked(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern bool cpuset_cpu_is_isolated(int cpu);

@@ -195,12 +196,18 @@ static inline void dec_dl_tasks_cs(struct task_struct *task) { }
 static inline void cpuset_lock(void) { }
 static inline void cpuset_unlock(void) { }

-static inline void cpuset_cpus_allowed(struct task_struct *p,
+static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
					      struct cpumask *mask)
 {
	cpumask_copy(mask, task_cpu_possible_mask(p));
 }

+static inline void cpuset_cpus_allowed(struct task_struct *p,
+				       struct cpumask *mask)
+{
+	cpuset_cpus_allowed_locked(p, mask);
+}
+
 static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
 {
	return false;
@@ -1324,7 +1324,10 @@ struct task_struct {
	struct css_set __rcu		*cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock: */
	struct list_head		cg_list;
-#endif
+#ifdef CONFIG_PREEMPT_RT
+	struct llist_node		cg_dead_lnode;
+#endif /* CONFIG_PREEMPT_RT */
+#endif /* CONFIG_CGROUPS */
 #ifdef CONFIG_X86_CPU_RESCTRL
	u32				closid;
	u32				rmid;
@@ -60,6 +60,7 @@
 #include <linux/sched/deadline.h>
 #include <linux/psi.h>
 #include <linux/nstree.h>
+#include <linux/irq_work.h>
 #include <net/sock.h>

 #define CREATE_TRACE_POINTS

@@ -287,6 +288,7 @@ static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);
+static void cgroup_rt_init(void);

 #ifdef CONFIG_DEBUG_CGROUP_REF
 #define CGROUP_REF_FN_ATTRS	noinline

@@ -941,7 +943,8 @@ static void css_set_move_task(struct task_struct *task,
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
-		 * against cgroup_exit()/cgroup_free() dropping the css_set.
+		 * against cgroup_task_dead()/cgroup_task_free() dropping
+		 * the css_set.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

@@ -6354,6 +6357,7 @@ int __init cgroup_init(void)
	BUG_ON(ss_rstat_init(NULL));

	get_user_ns(init_cgroup_ns.user_ns);
+	cgroup_rt_init();

	cgroup_lock();

@@ -6967,19 +6971,29 @@ void cgroup_post_fork(struct task_struct *child,
 }

 /**
- * cgroup_exit - detach cgroup from exiting task
+ * cgroup_task_exit - detach cgroup from exiting task
  * @tsk: pointer to task_struct of exiting process
  *
  * Description: Detach cgroup from @tsk.
  *
  */
-void cgroup_exit(struct task_struct *tsk)
+void cgroup_task_exit(struct task_struct *tsk)
 {
	struct cgroup_subsys *ss;
-	struct css_set *cset;
	int i;

-	spin_lock_irq(&css_set_lock);
+	/* see cgroup_post_fork() for details */
+	do_each_subsys_mask(ss, i, have_exit_callback) {
+		ss->exit(tsk);
+	} while_each_subsys_mask();
+}
+
+static void do_cgroup_task_dead(struct task_struct *tsk)
+{
+	struct css_set *cset;
+	unsigned long flags;
+
+	spin_lock_irqsave(&css_set_lock, flags);

	WARN_ON_ONCE(list_empty(&tsk->cg_list));
	cset = task_css_set(tsk);

@@ -6997,15 +7011,61 @@ void cgroup_exit(struct task_struct *tsk)
		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
		cgroup_update_frozen(task_dfl_cgroup(tsk));

-	spin_unlock_irq(&css_set_lock);
-
-	/* see cgroup_post_fork() for details */
-	do_each_subsys_mask(ss, i, have_exit_callback) {
-		ss->exit(tsk);
-	} while_each_subsys_mask();
+	spin_unlock_irqrestore(&css_set_lock, flags);
 }

-void cgroup_release(struct task_struct *task)
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * cgroup_task_dead() is called from finish_task_switch() which doesn't allow
+ * scheduling even in RT. As the task_dead path requires grabbing css_set_lock,
+ * this lead to sleeping in the invalid context warning bug. css_set_lock is too
+ * big to become a raw_spinlock. The task_dead path doesn't need to run
+ * synchronously but can't be delayed indefinitely either as the dead task pins
+ * the cgroup and task_struct can be pinned indefinitely. Bounce through lazy
+ * irq_work to allow batching while ensuring timely completion.
+ */
+static DEFINE_PER_CPU(struct llist_head, cgrp_dead_tasks);
+static DEFINE_PER_CPU(struct irq_work, cgrp_dead_tasks_iwork);
+
+static void cgrp_dead_tasks_iwork_fn(struct irq_work *iwork)
+{
+	struct llist_node *lnode;
+	struct task_struct *task, *next;
+
+	lnode = llist_del_all(this_cpu_ptr(&cgrp_dead_tasks));
+	llist_for_each_entry_safe(task, next, lnode, cg_dead_lnode) {
+		do_cgroup_task_dead(task);
+		put_task_struct(task);
+	}
+}
+
+static void __init cgroup_rt_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		init_llist_head(per_cpu_ptr(&cgrp_dead_tasks, cpu));
+		per_cpu(cgrp_dead_tasks_iwork, cpu) =
+			IRQ_WORK_INIT_LAZY(cgrp_dead_tasks_iwork_fn);
+	}
+}
+
+void cgroup_task_dead(struct task_struct *task)
+{
+	get_task_struct(task);
+	llist_add(&task->cg_dead_lnode, this_cpu_ptr(&cgrp_dead_tasks));
+	irq_work_queue(this_cpu_ptr(&cgrp_dead_tasks_iwork));
+}
+#else /* CONFIG_PREEMPT_RT */
+static void __init cgroup_rt_init(void) {}
+
+void cgroup_task_dead(struct task_struct *task)
+{
+	do_cgroup_task_dead(task);
+}
+#endif /* CONFIG_PREEMPT_RT */
+
+void cgroup_task_release(struct task_struct *task)
 {
	struct cgroup_subsys *ss;
	int ssid;

@@ -7013,6 +7073,11 @@ void cgroup_release(struct task_struct *task)
	do_each_subsys_mask(ss, ssid, have_release_callback) {
		ss->release(task);
	} while_each_subsys_mask();
+}
+
+void cgroup_task_free(struct task_struct *task)
+{
+	struct css_set *cset = task_css_set(task);

	if (!list_empty(&task->cg_list)) {
		spin_lock_irq(&css_set_lock);

@@ -7020,11 +7085,7 @@ void cgroup_release(struct task_struct *task)
		list_del_init(&task->cg_list);
		spin_unlock_irq(&css_set_lock);
	}
-}

-void cgroup_free(struct task_struct *task)
-{
-	struct css_set *cset = task_css_set(task);
	put_css_set(cset);
 }
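The PREEMPT_RT comment in the hunk above explains why the dead-task unlink is bounced through a per-CPU llist and lazy irq_work instead of taking css_set_lock from finish_task_switch(). For readers unfamiliar with the pattern, a rough userspace analogue follows; it is not kernel code, only mirrors the "push lock-free now, drain a batch under the big lock later" idea, and all names are invented:

    /*
     * Userspace analogue only.  A producer that must not block pushes dead
     * items onto a lock-free list; a deferred worker drains the whole batch
     * later under the "big" lock.
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct dead_task {
            struct dead_task *next;
            int id;
    };

    static _Atomic(struct dead_task *) dead_list;     /* llist analogue */
    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    static void task_dead(int id)                     /* non-sleeping context */
    {
            struct dead_task *t = malloc(sizeof(*t));

            if (!t)
                    return;
            t->id = id;
            /* lock-free push: never touches big_lock here */
            t->next = atomic_load(&dead_list);
            while (!atomic_compare_exchange_weak(&dead_list, &t->next, t))
                    ;
    }

    static void *drain_worker(void *arg)              /* irq_work analogue */
    {
            struct dead_task *batch = atomic_exchange(&dead_list, NULL);

            (void)arg;
            pthread_mutex_lock(&big_lock);
            while (batch) {                           /* process the batch */
                    struct dead_task *next = batch->next;

                    printf("unlinking dead task %d\n", batch->id);
                    free(batch);
                    batch = next;
            }
            pthread_mutex_unlock(&big_lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t w;

            for (int i = 0; i < 4; i++)
                    task_dead(i);
            pthread_create(&w, NULL, drain_worker, NULL);
            pthread_join(w, NULL);
            return 0;
    }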
@@ -155,12 +155,16 @@ struct cpuset {
	/* for custom sched domain */
	int relax_domain_level;

-	/* number of valid local child partitions */
-	int nr_subparts;
-
	/* partition root state */
	int partition_root_state;

+	/*
+	 * Whether cpuset is a remote partition.
+	 * It used to be a list anchoring all remote partitions - we can switch back
+	 * to a list if we need to iterate over the remote partitions.
+	 */
+	bool remote_partition;
+
	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.

@@ -175,9 +179,6 @@ struct cpuset {
	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

-	/* Remote partition silbling list anchored at remote_children */
-	struct list_head remote_sibling;
-
	/* Used to merge intersecting subsets for generate_sched_domains */
	struct uf_node node;
 };
@@ -81,15 +81,19 @@ static cpumask_var_t subpartitions_cpus;
 */
 static cpumask_var_t isolated_cpus;

+/*
+ * isolated_cpus updating flag (protected by cpuset_mutex)
+ * Set if isolated_cpus is going to be updated in the current
+ * cpuset_mutex crtical section.
+ */
+static bool isolated_cpus_updating;
+
 /*
 * Housekeeping (HK_TYPE_DOMAIN) CPUs at boot
 */
 static cpumask_var_t boot_hk_cpus;
 static bool have_boot_isolcpus;

-/* List of remote partition root children */
-static struct list_head remote_children;
-
 /*
 * A flag to force sched domain rebuild at the end of an operation.
 * It can be set in

@@ -212,7 +216,7 @@ static struct cpuset top_cpuset = {
		  BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE),
	.partition_root_state = PRS_ROOT,
	.relax_domain_level = -1,
-	.remote_sibling = LIST_HEAD_INIT(top_cpuset.remote_sibling),
+	.remote_partition = false,
 };

 /*

@@ -352,33 +356,55 @@ static inline bool is_in_v2_mode(void)
	       (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
 }

+static inline bool cpuset_is_populated(struct cpuset *cs)
+{
+	lockdep_assert_held(&cpuset_mutex);
+
+	/* Cpusets in the process of attaching should be considered as populated */
+	return cgroup_is_populated(cs->css.cgroup) ||
+	       cs->attach_in_progress;
+}
+
 /**
 * partition_is_populated - check if partition has tasks
 * @cs: partition root to be checked
 * @excluded_child: a child cpuset to be excluded in task checking
 * Return: true if there are tasks, false otherwise
 *
- * It is assumed that @cs is a valid partition root. @excluded_child should
- * be non-NULL when this cpuset is going to become a partition itself.
+ * @cs should be a valid partition root or going to become a partition root.
+ * @excluded_child should be non-NULL when this cpuset is going to become a
+ * partition itself.
+ *
+ * Note that a remote partition is not allowed underneath a valid local
+ * or remote partition. So if a non-partition root child is populated,
+ * the whole partition is considered populated.
 */
 static inline bool partition_is_populated(struct cpuset *cs,
					  struct cpuset *excluded_child)
 {
-	struct cgroup_subsys_state *css;
-	struct cpuset *child;
+	struct cpuset *cp;
+	struct cgroup_subsys_state *pos_css;

-	if (cs->css.cgroup->nr_populated_csets)
+	/*
+	 * We cannot call cs_is_populated(cs) directly, as
+	 * nr_populated_domain_children may include populated
+	 * csets from descendants that are partitions.
+	 */
+	if (cs->css.cgroup->nr_populated_csets ||
+	    cs->attach_in_progress)
		return true;
-	if (!excluded_child && !cs->nr_subparts)
-		return cgroup_is_populated(cs->css.cgroup);

	rcu_read_lock();
-	cpuset_for_each_child(child, css, cs) {
-		if (child == excluded_child)
+	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
+		if (cp == cs || cp == excluded_child)
			continue;
-		if (is_partition_valid(child))
+
+		if (is_partition_valid(cp)) {
+			pos_css = css_rightmost_descendant(pos_css);
			continue;
-		if (cgroup_is_populated(child->css.cgroup)) {
+		}
+
+		if (cpuset_is_populated(cp)) {
			rcu_read_unlock();
			return true;
		}
@@ -663,7 +689,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
	 * be changed to have empty cpus_allowed or mems_allowed.
	 */
	ret = -ENOSPC;
-	if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
+	if (cpuset_is_populated(cur)) {
		if (!cpumask_empty(cur->cpus_allowed) &&
		    cpumask_empty(trial->cpus_allowed))
			goto out;

@@ -1302,7 +1328,6 @@ static void reset_partition_data(struct cpuset *cs)

	lockdep_assert_held(&callback_lock);

-	cs->nr_subparts = 0;
	if (cpumask_empty(cs->exclusive_cpus)) {
		cpumask_clear(cs->effective_xcpus);
		if (is_cpu_exclusive(cs))

@@ -1325,6 +1350,8 @@ static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus
		cpumask_or(isolated_cpus, isolated_cpus, xcpus);
	else
		cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
+
+	isolated_cpus_updating = true;
 }

 /*

@@ -1332,15 +1359,12 @@ static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus
 * @new_prs: new partition_root_state
 * @parent: parent cpuset
 * @xcpus: exclusive CPUs to be added
- * Return: true if isolated_cpus modified, false otherwise
 *
 * Remote partition if parent == NULL
 */
-static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
+static void partition_xcpus_add(int new_prs, struct cpuset *parent,
				struct cpumask *xcpus)
 {
-	bool isolcpus_updated;
-
	WARN_ON_ONCE(new_prs < 0);
	lockdep_assert_held(&callback_lock);
	if (!parent)

@@ -1350,13 +1374,11 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
	if (parent == &top_cpuset)
		cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus);

-	isolcpus_updated = (new_prs != parent->partition_root_state);
-	if (isolcpus_updated)
+	if (new_prs != parent->partition_root_state)
		isolated_cpus_update(parent->partition_root_state, new_prs,
				     xcpus);

	cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
-	return isolcpus_updated;
 }

 /*

@@ -1364,15 +1386,12 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
 * @old_prs: old partition_root_state
 * @parent: parent cpuset
 * @xcpus: exclusive CPUs to be removed
- * Return: true if isolated_cpus modified, false otherwise
 *
 * Remote partition if parent == NULL
 */
-static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
+static void partition_xcpus_del(int old_prs, struct cpuset *parent,
				struct cpumask *xcpus)
 {
-	bool isolcpus_updated;
-
	WARN_ON_ONCE(old_prs < 0);
	lockdep_assert_held(&callback_lock);
	if (!parent)
@@ -1381,30 +1400,95 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
	if (parent == &top_cpuset)
		cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus);

-	isolcpus_updated = (old_prs != parent->partition_root_state);
-	if (isolcpus_updated)
+	if (old_prs != parent->partition_root_state)
		isolated_cpus_update(old_prs, parent->partition_root_state,
				     xcpus);

	cpumask_and(xcpus, xcpus, cpu_active_mask);
	cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
-	return isolcpus_updated;
 }

-static void update_isolation_cpumasks(bool isolcpus_updated)
+/*
+ * isolated_cpus_can_update - check for isolated & nohz_full conflicts
+ * @add_cpus: cpu mask for cpus that are going to be isolated
+ * @del_cpus: cpu mask for cpus that are no longer isolated, can be NULL
+ * Return: false if there is conflict, true otherwise
+ *
+ * If nohz_full is enabled and we have isolated CPUs, their combination must
+ * still leave housekeeping CPUs.
+ *
+ * TBD: Should consider merging this function into
+ * prstate_housekeeping_conflict().
+ */
+static bool isolated_cpus_can_update(struct cpumask *add_cpus,
+				     struct cpumask *del_cpus)
+{
+	cpumask_var_t full_hk_cpus;
+	int res = true;
+
+	if (!housekeeping_enabled(HK_TYPE_KERNEL_NOISE))
+		return true;
+
+	if (del_cpus && cpumask_weight_and(del_cpus,
+			housekeeping_cpumask(HK_TYPE_KERNEL_NOISE)))
+		return true;
+
+	if (!alloc_cpumask_var(&full_hk_cpus, GFP_KERNEL))
+		return false;
+
+	cpumask_and(full_hk_cpus, housekeeping_cpumask(HK_TYPE_KERNEL_NOISE),
+		    housekeeping_cpumask(HK_TYPE_DOMAIN));
+	cpumask_andnot(full_hk_cpus, full_hk_cpus, isolated_cpus);
+	cpumask_and(full_hk_cpus, full_hk_cpus, cpu_active_mask);
+	if (!cpumask_weight_andnot(full_hk_cpus, add_cpus))
+		res = false;
+
+	free_cpumask_var(full_hk_cpus);
+	return res;
+}
+
+/*
+ * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
+ * @prstate: partition root state to be checked
+ * @new_cpus: cpu mask
+ * Return: true if there is conflict, false otherwise
+ *
+ * CPUs outside of boot_hk_cpus, if defined, can only be used in an
+ * isolated partition.
+ */
+static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
+{
+	if (!have_boot_isolcpus)
+		return false;
+
+	if ((prstate != PRS_ISOLATED) && !cpumask_subset(new_cpus, boot_hk_cpus))
+		return true;
+
+	return false;
+}
+
+/*
+ * update_isolation_cpumasks - Update external isolation related CPU masks
+ *
+ * The following external CPU masks will be updated if necessary:
+ * - workqueue unbound cpumask
+ */
+static void update_isolation_cpumasks(void)
 {
	int ret;

-	lockdep_assert_cpus_held();
-
-	if (!isolcpus_updated)
+	if (!isolated_cpus_updating)
		return;

+	lockdep_assert_cpus_held();
+
	ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
	WARN_ON_ONCE(ret < 0);

	ret = tmigr_isolated_exclude_cpumask(isolated_cpus);
	WARN_ON_ONCE(ret < 0);
+
+	isolated_cpus_updating = false;
 }

 /**
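The isolated_cpus_can_update() helper added above reduces to a mask computation: the CPUs that are housekeeping for both kernel noise (nohz_full) and scheduler domains, minus the already-isolated and to-be-isolated CPUs, must remain non-empty. A self-contained C illustration of that rule using cpu_set_t follows; the CPU numbers are made up and this is not the kernel helper itself:

    /* Illustration of the housekeeping check, not the kernel implementation. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Would isolating "add" still leave at least one full housekeeping CPU? */
    static bool can_isolate(cpu_set_t *hk_noise, cpu_set_t *hk_domain,
                            cpu_set_t *isolated, cpu_set_t *add)
    {
            cpu_set_t full_hk;

            CPU_AND(&full_hk, hk_noise, hk_domain);   /* housekeeping for both */
            for (int cpu = 0; cpu < CPU_SETSIZE; cpu++)
                    if (CPU_ISSET(cpu, isolated) || CPU_ISSET(cpu, add))
                            CPU_CLR(cpu, &full_hk);
            return CPU_COUNT(&full_hk) > 0;
    }

    int main(void)
    {
            cpu_set_t hk_noise, hk_domain, isolated, add;

            CPU_ZERO(&hk_noise);  CPU_ZERO(&hk_domain);
            CPU_ZERO(&isolated);  CPU_ZERO(&add);
            /* e.g. nohz_full=2-3 -> CPUs 0-1 are kernel-noise housekeeping */
            CPU_SET(0, &hk_noise);  CPU_SET(1, &hk_noise);
            for (int cpu = 0; cpu < 4; cpu++)
                    CPU_SET(cpu, &hk_domain);         /* no isolcpus= at boot */
            CPU_SET(1, &add);                         /* try to isolate CPU 1 */

            printf("isolating CPU 1: %s\n",
                   can_isolate(&hk_noise, &hk_domain, &isolated, &add) ?
                   "ok (CPU 0 remains housekeeping)" : "conflict");
            return 0;
    }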
@@ -1508,7 +1592,7 @@ static int compute_trialcs_excpus(struct cpuset *trialcs, struct cpuset *cs)

 static inline bool is_remote_partition(struct cpuset *cs)
 {
-	return !list_empty(&cs->remote_sibling);
+	return cs->remote_partition;
 }

 static inline bool is_local_partition(struct cpuset *cs)

@@ -1529,8 +1613,6 @@ static inline bool is_local_partition(struct cpuset *cs)
 static int remote_partition_enable(struct cpuset *cs, int new_prs,
				   struct tmpmasks *tmp)
 {
-	bool isolcpus_updated;
-
	/*
	 * The user must have sysadmin privilege.
	 */

@@ -1552,13 +1634,17 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
	if (!cpumask_intersects(tmp->new_cpus, cpu_active_mask) ||
	    cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus))
		return PERR_INVCPUS;
+	if (((new_prs == PRS_ISOLATED) &&
+	     !isolated_cpus_can_update(tmp->new_cpus, NULL)) ||
+	    prstate_housekeeping_conflict(new_prs, tmp->new_cpus))
+		return PERR_HKEEPING;

	spin_lock_irq(&callback_lock);
-	isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
-	list_add(&cs->remote_sibling, &remote_children);
+	partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
+	cs->remote_partition = true;
	cpumask_copy(cs->effective_xcpus, tmp->new_cpus);
	spin_unlock_irq(&callback_lock);
-	update_isolation_cpumasks(isolcpus_updated);
+	update_isolation_cpumasks();
	cpuset_force_rebuild();
	cs->prs_err = 0;

@@ -1581,15 +1667,12 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
 */
 static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
 {
-	bool isolcpus_updated;
-
	WARN_ON_ONCE(!is_remote_partition(cs));
	WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus));

	spin_lock_irq(&callback_lock);
-	list_del_init(&cs->remote_sibling);
-	isolcpus_updated = partition_xcpus_del(cs->partition_root_state,
-					       NULL, cs->effective_xcpus);
+	cs->remote_partition = false;
+	partition_xcpus_del(cs->partition_root_state, NULL, cs->effective_xcpus);
	if (cs->prs_err)
		cs->partition_root_state = -cs->partition_root_state;
	else

@@ -1599,7 +1682,7 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
	compute_excpus(cs, cs->effective_xcpus);
	reset_partition_data(cs);
	spin_unlock_irq(&callback_lock);
-	update_isolation_cpumasks(isolcpus_updated);
+	update_isolation_cpumasks();
	cpuset_force_rebuild();

	/*

@@ -1624,7 +1707,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
 {
	bool adding, deleting;
	int prs = cs->partition_root_state;
-	int isolcpus_updated = 0;

	if (WARN_ON_ONCE(!is_remote_partition(cs)))
		return;

@@ -1651,15 +1733,18 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
		else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) ||
			 cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))
			cs->prs_err = PERR_NOCPUS;
+		else if ((prs == PRS_ISOLATED) &&
+			 !isolated_cpus_can_update(tmp->addmask, tmp->delmask))
+			cs->prs_err = PERR_HKEEPING;
		if (cs->prs_err)
			goto invalidate;
	}

	spin_lock_irq(&callback_lock);
	if (adding)
-		isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask);
+		partition_xcpus_add(prs, NULL, tmp->addmask);
	if (deleting)
-		isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask);
+		partition_xcpus_del(prs, NULL, tmp->delmask);
	/*
	 * Need to update effective_xcpus and exclusive_cpus now as
	 * update_sibling_cpumasks() below may iterate back to the same cs.

@@ -1668,7 +1753,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
	if (xcpus)
		cpumask_copy(cs->exclusive_cpus, xcpus);
	spin_unlock_irq(&callback_lock);
-	update_isolation_cpumasks(isolcpus_updated);
+	update_isolation_cpumasks();
	if (adding || deleting)
		cpuset_force_rebuild();


@@ -1683,26 +1768,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus,
		remote_partition_disable(cs, tmp);
 }

-/*
- * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
- * @prstate: partition root state to be checked
- * @new_cpus: cpu mask
- * Return: true if there is conflict, false otherwise
- *
- * CPUs outside of boot_hk_cpus, if defined, can only be used in an
- * isolated partition.
- */
-static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
-{
-	if (!have_boot_isolcpus)
-		return false;
-
-	if ((prstate != PRS_ISOLATED) && !cpumask_subset(new_cpus, boot_hk_cpus))
-		return true;
-
-	return false;
-}
-
 /**
 * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
 * @cs: The cpuset that requests change in partition root state
@@ -1749,9 +1814,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
	int deleting;	/* Deleting cpus from parent's effective_cpus */
	int old_prs, new_prs;
	int part_error = PERR_NONE;	/* Partition error? */
-	int subparts_delta = 0;
-	int isolcpus_updated = 0;
	struct cpumask *xcpus = user_xcpus(cs);
+	int parent_prs = parent->partition_root_state;
	bool nocpu;

	lockdep_assert_held(&cpuset_mutex);

@@ -1774,10 +1838,9 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		if (is_partition_valid(parent))
			adding = cpumask_and(tmp->addmask,
					     xcpus, parent->effective_xcpus);
-		if (old_prs > 0) {
+		if (old_prs > 0)
			new_prs = -old_prs;
-			subparts_delta--;
-		}
		goto write_error;
	}

@@ -1816,6 +1879,10 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		if (prstate_housekeeping_conflict(new_prs, xcpus))
			return PERR_HKEEPING;

+		if ((new_prs == PRS_ISOLATED) && (new_prs != parent_prs) &&
+		    !isolated_cpus_can_update(xcpus, NULL))
+			return PERR_HKEEPING;
+
		if (tasks_nocpu_error(parent, cs, xcpus))
			return PERR_NOCPUS;


@@ -1832,7 +1899,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));

		deleting = true;
-		subparts_delta++;
	} else if (cmd == partcmd_disable) {
		/*
		 * May need to add cpus back to parent's effective_cpus

@@ -1843,7 +1909,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		if (is_partition_valid(cs)) {
			cpumask_copy(tmp->addmask, cs->effective_xcpus);
			adding = true;
-			subparts_delta--;
		}
		new_prs = PRS_MEMBER;
	} else if (newmask) {

@@ -1871,6 +1936,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		 *
		 * For invalid partition:
		 *   delmask = newmask & parent->effective_xcpus
+		 * The partition may become valid soon.
		 */
		if (is_partition_invalid(cs)) {
			adding = false;

@@ -1885,6 +1951,23 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
			deleting = cpumask_and(tmp->delmask, tmp->delmask,
					       parent->effective_xcpus);
		}

+		/*
+		 * TBD: Invalidate a currently valid child root partition may
+		 * still break isolated_cpus_can_update() rule if parent is an
+		 * isolated partition.
+		 */
+		if (is_partition_valid(cs) && (old_prs != parent_prs)) {
+			if ((parent_prs == PRS_ROOT) &&
+			    /* Adding to parent means removing isolated CPUs */
+			    !isolated_cpus_can_update(tmp->delmask, tmp->addmask))
+				part_error = PERR_HKEEPING;
+			if ((parent_prs == PRS_ISOLATED) &&
+			    /* Adding to parent means adding isolated CPUs */
+			    !isolated_cpus_can_update(tmp->addmask, tmp->delmask))
+				part_error = PERR_HKEEPING;
+		}
+
		/*
		 * The new CPUs to be removed from parent's effective CPUs
		 * must be present.

@@ -1966,17 +2049,13 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
		switch (cs->partition_root_state) {
		case PRS_ROOT:
		case PRS_ISOLATED:
-			if (part_error) {
+			if (part_error)
				new_prs = -old_prs;
-				subparts_delta--;
-			}
			break;
		case PRS_INVALID_ROOT:
		case PRS_INVALID_ISOLATED:
-			if (!part_error) {
+			if (!part_error)
				new_prs = -old_prs;
-				subparts_delta++;
-			}
			break;
		}
	}

@@ -2005,28 +2084,20 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
	 * newly deleted ones will be added back to effective_cpus.
	 */
	spin_lock_irq(&callback_lock);
-	if (old_prs != new_prs) {
+	if (old_prs != new_prs)
		cs->partition_root_state = new_prs;
-		if (new_prs <= 0)
-			cs->nr_subparts = 0;
-	}
	/*
	 * Adding to parent's effective_cpus means deletion CPUs from cs
	 * and vice versa.
	 */
	if (adding)
-		isolcpus_updated += partition_xcpus_del(old_prs, parent,
-							tmp->addmask);
+		partition_xcpus_del(old_prs, parent, tmp->addmask);
	if (deleting)
-		isolcpus_updated += partition_xcpus_add(new_prs, parent,
-							tmp->delmask);
-
-	if (is_partition_valid(parent)) {
-		parent->nr_subparts += subparts_delta;
-		WARN_ON_ONCE(parent->nr_subparts < 0);
-	}
+		partition_xcpus_add(new_prs, parent, tmp->delmask);
	spin_unlock_irq(&callback_lock);
-	update_isolation_cpumasks(isolcpus_updated);
+	update_isolation_cpumasks();

	if ((old_prs != new_prs) && (cmd == partcmd_update))
		update_partition_exclusive_flag(cs, new_prs);

@@ -2108,8 +2179,6 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
		 */
		spin_lock_irq(&callback_lock);
		make_partition_invalid(child);
-		cs->nr_subparts--;
-		child->nr_subparts = 0;
		spin_unlock_irq(&callback_lock);
		notify_partition_change(child, old_prs);
		continue;

@@ -2138,7 +2207,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 {
	struct cpuset *cp;
	struct cgroup_subsys_state *pos_css;
-	bool need_rebuild_sched_domains = false;
	int old_prs, new_prs;

	rcu_read_lock();

@@ -2302,15 +2370,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
		if (!cpumask_empty(cp->cpus_allowed) &&
		    is_sched_load_balance(cp) &&
		    (!cpuset_v2() || is_partition_valid(cp)))
-			need_rebuild_sched_domains = true;
+			cpuset_force_rebuild();

		rcu_read_lock();
		css_put(&cp->css);
	}
	rcu_read_unlock();
-
-	if (need_rebuild_sched_domains)
-		cpuset_force_rebuild();
 }

 /**
@@ -2848,21 +2913,19 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
	 */
	retval = nodelist_parse(buf, trialcs->mems_allowed);
	if (retval < 0)
-		goto done;
+		return retval;

	if (!nodes_subset(trialcs->mems_allowed,
-			  top_cpuset.mems_allowed)) {
-		retval = -EINVAL;
-		goto done;
-	}
+			  top_cpuset.mems_allowed))
+		return -EINVAL;

-	if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
-		retval = 0;		/* Too easy - nothing to do */
-		goto done;
-	}
+	/* No change? nothing to do */
+	if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed))
+		return 0;

	retval = validate_change(cs, trialcs);
	if (retval < 0)
-		goto done;
+		return retval;

	check_insane_mems_config(&trialcs->mems_allowed);


@@ -2872,8 +2935,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,

	/* use trialcs->mems_allowed as a temp variable */
	update_nodemasks_hier(cs, &trialcs->mems_allowed);
-done:
-	return retval;
+	return 0;
 }

 bool current_cpuset_is_being_rebound(void)

@@ -3011,6 +3073,11 @@ static int update_prstate(struct cpuset *cs, int new_prs)
		 * A change in load balance state only, no change in cpumasks.
		 * Need to update isolated_cpus.
		 */
+		if (((new_prs == PRS_ISOLATED) &&
+		     !isolated_cpus_can_update(cs->effective_xcpus, NULL)) ||
+		    prstate_housekeeping_conflict(new_prs, cs->effective_xcpus))
+			err = PERR_HKEEPING;
+		else
			isolcpus_updated = true;
	} else {
		/*

@@ -3046,7 +3113,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
	else if (isolcpus_updated)
		isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
	spin_unlock_irq(&callback_lock);
-	update_isolation_cpumasks(isolcpus_updated);
+	update_isolation_cpumasks();

	/* Force update if switching back to member & update effective_xcpus */
	update_cpumasks_hier(cs, &tmpmask, !new_prs);
@@ -3552,7 +3619,6 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
 	__set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	fmeter_init(&cs->fmeter);
 	cs->relax_domain_level = -1;
-	INIT_LIST_HEAD(&cs->remote_sibling);

 	/* Set CS_MEMORY_MIGRATE for default hierarchy */
 	if (cpuset_v2())
@@ -3823,7 +3889,6 @@ int __init cpuset_init(void)
 	nodes_setall(top_cpuset.effective_mems);

 	fmeter_init(&top_cpuset.fmeter);
-	INIT_LIST_HEAD(&remote_children);

 	BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));

@@ -4024,7 +4089,6 @@ static void cpuset_handle_hotplug(void)
 	 */
 	if (!cpumask_empty(subpartitions_cpus)) {
 		if (cpumask_subset(&new_cpus, subpartitions_cpus)) {
-			top_cpuset.nr_subparts = 0;
 			cpumask_clear(subpartitions_cpus);
 		} else {
 			cpumask_andnot(&new_cpus, &new_cpus,
@@ -4119,24 +4183,13 @@ void __init cpuset_init_smp(void)
 	BUG_ON(!cpuset_migrate_mm_wq);
 }

-/**
- * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
- * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
- * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
- *
- * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
- * attached to the specified @tsk. Guaranteed to return some non-empty
- * subset of cpu_active_mask, even if this means going outside the
- * tasks cpuset, except when the task is in the top cpuset.
- **/
-
-void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+/*
+ * Return cpus_allowed mask from a task's cpuset.
+ */
+static void __cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
 {
-	unsigned long flags;
 	struct cpuset *cs;

-	spin_lock_irqsave(&callback_lock, flags);
-
 	cs = task_cs(tsk);
 	if (cs != &top_cpuset)
 		guarantee_active_cpus(tsk, pmask);
@@ -4156,7 +4209,39 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 		if (!cpumask_intersects(pmask, cpu_active_mask))
 			cpumask_copy(pmask, possible_mask);
 	}
+}
+
+/**
+ * cpuset_cpus_allowed_locked - return cpus_allowed mask from a task's cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
+ *
+ * Similar to cpuset_cpus_allowed() except that the caller must have acquired
+ * cpuset_mutex.
+ */
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
+{
+	lockdep_assert_held(&cpuset_mutex);
+	__cpuset_cpus_allowed_locked(tsk, pmask);
+}
+
+/**
+ * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+ * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
+ *
+ * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
+ * attached to the specified @tsk. Guaranteed to return some non-empty
+ * subset of cpu_active_mask, even if this means going outside the
+ * tasks cpuset, except when the task is in the top cpuset.
+ **/
+void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&callback_lock, flags);
+	__cpuset_cpus_allowed_locked(tsk, pmask);
 	spin_unlock_irqrestore(&callback_lock, flags);
 }
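Editorial note, not part of the diff: the hunks above split the old cpuset_cpus_allowed() into an internal helper plus two entry points, so a caller that already serializes against cpuset changes can skip the callback_lock IRQ-save path. A minimal sketch of the intended calling convention follows, assuming the new cpuset_cpus_allowed_locked() declaration is visible through <linux/cpuset.h> and that cpuset_lock()/cpuset_unlock() still wrap cpuset_mutex; the helper name example_query_effective_cpus() is hypothetical and only illustrates the locking expectation.

#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/sched.h>

/* Hypothetical caller that takes cpuset_mutex via cpuset_lock(). */
static void example_query_effective_cpus(struct task_struct *tsk,
					 struct cpumask *mask)
{
	cpuset_lock();				/* acquires cpuset_mutex */
	cpuset_cpus_allowed_locked(tsk, mask);	/* lockdep-asserted variant */
	cpuset_unlock();
}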
@@ -257,7 +257,7 @@ void release_task(struct task_struct *p)
 	rcu_read_unlock();

 	pidfs_exit(p);
-	cgroup_release(p);
+	cgroup_task_release(p);

 	/* Retrieve @thread_pid before __unhash_process() may set it to NULL. */
 	thread_pid = task_pid(p);

@@ -974,7 +974,7 @@ void __noreturn do_exit(long code)
 	exit_thread(tsk);

 	sched_autogroup_exit_task(tsk);
-	cgroup_exit(tsk);
+	cgroup_task_exit(tsk);

 	/*
 	 * FIXME: do that only when needed, using sched_exit tracepoint

@@ -738,7 +738,7 @@ void __put_task_struct(struct task_struct *tsk)
 	unwind_task_free(tsk);
 	sched_ext_free(tsk);
 	io_uring_free(tsk);
-	cgroup_free(tsk);
+	cgroup_task_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
 	exit_creds(tsk);

@@ -178,8 +178,8 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 	 * this process can already run with task_group() == prev->tg or we can
 	 * race with cgroup code which can read autogroup = prev under rq->lock.
 	 * In the latter case for_each_thread() can not miss a migrating thread,
-	 * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
-	 * can't be removed from thread list, we hold ->siglock.
+	 * cpu_cgroup_attach() must not be possible after cgroup_task_exit()
+	 * and it can't be removed from thread list, we hold ->siglock.
 	 *
 	 * If an exiting thread was already removed from thread list we rely on
 	 * sched_autogroup_exit_task().

@@ -5143,6 +5143,8 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	if (prev->sched_class->task_dead)
 		prev->sched_class->task_dead(prev);

+	cgroup_task_dead(prev);
+
 	/* Task is done with its stack. */
 	put_task_stack(prev);
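For orientation, here is an editorial summary (not from the patch) of where the renamed cgroup lifecycle hooks sit after the hunks above, written as a comment block; only the call sites actually visible in this diff are listed.

/*
 * do_exit()            -> cgroup_task_exit(tsk)     (was cgroup_exit())
 * release_task()       -> cgroup_task_release(p)    (was cgroup_release())
 * finish_task_switch() -> cgroup_task_dead(prev)    (new call site)
 * __put_task_struct()  -> cgroup_task_free(tsk)     (was cgroup_free())
 */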
@@ -2675,6 +2675,7 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
 	return NULL;
 }

+/* Access rule: must be called on local CPU with preemption disabled */
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

 static int find_later_rq(struct task_struct *task)

@@ -3117,11 +3118,43 @@ void __init init_sched_dl_class(void)
 					GFP_KERNEL, cpu_to_node(i));
 }

+/*
+ * This function always returns a non-empty bitmap in @cpus. This is because
+ * if a root domain has reserved bandwidth for DL tasks, the DL bandwidth
+ * check will prevent CPU hotplug from deactivating all CPUs in that domain.
+ */
+static void dl_get_task_effective_cpus(struct task_struct *p, struct cpumask *cpus)
+{
+	const struct cpumask *hk_msk;
+
+	hk_msk = housekeeping_cpumask(HK_TYPE_DOMAIN);
+	if (housekeeping_enabled(HK_TYPE_DOMAIN)) {
+		if (!cpumask_intersects(p->cpus_ptr, hk_msk)) {
+			/*
+			 * CPUs isolated by isolcpus="domain" always belong to
+			 * def_root_domain.
+			 */
+			cpumask_andnot(cpus, cpu_active_mask, hk_msk);
+			return;
+		}
+	}
+
+	/*
+	 * If a root domain holds a DL task, it must have active CPUs. So
+	 * active CPUs can always be found by walking up the task's cpuset
+	 * hierarchy up to the partition root.
+	 */
+	cpuset_cpus_allowed_locked(p, cpus);
+}
+
+/* The caller should hold cpuset_mutex */
 void dl_add_task_root_domain(struct task_struct *p)
 {
 	struct rq_flags rf;
 	struct rq *rq;
 	struct dl_bw *dl_b;
+	unsigned int cpu;
+	struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);

 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 	if (!dl_task(p) || dl_entity_is_special(&p->dl)) {

@@ -3129,16 +3162,25 @@ void dl_add_task_root_domain(struct task_struct *p)
 		return;
 	}

-	rq = __task_rq_lock(p, &rf);
+	/*
+	 * Get an active rq, whose rq->rd traces the correct root domain.
+	 * Ideally this would be under cpuset reader lock until rq->rd is
+	 * fetched. However, sleepable locks cannot nest inside pi_lock, so we
+	 * rely on the caller of dl_add_task_root_domain() holding cpuset_mutex
+	 * to guarantee the CPU stays in the cpuset.
+	 */
+	dl_get_task_effective_cpus(p, msk);
+	cpu = cpumask_first_and(cpu_active_mask, msk);
+	BUG_ON(cpu >= nr_cpu_ids);
+	rq = cpu_rq(cpu);
 	dl_b = &rq->rd->dl_bw;
+	/* End of fetching rd */

 	raw_spin_lock(&dl_b->lock);

 	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));

 	raw_spin_unlock(&dl_b->lock);
-	task_rq_unlock(rq, p, &rf);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
 }

 void dl_clear_root_domain(struct root_domain *rd)
@@ -923,8 +923,10 @@ struct corecg_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) {
 		if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME))
 			ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n");
@@ -946,12 +948,11 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

 	cleanup_named_v1_root(root);
-	return ret;
+	ksft_finished();
 }

@@ -796,8 +796,10 @@ struct cpucg_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");

@@ -814,11 +816,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -247,8 +247,10 @@ struct cpuset_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");

@@ -265,11 +267,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -1488,8 +1488,10 @@ struct cgfreezer_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -1501,11 +1503,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -274,8 +274,10 @@ struct cgkill_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -287,11 +289,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -421,8 +421,10 @@ struct kmem_test {
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");

@@ -446,11 +448,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -1650,8 +1650,10 @@ struct memcg_test {
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, proc_status, ret = EXIT_SUCCESS;
+	int i, proc_status;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");

@@ -1685,11 +1687,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }

@@ -597,8 +597,10 @@ static bool zswap_configured(void)
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;

+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");

@@ -625,11 +627,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}

-	return ret;
+	ksft_finished();
 }
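Editorial note, not part of the diff: the selftest hunks above all apply the same KTAP conversion, namely declare the plan up front with ksft_set_plan(), report each case through ksft_test_result_*(), and let ksft_finished() derive the exit status instead of a hand-maintained ret. A minimal standalone sketch of that pattern, assuming kselftest.h from tools/testing/selftests/ is reachable on the include path; the demo check itself is hypothetical.

#include "../kselftest.h"

int main(void)
{
	ksft_print_header();	/* prints the TAP header line */
	ksft_set_plan(1);	/* plan one test, like ARRAY_SIZE(tests) above */

	if (2 + 2 == 4)		/* stand-in for a real test function */
		ksft_test_result_pass("demo_check\n");
	else
		ksft_test_result_fail("demo_check\n");

	ksft_finished();	/* exits pass/fail based on the recorded results */
}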