mirror of https://github.com/torvalds/linux.git
sched: Employ sched_change guards
As proposed a long while ago -- and half done by scx -- wrap the scheduler's 'change' pattern in a guard helper.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
This commit is contained in:
parent
82d6e01a06
commit
e9139f765a
|
|
@ -340,6 +340,11 @@ _label: \
|
|||
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
|
||||
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
|
||||
|
||||
#define DEFINE_CLASS_IS_UNCONDITIONAL(_name) \
|
||||
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
|
||||
static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
|
||||
{ return (void *)1; }
|
||||
|
||||
#define __GUARD_IS_ERR(_ptr) \
|
||||
({ \
|
||||
unsigned long _rc = (__force unsigned long)(_ptr); \
|
||||
|
|
|
|||
|
|
@ -7326,7 +7326,7 @@ void rt_mutex_post_schedule(void)
|
|||
*/
|
||||
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
||||
{
|
||||
int prio, oldprio, queued, running, queue_flag =
|
||||
int prio, oldprio, queue_flag =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
const struct sched_class *prev_class, *next_class;
|
||||
struct rq_flags rf;
|
||||
|
|
@ -7391,53 +7391,43 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
|
|||
if (prev_class != next_class && p->se.sched_delayed)
|
||||
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current_donor(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, queue_flag);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
/*
|
||||
* Boosting conditions are:
|
||||
* 1. -rt task is running and holds mutex A
|
||||
* --> -dl task blocks on mutex A
|
||||
*
|
||||
* 2. -dl task is running and holds mutex A
|
||||
* --> -dl task blocks on mutex A and could preempt the
|
||||
* running task
|
||||
*/
|
||||
if (dl_prio(prio)) {
|
||||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_prio(pi_task->prio) &&
|
||||
dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.pi_se = pi_task->dl.pi_se;
|
||||
queue_flag |= ENQUEUE_REPLENISH;
|
||||
scoped_guard (sched_change, p, queue_flag) {
|
||||
/*
|
||||
* Boosting conditions are:
|
||||
* 1. -rt task is running and holds mutex A
|
||||
* --> -dl task blocks on mutex A
|
||||
*
|
||||
* 2. -dl task is running and holds mutex A
|
||||
* --> -dl task blocks on mutex A and could preempt the
|
||||
* running task
|
||||
*/
|
||||
if (dl_prio(prio)) {
|
||||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_prio(pi_task->prio) &&
|
||||
dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.pi_se = pi_task->dl.pi_se;
|
||||
scope->flags |= ENQUEUE_REPLENISH;
|
||||
} else {
|
||||
p->dl.pi_se = &p->dl;
|
||||
}
|
||||
} else if (rt_prio(prio)) {
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (oldprio < prio)
|
||||
scope->flags |= ENQUEUE_HEAD;
|
||||
} else {
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (rt_prio(oldprio))
|
||||
p->rt.timeout = 0;
|
||||
}
|
||||
} else if (rt_prio(prio)) {
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (oldprio < prio)
|
||||
queue_flag |= ENQUEUE_HEAD;
|
||||
} else {
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.pi_se = &p->dl;
|
||||
if (rt_prio(oldprio))
|
||||
p->rt.timeout = 0;
|
||||
|
||||
p->sched_class = next_class;
|
||||
p->prio = prio;
|
||||
|
||||
check_class_changing(rq, p, prev_class);
|
||||
}
|
||||
|
||||
p->sched_class = next_class;
|
||||
p->prio = prio;
|
||||
|
||||
check_class_changing(rq, p, prev_class);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, queue_flag);
|
||||
if (running)
|
||||
set_next_task(rq, p);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
out_unlock:
|
||||
/* Prevent rq from going away on us: */
|
||||
|
|
@ -8084,26 +8074,9 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
|
|||
*/
|
||||
void sched_setnuma(struct task_struct *p, int nid)
|
||||
{
|
||||
bool queued, running;
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(p, &rf);
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current_donor(rq, p);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
p->numa_preferred_nid = nid;
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
set_next_task(rq, p);
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
guard(task_rq_lock)(p);
|
||||
scoped_guard (sched_change, p, DEQUEUE_SAVE)
|
||||
p->numa_preferred_nid = nid;
|
||||
}
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
|
||||
|
|
@ -9205,8 +9178,9 @@ static void sched_change_group(struct task_struct *tsk)
|
|||
*/
|
||||
void sched_move_task(struct task_struct *tsk, bool for_autogroup)
|
||||
{
|
||||
int queued, running, queue_flags =
|
||||
unsigned int queue_flags =
|
||||
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
|
||||
bool resched = false;
|
||||
struct rq *rq;
|
||||
|
||||
CLASS(task_rq_lock, rq_guard)(tsk);
|
||||
|
|
@ -9214,29 +9188,16 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup)
|
|||
|
||||
update_rq_clock(rq);
|
||||
|
||||
running = task_current_donor(rq, tsk);
|
||||
queued = task_on_rq_queued(tsk);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, tsk, queue_flags);
|
||||
if (running)
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
sched_change_group(tsk);
|
||||
if (!for_autogroup)
|
||||
scx_cgroup_move_task(tsk);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, queue_flags);
|
||||
if (running) {
|
||||
set_next_task(rq, tsk);
|
||||
/*
|
||||
* After changing group, the running task may have joined a
|
||||
* throttled one but it's still the running task. Trigger a
|
||||
* resched to make sure that task can still run.
|
||||
*/
|
||||
resched_curr(rq);
|
||||
scoped_guard (sched_change, tsk, queue_flags) {
|
||||
sched_change_group(tsk);
|
||||
if (!for_autogroup)
|
||||
scx_cgroup_move_task(tsk);
|
||||
if (scope->running)
|
||||
resched = true;
|
||||
}
|
||||
|
||||
if (resched)
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
static struct cgroup_subsys_state *
|
||||
|
|
@ -10892,37 +10853,39 @@ void sched_mm_cid_fork(struct task_struct *t)
|
|||
}
|
||||
#endif /* CONFIG_SCHED_MM_CID */
|
||||
|
||||
#ifdef CONFIG_SCHED_CLASS_EXT
|
||||
void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
|
||||
struct sched_enq_and_set_ctx *ctx)
|
||||
static DEFINE_PER_CPU(struct sched_change_ctx, sched_change_ctx);
|
||||
|
||||
struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags)
|
||||
{
|
||||
struct sched_change_ctx *ctx = this_cpu_ptr(&sched_change_ctx);
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
lockdep_assert_rq_held(rq);
|
||||
|
||||
*ctx = (struct sched_enq_and_set_ctx){
|
||||
*ctx = (struct sched_change_ctx){
|
||||
.p = p,
|
||||
.queue_flags = queue_flags,
|
||||
.flags = flags,
|
||||
.queued = task_on_rq_queued(p),
|
||||
.running = task_current(rq, p),
|
||||
.running = task_current_donor(rq, p),
|
||||
};
|
||||
|
||||
update_rq_clock(rq);
|
||||
if (ctx->queued)
|
||||
dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK);
|
||||
dequeue_task(rq, p, flags);
|
||||
if (ctx->running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
|
||||
void sched_change_end(struct sched_change_ctx *ctx)
|
||||
{
|
||||
struct rq *rq = task_rq(ctx->p);
|
||||
struct task_struct *p = ctx->p;
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
lockdep_assert_rq_held(rq);
|
||||
|
||||
if (ctx->queued)
|
||||
enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK);
|
||||
enqueue_task(rq, p, ctx->flags | ENQUEUE_NOCLOCK);
|
||||
if (ctx->running)
|
||||
set_next_task(rq, ctx->p);
|
||||
set_next_task(rq, p);
|
||||
}
|
||||
#endif /* CONFIG_SCHED_CLASS_EXT */
|
||||
|
|
|
|||
|
|
@ -3780,11 +3780,10 @@ static void scx_bypass(bool bypass)
|
|||
*/
|
||||
list_for_each_entry_safe_reverse(p, n, &rq->scx.runnable_list,
|
||||
scx.runnable_node) {
|
||||
struct sched_enq_and_set_ctx ctx;
|
||||
|
||||
/* cycling deq/enq is enough, see the function comment */
|
||||
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
|
||||
sched_enq_and_set_task(&ctx);
|
||||
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
|
||||
/* nothing */ ;
|
||||
}
|
||||
}
|
||||
|
||||
/* resched to restore ticks and idle state */
|
||||
|
|
@ -3916,17 +3915,16 @@ static void scx_disable_workfn(struct kthread_work *work)
|
|||
const struct sched_class *old_class = p->sched_class;
|
||||
const struct sched_class *new_class =
|
||||
__setscheduler_class(p->policy, p->prio);
|
||||
struct sched_enq_and_set_ctx ctx;
|
||||
|
||||
update_rq_clock(task_rq(p));
|
||||
|
||||
if (old_class != new_class && p->se.sched_delayed)
|
||||
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
|
||||
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
|
||||
|
||||
p->sched_class = new_class;
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
|
||||
sched_enq_and_set_task(&ctx);
|
||||
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK) {
|
||||
p->sched_class = new_class;
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
}
|
||||
|
||||
check_class_changed(task_rq(p), p, old_class, p->prio);
|
||||
scx_exit_task(p);
|
||||
|
|
@ -4660,21 +4658,20 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||
const struct sched_class *old_class = p->sched_class;
|
||||
const struct sched_class *new_class =
|
||||
__setscheduler_class(p->policy, p->prio);
|
||||
struct sched_enq_and_set_ctx ctx;
|
||||
|
||||
if (!tryget_task_struct(p))
|
||||
continue;
|
||||
|
||||
update_rq_clock(task_rq(p));
|
||||
|
||||
if (old_class != new_class && p->se.sched_delayed)
|
||||
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
|
||||
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
|
||||
|
||||
p->scx.slice = SCX_SLICE_DFL;
|
||||
p->sched_class = new_class;
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
|
||||
sched_enq_and_set_task(&ctx);
|
||||
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK) {
|
||||
p->scx.slice = SCX_SLICE_DFL;
|
||||
p->sched_class = new_class;
|
||||
check_class_changing(task_rq(p), p, old_class);
|
||||
}
|
||||
|
||||
check_class_changed(task_rq(p), p, old_class, p->prio);
|
||||
put_task_struct(p);
|
||||
|
|
|
|||
|
|
@ -3885,23 +3885,38 @@ extern void check_class_changed(struct rq *rq, struct task_struct *p,
|
|||
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
|
||||
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
|
||||
|
||||
#ifdef CONFIG_SCHED_CLASS_EXT
|
||||
/*
|
||||
* Used by SCX in the enable/disable paths to move tasks between sched_classes
|
||||
* and establish invariants.
|
||||
* The 'sched_change' pattern is the safe, easy and slow way of changing a
|
||||
* task's scheduling properties. It dequeues a task, such that the scheduler
|
||||
* is fully unaware of it; at which point its properties can be modified;
|
||||
* after which it is enqueued again.
|
||||
*
|
||||
* Typically this must be called while holding task_rq_lock, since most/all
|
||||
* properties are serialized under those locks. There is currently one
|
||||
* exception to this rule in sched/ext which only holds rq->lock.
|
||||
*/
|
||||
struct sched_enq_and_set_ctx {
|
||||
|
||||
/*
|
||||
* This structure is a temporary, used to preserve/convey the queueing state
|
||||
* of the task between sched_change_begin() and sched_change_end(), ensuring
|
||||
* the task's queueing state is idempotent across the operation.
|
||||
*/
|
||||
struct sched_change_ctx {
|
||||
struct task_struct *p;
|
||||
int queue_flags;
|
||||
int flags;
|
||||
bool queued;
|
||||
bool running;
|
||||
};
|
||||
|
||||
void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
|
||||
struct sched_enq_and_set_ctx *ctx);
|
||||
void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
|
||||
struct sched_change_ctx *sched_change_begin(struct task_struct *p, unsigned int flags);
|
||||
void sched_change_end(struct sched_change_ctx *ctx);
|
||||
|
||||
#endif /* CONFIG_SCHED_CLASS_EXT */
|
||||
DEFINE_CLASS(sched_change, struct sched_change_ctx *,
|
||||
sched_change_end(_T),
|
||||
sched_change_begin(p, flags),
|
||||
struct task_struct *p, unsigned int flags)
|
||||
|
||||
DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
|
||||
|
||||
#include "ext.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ static int effective_prio(struct task_struct *p)
|
|||
|
||||
void set_user_nice(struct task_struct *p, long nice)
|
||||
{
|
||||
bool queued, running;
|
||||
struct rq *rq;
|
||||
int old_prio;
|
||||
|
||||
|
|
@ -90,22 +89,12 @@ void set_user_nice(struct task_struct *p, long nice)
|
|||
return;
|
||||
}
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current_donor(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
p->static_prio = NICE_TO_PRIO(nice);
|
||||
set_load_weight(p, true);
|
||||
old_prio = p->prio;
|
||||
p->prio = effective_prio(p);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
if (running)
|
||||
set_next_task(rq, p);
|
||||
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
|
||||
p->static_prio = NICE_TO_PRIO(nice);
|
||||
set_load_weight(p, true);
|
||||
old_prio = p->prio;
|
||||
p->prio = effective_prio(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the task increased its priority or is running and
|
||||
|
|
@ -515,7 +504,7 @@ int __sched_setscheduler(struct task_struct *p,
|
|||
bool user, bool pi)
|
||||
{
|
||||
int oldpolicy = -1, policy = attr->sched_policy;
|
||||
int retval, oldprio, newprio, queued, running;
|
||||
int retval, oldprio, newprio;
|
||||
const struct sched_class *prev_class, *next_class;
|
||||
struct balance_callback *head;
|
||||
struct rq_flags rf;
|
||||
|
|
@ -698,33 +687,25 @@ int __sched_setscheduler(struct task_struct *p,
|
|||
if (prev_class != next_class && p->se.sched_delayed)
|
||||
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current_donor(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, queue_flags);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
scoped_guard (sched_change, p, queue_flags) {
|
||||
|
||||
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
|
||||
__setscheduler_params(p, attr);
|
||||
p->sched_class = next_class;
|
||||
p->prio = newprio;
|
||||
if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
|
||||
__setscheduler_params(p, attr);
|
||||
p->sched_class = next_class;
|
||||
p->prio = newprio;
|
||||
}
|
||||
__setscheduler_uclamp(p, attr);
|
||||
check_class_changing(rq, p, prev_class);
|
||||
|
||||
if (scope->queued) {
|
||||
/*
|
||||
* We enqueue to tail when the priority of a task is
|
||||
* increased (user space view).
|
||||
*/
|
||||
if (oldprio < p->prio)
|
||||
scope->flags |= ENQUEUE_HEAD;
|
||||
}
|
||||
}
|
||||
__setscheduler_uclamp(p, attr);
|
||||
check_class_changing(rq, p, prev_class);
|
||||
|
||||
if (queued) {
|
||||
/*
|
||||
* We enqueue to tail when the priority of a task is
|
||||
* increased (user space view).
|
||||
*/
|
||||
if (oldprio < p->prio)
|
||||
queue_flags |= ENQUEUE_HEAD;
|
||||
|
||||
enqueue_task(rq, p, queue_flags);
|
||||
}
|
||||
if (running)
|
||||
set_next_task(rq, p);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue