sched/deadline: Fix dl_server time accounting

The dl_server time accounting code is a little odd. The normal scheduler
pattern is to update curr before doing something, such that the old state is
fully accounted before changing state.
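A minimal sketch of that pattern (illustrative only, not kernel code; update_curr() and rq->donor are the names used in the diff below):

	/* first, fully account the current (donor) task's elapsed runtime */
	rq->donor->sched_class->update_curr(rq);
	/* only then mutate state: (re)arm the server, start it, etc. */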

Notably, dl_server_timer() needs to propagate the current task's time accounting
first, since the current task could be run by the dl_server and thus affect
dl_se->runtime. Similarly for dl_server_start().
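Concretely, the resulting ordering looks like this (a simplified call-flow
sketch; only the path feeding the fair server is shown):

	/*
	 * dl_server_timer()
	 *   rq->donor->sched_class->update_curr(rq)
	 *     -> dl_server_update(&rq->fair_server, delta_exec)  // from fair's update_curr()
	 *        -> update_curr_dl_se(...)                       // consumes dl_se->runtime
	 *   ... only now inspect/replenish dl_se->runtime ...
	 */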

And since the (deferred) dl_server wants idle time accounted, rework
idle_sched_class time accounting to be more like all the other classes.
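After the rework, idle time reaches the server through the same update_curr()
shape as every other class (sketch of the new call chain; all names are from
this patch):

	/*
	 * task_tick_idle() / put_prev_task_idle()
	 *   update_curr_idle(rq)                    // delta since se->exec_start
	 *     dl_server_update_idle(&rq->fair_server, delta_exec)
	 *       update_curr_dl_se(...)              // only if active && runtime && defer
	 */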

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251020141130.GJ3245006@noisy.programming.kicks-ass.net
commit e636ffb9e3 (parent e40cea333e)
Peter Zijlstra, 2025-10-20 16:15:05 +02:00
4 changed files with 33 additions and 35 deletions

kernel/sched/deadline.c

@@ -1166,8 +1166,12 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
sched_clock_tick();
update_rq_clock(rq);
if (!dl_se->dl_runtime)
return HRTIMER_NORESTART;
/*
* Make sure current has propagated its pending runtime into
* any relevant server through calling dl_server_update() and
* friends.
*/
rq->donor->sched_class->update_curr(rq);
if (dl_se->dl_defer_armed) {
/*
@@ -1543,35 +1547,16 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
* as time available for the fair server, avoiding a penalty for the
* rt scheduler that did not consume that time.
*/
void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec)
{
s64 delta_exec;
if (!rq->fair_server.dl_defer)
return;
/* no need to discount more */
if (rq->fair_server.runtime < 0)
return;
delta_exec = rq_clock_task(rq) - p->se.exec_start;
if (delta_exec < 0)
return;
rq->fair_server.runtime -= delta_exec;
if (rq->fair_server.runtime < 0) {
rq->fair_server.dl_defer_running = 0;
rq->fair_server.runtime = 0;
}
p->se.exec_start = rq_clock_task(rq);
if (dl_se->dl_server_active && dl_se->dl_runtime && dl_se->dl_defer)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
/* 0 runtime = fair server disabled */
if (dl_se->dl_runtime)
if (dl_se->dl_server_active && dl_se->dl_runtime)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
@@ -1582,6 +1567,11 @@ void dl_server_start(struct sched_dl_entity *dl_se)
if (!dl_server(dl_se) || dl_se->dl_server_active)
return;
/*
* Update the current task to 'now'.
*/
rq->donor->sched_class->update_curr(rq);
if (WARN_ON_ONCE(!cpu_online(cpu_of(rq))))
return;

kernel/sched/fair.c

@@ -1212,8 +1212,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
* against fair_server such that it can account for this time
* and possibly avoid running this period.
*/
if (dl_server_active(&rq->fair_server))
dl_server_update(&rq->fair_server, delta_exec);
dl_server_update(&rq->fair_server, delta_exec);
}
account_cfs_rq_runtime(cfs_rq, delta_exec);
@@ -6961,12 +6960,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
h_nr_idle = 1;
}
if (!rq_h_nr_queued && rq->cfs.h_nr_queued) {
/* Account for idle runtime */
if (!rq->nr_running)
dl_server_update_idle_time(rq, rq->curr);
if (!rq_h_nr_queued && rq->cfs.h_nr_queued)
dl_server_start(&rq->fair_server);
}
/* At this point se is NULL and we are at root level */
add_nr_running(rq, 1);

kernel/sched/idle.c

@@ -452,9 +452,11 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
resched_curr(rq);
}
static void update_curr_idle(struct rq *rq);
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct task_struct *next)
{
dl_server_update_idle_time(rq, prev);
update_curr_idle(rq);
scx_update_idle(rq, false, true);
}
@@ -496,6 +498,7 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
*/
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
update_curr_idle(rq);
}
static void switching_to_idle(struct rq *rq, struct task_struct *p)
@@ -514,6 +517,17 @@ prio_changed_idle(struct rq *rq, struct task_struct *p, u64 oldprio)
static void update_curr_idle(struct rq *rq)
{
struct sched_entity *se = &rq->idle->se;
u64 now = rq_clock_task(rq);
s64 delta_exec;
delta_exec = now - se->exec_start;
if (unlikely(delta_exec <= 0))
return;
se->exec_start = now;
dl_server_update_idle(&rq->fair_server, delta_exec);
}
/*

kernel/sched/sched.h

@@ -404,6 +404,7 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
* naturally throttled to once per period, avoiding high context switch
* workloads from spamming the hrtimer program/cancel paths.
*/
extern void dl_server_update_idle(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
@@ -411,8 +412,6 @@ extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);
extern void dl_server_update_idle_time(struct rq *rq,
struct task_struct *p);
extern void fair_server_init(struct rq *rq);
extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq);
extern int dl_server_apply_params(struct sched_dl_entity *dl_se,