Fix two dl_server regressions: a race that can end up
leaving the dl_server stuck, and a dl_server throttling
bug causing lag to fair tasks.
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmjWmxcRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1hj+w/9FQlzprbY072g9nBTri+tl1g37UShmbPA
 bogKP0xenl7l1Xfrk/aEIWYEIQ9XrAvaeVS8g0g/4Qp+j1q5+mNVpgAZ1ZUUrNHn
 l/csP36Otx8kF00hKMlir/doPKO91lNv+mNpsEmp4nLHtgbLfktxtSAWVwZNwsq8
 jBAHUJ9JxZA8gngObZnNBuSWM30LNwfB0T5fPNw5ryBjfEJaDClXo/ZsUfyoppbo
 DoTh1Kcjsz8w2dRT1yY88I6xEq6RMDkwCwpzQ2TA0ff7HgQxqC2uuKZmVbNJAQ6t
 kjJR8+A0G9QH0zK2D3vGljW1fwBtmb3j7YYTHEfkhUXL739rR4PpZ5koasD8TGUd
 w9t41JIzpxJrApUWaiAliuKNGJXQW58kpHuryQkqB+RfhesPUxM3bIMQSm0fTBpM
 fp3rAfMY9vYmJ6JtEVnKSSo2iGcFcN70VNnwJV/ZCthoegCQER4QvmAxbq6GDQx/
 ZtMZqLMiDvuLPQ/aRYbEFw9FbV9SnZ6GEDqcdlT6DOsA3ldpFek4GqcCAoKTuVd4
 lUupjdRIhroC6jyjigKNtHJ4iwqkE5UedXvxn8igpPCA2uyRl/rjviraFt2OhE/x
 Of+2QK4iRVnz8sa2t8phKep524hLcH35S/i3zpR/QLA9vFTFBFtwLskFRvpTVLCj
 Bn6UeCxq08U=
 =9ZgA
 -----END PGP SIGNATURE-----

Merge tag 'sched-urgent-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Fix two dl_server regressions: a race that can end up leaving the
  dl_server stuck, and a dl_server throttling bug causing lag to fair
  tasks"

* tag 'sched-urgent-2025-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/deadline: Fix dl_server behaviour
  sched/deadline: Fix dl_server getting stuck
This commit is contained in:
Linus Torvalds 2025-09-26 12:30:23 -07:00
commit 083fc6d7fa
4 changed files with 35 additions and 46 deletions

View File

@ -706,7 +706,6 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
unsigned int dl_server_idle : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@ -733,7 +732,6 @@ struct sched_dl_entity {
* runnable task.
*/
struct rq *rq;
dl_server_has_tasks_f server_has_tasks;
dl_server_pick_f server_pick_task;
#ifdef CONFIG_RT_MUTEXES

View File

@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_se->dl_defer && !dl_se->dl_defer_running &&
dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
if (!is_dl_boosted(dl_se)) {
/*
* Set dl_se->dl_defer_armed and dl_throttled variables to
@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
static bool dl_server_stopped(struct sched_dl_entity *dl_se);
static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
{
struct rq *rq = rq_of_dl_se(dl_se);
@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
if (!dl_se->dl_runtime)
return HRTIMER_NORESTART;
if (!dl_se->server_has_tasks(dl_se)) {
replenish_dl_entity(dl_se);
dl_server_stopped(dl_se);
return HRTIMER_NORESTART;
}
if (dl_se->dl_defer_armed) {
/*
* First check if the server could consume runtime in background.
@ -1579,10 +1571,8 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
/* 0 runtime = fair server disabled */
if (dl_se->dl_runtime) {
dl_se->dl_server_idle = 0;
if (dl_se->dl_runtime)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
}
void dl_server_start(struct sched_dl_entity *dl_se)
@ -1610,26 +1600,10 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
dl_se->dl_server_active = 0;
}
static bool dl_server_stopped(struct sched_dl_entity *dl_se)
{
if (!dl_se->dl_server_active)
return true;
if (dl_se->dl_server_idle) {
dl_server_stop(dl_se);
return true;
}
dl_se->dl_server_idle = 1;
return false;
}
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task)
{
dl_se->rq = rq;
dl_se->server_has_tasks = has_tasks;
dl_se->server_pick_task = pick_task;
}
@ -2394,10 +2368,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
if (dl_server(dl_se)) {
p = dl_se->server_pick_task(dl_se);
if (!p) {
if (!dl_server_stopped(dl_se)) {
dl_se->dl_yielded = 1;
update_curr_dl_se(rq, dl_se, 0);
}
dl_server_stop(dl_se);
goto again;
}
rq->dl_server = dl_se;

View File

@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
return pick_next_task_fair(rq, prev, NULL);
}
static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
{
return !!dl_se->rq->cfs.nr_queued;
}
static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
{
return pick_task_fair(dl_se->rq);
@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
init_dl_entity(dl_se);
dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
dl_server_init(dl_se, rq, fair_server_pick_task);
}
/*

View File

@ -365,25 +365,50 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
*
* dl_se::rq -- runqueue we belong to.
*
* dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
* server when it runs out of tasks to run.
*
* dl_se::server_pick_task() -- nested pick_next_task(); we yield the period if
* this returns NULL.
*
* dl_server_update() -- called from update_curr_common(), propagates runtime
* to the server.
*
* dl_server_start()
* dl_server_stop() -- start/stop the server when it has (no) tasks.
* dl_server_start() -- start the server when it has tasks; it will stop
* automatically when there are no more tasks, per
* dl_se::server_pick_task() returning NULL.
*
* dl_server_stop() -- (force) stop the server; use when updating
* parameters.
*
* dl_server_init() -- initializes the server.
*
* When started the dl_server will (per dl_defer) schedule a timer for its
* zero-laxity point -- that is, unlike regular EDF tasks which run ASAP, a
* server will run at the very end of its period.
*
* This is done such that any runtime from the target class can be accounted
* against the server -- through dl_server_update() above -- such that when it
* becomes time to run, it might already be out of runtime and get deferred
* until the next period. In this case dl_server_timer() will alternate
* between defer and replenish but never actually enqueue the server.
*
* Only when the target class does not manage to exhaust the server's runtime
* (there's actually starvation in the given period), will the dl_server get on
* the runqueue. Once queued it will pick tasks from the target class and run
* them until either its runtime is exhausted, at which point it's back to
* dl_server_timer, or until there are no more tasks to run, at which point
* the dl_server stops itself.
*
* By stopping at this point the dl_server retains bandwidth, which, if a new
* task wakes up imminently (starting the server again), can be used --
* subject to CBS wakeup rules -- without having to wait for the next period.
*
* Additionally, because of the dl_defer behaviour the start/stop behaviour is
* naturally throttled to once per period, preventing high context switch
* workloads from spamming the hrtimer program/cancel paths.
*/
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);