sched/ext: Fold balance_scx() into pick_task_scx()

With pick_task() having an rf argument, it is possible to do the
lock-break there and get rid of the weird balance()/pick_task() hack.
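
The resulting flow, roughly (simplified from the pick_task_scx() hunk
below; only the lock-break and retry parts are shown):

	static struct task_struct *pick_task_scx(struct rq *rq, struct rq_flags *rf)
	{
		struct task_struct *prev = rq->curr;

		/* Lock-break: balance_one() may drop and re-acquire rq->lock. */
		rq_modified_clear(rq);
		rq_unpin_lock(rq, rf);
		balance_one(rq, prev);
		rq_repin_lock(rq, rf);

		/*
		 * If a class above SCX gained a runnable task while the lock
		 * was dropped, make the caller restart the whole pick.
		 */
		if (rq_modified_above(rq, &ext_sched_class))
			return RETRY_TASK;

		/* ... actual task selection, unchanged, see the diff ... */
	}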

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Author: Peter Zijlstra
Date:   2025-10-01 20:41:36 +02:00
commit 4c95380701
parent 50653216e4

3 changed files with 12 additions and 80 deletions

kernel/sched/core.c

@@ -5845,19 +5845,6 @@ static void prev_balance(struct rq *rq, struct task_struct *prev,
 	const struct sched_class *start_class = prev->sched_class;
 	const struct sched_class *class;
 
-#ifdef CONFIG_SCHED_CLASS_EXT
-	/*
-	 * SCX requires a balance() call before every pick_task() including when
-	 * waking up from SCHED_IDLE. If @start_class is below SCX, start from
-	 * SCX instead. Also, set a flag to detect missing balance() call.
-	 */
-	if (scx_enabled()) {
-		rq->scx.flags |= SCX_RQ_BAL_PENDING;
-		if (sched_class_above(&ext_sched_class, start_class))
-			start_class = &ext_sched_class;
-	}
-#endif
-
 	/*
 	 * We must do the balancing pass before put_prev_task(), such
 	 * that when we release the rq->lock the task is in the same

kernel/sched/ext.c

@@ -2013,7 +2013,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	lockdep_assert_rq_held(rq);
 	rq->scx.flags |= SCX_RQ_IN_BALANCE;
-	rq->scx.flags &= ~(SCX_RQ_BAL_PENDING | SCX_RQ_BAL_KEEP);
+	rq->scx.flags &= ~SCX_RQ_BAL_KEEP;
 
 	if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
 	    unlikely(rq->scx.cpu_released)) {
@@ -2119,40 +2119,6 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	return true;
 }
 
-static int balance_scx(struct rq *rq, struct task_struct *prev,
-		       struct rq_flags *rf)
-{
-	int ret;
-
-	rq_unpin_lock(rq, rf);
-
-	ret = balance_one(rq, prev);
-
-#ifdef CONFIG_SCHED_SMT
-	/*
-	 * When core-sched is enabled, this ops.balance() call will be followed
-	 * by pick_task_scx() on this CPU and the SMT siblings. Balance the
-	 * siblings too.
-	 */
-	if (sched_core_enabled(rq)) {
-		const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
-		int scpu;
-
-		for_each_cpu_andnot(scpu, smt_mask, cpumask_of(cpu_of(rq))) {
-			struct rq *srq = cpu_rq(scpu);
-			struct task_struct *sprev = srq->curr;
-
-			WARN_ON_ONCE(__rq_lockp(rq) != __rq_lockp(srq));
-			update_rq_clock(srq);
-			balance_one(srq, sprev);
-		}
-	}
-#endif
-	rq_repin_lock(rq, rf);
-
-	return ret;
-}
-
 static void process_ddsp_deferred_locals(struct rq *rq)
 {
 	struct task_struct *p;
@@ -2335,38 +2301,19 @@ static struct task_struct *first_local_task(struct rq *rq)
 static struct task_struct *pick_task_scx(struct rq *rq, struct rq_flags *rf)
 {
 	struct task_struct *prev = rq->curr;
+	bool keep_prev, kick_idle = false;
 	struct task_struct *p;
-	bool keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;
-	bool kick_idle = false;
 
-	/*
-	 * WORKAROUND:
-	 *
-	 * %SCX_RQ_BAL_KEEP should be set iff @prev is on SCX as it must just
-	 * have gone through balance_scx(). Unfortunately, there currently is a
-	 * bug where fair could say yes on balance() but no on pick_task(),
-	 * which then ends up calling pick_task_scx() without preceding
-	 * balance_scx().
-	 *
-	 * Keep running @prev if possible and avoid stalling from entering idle
-	 * without balancing.
-	 *
-	 * Once fair is fixed, remove the workaround and trigger WARN_ON_ONCE()
-	 * if pick_task_scx() is called without preceding balance_scx().
-	 */
-	if (unlikely(rq->scx.flags & SCX_RQ_BAL_PENDING)) {
-		if (prev->scx.flags & SCX_TASK_QUEUED) {
-			keep_prev = true;
-		} else {
-			keep_prev = false;
-			kick_idle = true;
-		}
-	} else if (unlikely(keep_prev &&
-			    prev->sched_class != &ext_sched_class)) {
-		/*
-		 * Can happen while enabling as SCX_RQ_BAL_PENDING assertion is
-		 * conditional on scx_enabled() and may have been skipped.
-		 */
+	rq_modified_clear(rq);
+	rq_unpin_lock(rq, rf);
+	balance_one(rq, prev);
+	rq_repin_lock(rq, rf);
+	if (rq_modified_above(rq, &ext_sched_class))
+		return RETRY_TASK;
+
+	keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;
+	if (unlikely(keep_prev &&
+		     prev->sched_class != &ext_sched_class)) {
 		WARN_ON_ONCE(scx_enable_state() == SCX_ENABLED);
 		keep_prev = false;
 	}
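
For context (not part of this diff): RETRY_TASK is consumed by the
scheduler core's pick loop, which restarts the whole pick when a class
signals it. A hand-wave sketch of that loop, assuming the usual
__pick_next_task() structure in kernel/sched/core.c (the exact
iteration there differs):

	restart:
		prev_balance(rq, prev, rf);

		for_each_class(class) {
			p = class->pick_task(rq, rf);
			if (unlikely(p == RETRY_TASK))
				goto restart;	/* re-run balance and pick */
			if (p)
				return p;
		}
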
@@ -3243,7 +3190,6 @@ DEFINE_SCHED_CLASS(ext) = {
 	.wakeup_preempt		= wakeup_preempt_scx,
 
-	.balance		= balance_scx,
 	.pick_task		= pick_task_scx,
 
 	.put_prev_task		= put_prev_task_scx,

kernel/sched/sched.h

@@ -779,7 +779,6 @@ enum scx_rq_flags {
 	 */
 	SCX_RQ_ONLINE		= 1 << 0,
 	SCX_RQ_CAN_STOP_TICK	= 1 << 1,
-	SCX_RQ_BAL_PENDING	= 1 << 2,	/* balance hasn't run yet */
 	SCX_RQ_BAL_KEEP		= 1 << 3,	/* balance decided to keep current */
 	SCX_RQ_BYPASSING	= 1 << 4,
 	SCX_RQ_CLK_VALID	= 1 << 5,	/* RQ clock is fresh and valid */