sched_ext: Wrap kfunc args in struct to prepare for aux__prog

scx_bpf_dsq_insert_vtime() and scx_bpf_select_cpu_and() currently have 5
parameters. An upcoming change will add an aux__prog parameter, which will
exceed BPF's 5-argument limit for kfuncs.

Prepare by adding new kfuncs __scx_bpf_dsq_insert_vtime() and
__scx_bpf_select_cpu_and() that take args structs. The existing kfuncs are
kept as compatibility wrappers. BPF programs use inline wrappers that detect
the kernel API version via bpf_core_type_exists() and call the new
struct-based kfuncs when available, falling back to the compat kfuncs
otherwise. This allows BPF programs to work with both old and new kernels.
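
For illustration, a minimal sketch of a caller (hypothetical scheduler and
DSQ name, not part of this change): with the inline wrapper in place,
ops.enqueue() keeps using the familiar five-argument form and the matching
kfunc is chosen at program load time.

	/* Sketch only. SHARED_DSQ is an assumed scheduler-created DSQ id. */
	void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
	{
		/*
		 * Identical source on old and new kernels: the common.bpf.h
		 * wrapper resolves to __scx_bpf_dsq_insert_vtime() when the
		 * kernel has struct scx_bpf_dsq_insert_vtime_args, and to
		 * the compat kfunc otherwise.
		 */
		scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL,
					 p->scx.dsq_vtime, enq_flags);
	}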

Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Tejun Heo, 2025-10-06 15:51:46 -10:00
parent 3035addfaf
commit c0d630ba34

4 changed files with 182 additions and 39 deletions

kernel/sched/ext.c

@@ -5448,37 +5448,9 @@ __bpf_kfunc void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice
 	scx_dsq_insert_commit(sch, p, dsq_id, enq_flags);
 }
 
-/**
- * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ
- * @p: task_struct to insert
- * @dsq_id: DSQ to insert into
- * @slice: duration @p can run for in nsecs, 0 to keep the current value
- * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
- * @enq_flags: SCX_ENQ_*
- *
- * Insert @p into the vtime priority queue of the DSQ identified by @dsq_id.
- * Tasks queued into the priority queue are ordered by @vtime. All other aspects
- * are identical to scx_bpf_dsq_insert().
- *
- * @vtime ordering is according to time_before64() which considers wrapping. A
- * numerically larger vtime may indicate an earlier position in the ordering and
- * vice-versa.
- *
- * A DSQ can only be used as a FIFO or priority queue at any given time and this
- * function must not be called on a DSQ which already has one or more FIFO tasks
- * queued and vice-versa. Also, the built-in DSQs (SCX_DSQ_LOCAL and
- * SCX_DSQ_GLOBAL) cannot be used as priority queues.
- */
-__bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id,
-					  u64 slice, u64 vtime, u64 enq_flags)
+static void scx_dsq_insert_vtime(struct scx_sched *sch, struct task_struct *p,
+				 u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags)
 {
-	struct scx_sched *sch;
-
-	guard(rcu)();
-
-	sch = rcu_dereference(scx_root);
-	if (unlikely(!sch))
-		return;
-
 	if (!scx_dsq_insert_preamble(sch, p, enq_flags))
 		return;
@@ -5492,10 +5464,78 @@ __bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id,
 	scx_dsq_insert_commit(sch, p, dsq_id, enq_flags | SCX_ENQ_DSQ_PRIQ);
 }
 
+struct scx_bpf_dsq_insert_vtime_args {
+	/* @p can't be packed together as KF_RCU is not transitive */
+	u64 dsq_id;
+	u64 slice;
+	u64 vtime;
+	u64 enq_flags;
+};
+
+/**
+ * __scx_bpf_dsq_insert_vtime - Arg-wrapped vtime DSQ insertion
+ * @p: task_struct to insert
+ * @args: struct containing the rest of the arguments
+ * @args->dsq_id: DSQ to insert into
+ * @args->slice: duration @p can run for in nsecs, 0 to keep the current value
+ * @args->vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
+ * @args->enq_flags: SCX_ENQ_*
+ *
+ * Wrapper kfunc that takes arguments via struct to work around BPF's 5 argument
+ * limit. BPF programs should use scx_bpf_dsq_insert_vtime() which is provided
+ * as an inline wrapper in common.bpf.h.
+ *
+ * Insert @p into the vtime priority queue of the DSQ identified by
+ * @args->dsq_id. Tasks queued into the priority queue are ordered by
+ * @args->vtime. All other aspects are identical to scx_bpf_dsq_insert().
+ *
+ * @args->vtime ordering is according to time_before64() which considers
+ * wrapping. A numerically larger vtime may indicate an earlier position in the
+ * ordering and vice-versa.
+ *
+ * A DSQ can only be used as a FIFO or priority queue at any given time and this
+ * function must not be called on a DSQ which already has one or more FIFO tasks
+ * queued and vice-versa. Also, the built-in DSQs (SCX_DSQ_LOCAL and
+ * SCX_DSQ_GLOBAL) cannot be used as priority queues.
+ */
+__bpf_kfunc void
+__scx_bpf_dsq_insert_vtime(struct task_struct *p,
+			   struct scx_bpf_dsq_insert_vtime_args *args)
+{
+	struct scx_sched *sch;
+
+	guard(rcu)();
+
+	sch = rcu_dereference(scx_root);
+	if (unlikely(!sch))
+		return;
+
+	scx_dsq_insert_vtime(sch, p, args->dsq_id, args->slice, args->vtime,
+			     args->enq_flags);
+}
+
+/*
+ * COMPAT: Will be removed in v6.23.
+ */
+__bpf_kfunc void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id,
+					  u64 slice, u64 vtime, u64 enq_flags)
+{
+	struct scx_sched *sch;
+
+	guard(rcu)();
+
+	sch = rcu_dereference(scx_root);
+	if (unlikely(!sch))
+		return;
+
+	scx_dsq_insert_vtime(sch, p, dsq_id, slice, vtime, enq_flags);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(scx_kfunc_ids_enqueue_dispatch)
 BTF_ID_FLAGS(func, scx_bpf_dsq_insert, KF_RCU)
+BTF_ID_FLAGS(func, __scx_bpf_dsq_insert_vtime, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_dsq_insert_vtime, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_enqueue_dispatch)
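
To make the motivation concrete, here is a sketch of what the upcoming
aux__prog addition could look like. This is an assumption based on the
commit description and the verifier's "__prog" argument-suffix convention,
not part of this commit:

	/*
	 * ASSUMPTION: illustrative follow-up, not this commit. The verifier
	 * fills in arguments whose names end in "__prog" with the calling
	 * program's bpf_prog_aux, and such hidden arguments still count
	 * toward the 5-argument kfunc limit, hence packing the scalars
	 * into @args first.
	 */
	__bpf_kfunc void
	__scx_bpf_dsq_insert_vtime(struct task_struct *p,
				   struct scx_bpf_dsq_insert_vtime_args *args,
				   struct bpf_prog_aux *aux__prog)
	{
		/* body as above; how aux__prog is used is up to the follow-up */
	}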

kernel/sched/ext_idle.c

@@ -995,26 +995,56 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
 	return prev_cpu;
 }
 
+struct scx_bpf_select_cpu_and_args {
+	/* @p and @cpus_allowed can't be packed together as KF_RCU is not transitive */
+	s32 prev_cpu;
+	u64 wake_flags;
+	u64 flags;
+};
+
 /**
- * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p,
- * prioritizing those in @cpus_allowed
+ * __scx_bpf_select_cpu_and - Arg-wrapped CPU selection with cpumask
  * @p: task_struct to select a CPU for
- * @prev_cpu: CPU @p was on previously
- * @wake_flags: %SCX_WAKE_* flags
  * @cpus_allowed: cpumask of allowed CPUs
- * @flags: %SCX_PICK_IDLE* flags
+ * @args: struct containing the rest of the arguments
+ * @args->prev_cpu: CPU @p was on previously
+ * @args->wake_flags: %SCX_WAKE_* flags
+ * @args->flags: %SCX_PICK_IDLE* flags
+ *
+ * Wrapper kfunc that takes arguments via struct to work around BPF's 5 argument
+ * limit. BPF programs should use scx_bpf_select_cpu_and() which is provided
+ * as an inline wrapper in common.bpf.h.
  *
  * Can be called from ops.select_cpu(), ops.enqueue(), or from an unlocked
  * context such as a BPF test_run() call, as long as built-in CPU selection
  * is enabled: ops.update_idle() is missing or %SCX_OPS_KEEP_BUILTIN_IDLE
  * is set.
  *
- * @p, @prev_cpu and @wake_flags match ops.select_cpu().
+ * @p, @args->prev_cpu and @args->wake_flags match ops.select_cpu().
  *
  * Returns the selected idle CPU, which will be automatically awakened upon
  * returning from ops.select_cpu() and can be used for direct dispatch, or
  * a negative value if no idle CPU is available.
  */
+__bpf_kfunc s32
+__scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed,
+			 struct scx_bpf_select_cpu_and_args *args)
+{
+	struct scx_sched *sch;
+
+	guard(rcu)();
+
+	sch = rcu_dereference(scx_root);
+	if (unlikely(!sch))
+		return -ENODEV;
+
+	return select_cpu_from_kfunc(sch, p, args->prev_cpu, args->wake_flags,
+				     cpus_allowed, args->flags);
+}
+
+/*
+ * COMPAT: Will be removed in v6.22.
+ */
 __bpf_kfunc s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 				       const struct cpumask *cpus_allowed, u64 flags)
 {
@@ -1383,6 +1413,7 @@ BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu_node, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu_node, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU)
+BTF_ID_FLAGS(func, __scx_bpf_select_cpu_and, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_select_cpu_and, KF_RCU)
 BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU)
 BTF_KFUNCS_END(scx_kfunc_ids_idle)

tools/sched_ext/include/scx/common.bpf.h

@@ -60,10 +60,10 @@ static inline void ___vmlinux_h_sanity_check___(void)
 
 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
 s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
-s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
-			   const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
+s32 __scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed,
+			     struct scx_bpf_select_cpu_and_args *args) __ksym __weak;
 void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
-void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+void __scx_bpf_dsq_insert_vtime(struct task_struct *p, struct scx_bpf_dsq_insert_vtime_args *args) __ksym __weak;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
 bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;

tools/sched_ext/include/scx/compat.bpf.h

@@ -143,6 +143,78 @@ static inline struct task_struct *__COMPAT_scx_bpf_cpu_curr(int cpu)
 	return rq ? rq->curr : NULL;
 }
 
+/*
+ * v6.19: To work around BPF maximum parameter limit, the following kfuncs are
+ * replaced with variants that pack scalar arguments in a struct. Wrappers are
+ * provided to maintain source compatibility.
+ *
+ * The kernel will carry the compat variants until v6.23 to maintain binary
+ * compatibility. After v6.23 release, remove the compat handling and move the
+ * wrappers to common.bpf.h.
+ */
+s32 scx_bpf_select_cpu_and___compat(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+				    const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
+
+void scx_bpf_dsq_insert_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+
+/**
+ * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p
+ * @p: task_struct to select a CPU for
+ * @prev_cpu: CPU @p was on previously
+ * @wake_flags: %SCX_WAKE_* flags
+ * @cpus_allowed: cpumask of allowed CPUs
+ * @flags: %SCX_PICK_IDLE* flags
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_select_cpu_and(). See __scx_bpf_select_cpu_and() for details.
+ */
+static inline s32
+scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+		       const struct cpumask *cpus_allowed, u64 flags)
+{
+	if (bpf_core_type_exists(struct scx_bpf_select_cpu_and_args)) {
+		struct scx_bpf_select_cpu_and_args args = {
+			.prev_cpu = prev_cpu,
+			.wake_flags = wake_flags,
+			.flags = flags,
+		};
+
+		return __scx_bpf_select_cpu_and(p, cpus_allowed, &args);
+	} else {
+		return scx_bpf_select_cpu_and___compat(p, prev_cpu, wake_flags,
+						       cpus_allowed, flags);
+	}
+}
+
+/**
+ * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ
+ * @p: task_struct to insert
+ * @dsq_id: DSQ to insert into
+ * @slice: duration @p can run for in nsecs, 0 to keep the current value
+ * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
+ * @enq_flags: SCX_ENQ_*
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_dsq_insert_vtime(). See __scx_bpf_dsq_insert_vtime() for details.
+ */
+static inline void
+scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime,
+			 u64 enq_flags)
+{
+	if (bpf_core_type_exists(struct scx_bpf_dsq_insert_vtime_args)) {
+		struct scx_bpf_dsq_insert_vtime_args args = {
+			.dsq_id = dsq_id,
+			.slice = slice,
+			.vtime = vtime,
+			.enq_flags = enq_flags,
+		};
+
+		__scx_bpf_dsq_insert_vtime(p, &args);
+	} else {
+		scx_bpf_dsq_insert_vtime___compat(p, dsq_id, slice, vtime,
+						  enq_flags);
+	}
+}
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
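
As a usage sketch (hypothetical scheduler, not part of this change), a BPF
scheduler calls the wrapper above exactly as it called the old kfunc; on
success the task can be dispatched directly to the selected CPU's local DSQ.

	/* Sketch only: a minimal ops.select_cpu() built on the wrapper above. */
	s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p, s32 prev_cpu,
			   u64 wake_flags)
	{
		s32 cpu;

		/* prefer a fully idle core among @p's allowed CPUs */
		cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags,
					     p->cpus_ptr, SCX_PICK_IDLE_CORE);
		if (cpu >= 0) {
			/* idle CPU found; queue @p directly on its local DSQ */
			scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
			return cpu;
		}

		return prev_cpu;
	}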