mirror of https://github.com/torvalds/linux.git
tools/sched_ext: Receive updates from SCX repo
Receive tools/sched_ext updates from https://github.com/sched-ext/scx to sync
userspace bits:

- basic BPF arena allocator abstractions,
- additional process flags definitions,
- fixed is_migration_disabled() helper,
- separate out user_exit_info BPF and user space code.

This also fixes the following warning when building the selftests:

  tools/sched_ext/include/scx/common.bpf.h:550:9: warning: 'likely' macro redefined [-Wmacro-redefined]
    550 | #define likely(x) __builtin_expect(!!(x), 1)
        |         ^

Co-developed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Commit: de68c05189 (parent: 8f5ae30d69)
@@ -0,0 +1,175 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once

#ifndef PAGE_SIZE
#define PAGE_SIZE __PAGE_SIZE
/*
 * for older kernels try sizeof(struct genradix_node)
 * or flexible:
 * static inline long __bpf_page_size(void) {
 *	return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
 * }
 * but generated code is not great.
 */
#endif

#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define __arena_global __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#else

/* emit instruction:
 * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
 *
 * This is a workaround for LLVM compiler versions without
 * __BPF_FEATURE_ADDR_SPACE_CAST that do not automatically cast between arena
 * pointers and native kernel/userspace ones. In this case we explicitly do so
 * with cast_kern() and cast_user(). E.g., in the Linux kernel tree,
 * tools/testing/selftests/bpf includes tests that use these macros to implement
 * linked lists and hashtables backed by arena memory. In sched_ext, we use
 * cast_kern() and cast_user() for compatibility with older LLVM toolchains.
 */
#ifndef bpf_addr_space_cast
#define bpf_addr_space_cast(var, dst_as, src_as)\
	asm volatile(".byte 0xBF;		\
		     .ifc %[reg], r0;		\
		     .byte 0x00;		\
		     .endif;			\
		     .ifc %[reg], r1;		\
		     .byte 0x11;		\
		     .endif;			\
		     .ifc %[reg], r2;		\
		     .byte 0x22;		\
		     .endif;			\
		     .ifc %[reg], r3;		\
		     .byte 0x33;		\
		     .endif;			\
		     .ifc %[reg], r4;		\
		     .byte 0x44;		\
		     .endif;			\
		     .ifc %[reg], r5;		\
		     .byte 0x55;		\
		     .endif;			\
		     .ifc %[reg], r6;		\
		     .byte 0x66;		\
		     .endif;			\
		     .ifc %[reg], r7;		\
		     .byte 0x77;		\
		     .endif;			\
		     .ifc %[reg], r8;		\
		     .byte 0x88;		\
		     .endif;			\
		     .ifc %[reg], r9;		\
		     .byte 0x99;		\
		     .endif;			\
		     .short %[off];		\
		     .long %[as]"		\
		     : [reg]"+r"(var)		\
		     : [off]"i"(BPF_ADDR_SPACE_CAST) \
		     , [as]"i"((dst_as << 16) | src_as));
#endif

#define __arena
#define __arena_global SEC(".addr_space.1")
#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
#endif

void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
				    int node_id, __u64 flags) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
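To make the new arena abstractions concrete, here is a minimal usage sketch. It is not part of the patch; the map layout, page count and names are illustrative assumptions about how a sched_ext BPF scheduler could back an allocation with the arena kfuncs declared above.

	/* Illustrative only: one arena page holding scheduler-private counters. */
	struct {
		__uint(type, BPF_MAP_TYPE_ARENA);
		__uint(map_flags, BPF_F_MMAPABLE);
		__uint(max_entries, 16);		/* arena pages, arbitrary */
	} arena SEC(".maps");

	static u64 __arena *counters;

	static int counters_init(void)
	{
		counters = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
		if (!counters)
			return -ENOMEM;
		cast_kern(counters);	/* no-op when the compiler emits address space casts */
		return 0;
	}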
/*
 * Note that cond_break can only be portably used in the body of a breakable
 * construct, whereas can_loop can be used anywhere.
 */
#ifdef TEST
#define can_loop true
#define __cond_break(expr) expr
#else
#ifdef __BPF_FEATURE_MAY_GOTO
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("may_goto %l[l_break]"	\
			  :::: l_break);		\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("may_goto %l[l_break]"	\
			  :::: l_break);		\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#else
#define can_loop					\
	({ __label__ l_break, l_continue;		\
	bool ret = true;				\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: ret = false;				\
	l_continue:;					\
	ret;						\
	})

#define __cond_break(expr)				\
	({ __label__ l_break, l_continue;		\
	asm volatile goto("1:.byte 0xe5;		\
		      .byte 0;				\
		      .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;	\
		      .short 0"				\
		      :::: l_break);			\
	goto l_continue;				\
	l_break: expr;					\
	l_continue:;					\
	})
#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#endif /* __BPF_FEATURE_MAY_GOTO */
#endif /* TEST */

#define cond_break __cond_break(break)
#define cond_break_label(label) __cond_break(goto label)

void bpf_preempt_disable(void) __weak __ksym;
void bpf_preempt_enable(void) __weak __ksym;
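A small sketch of how the two loop helpers above differ in practice. This is not from the patch; the functions and bounds are illustrative.

	/* Illustrative only: can_loop can guard any loop condition... */
	static int find_first_zero(u64 *arr, int n)
	{
		int i = 0;

		while (i < n && can_loop) {	/* verifier-bounded via may_goto */
			if (!arr[i])
				return i;
			i++;
		}
		return -1;
	}

	/* ...while cond_break must sit inside a breakable construct. */
	static void clear_all(u64 *arr, int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			arr[i] = 0;
			cond_break;
		}
	}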
@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once

#ifndef arena_container_of
#define arena_container_of(ptr, type, member)			\
	({							\
		void __arena *__mptr = (void __arena *)(ptr);	\
		((type *)(__mptr - offsetof(type, member)));	\
	})
#endif

/* Provide the definition of PAGE_SIZE. */
#include <sys/user.h>

#define __arena
#define __arg_arena
#define cast_kern(ptr) /* nop for user space */
#define cast_user(ptr) /* nop for user space */
char __attribute__((weak)) arena[1];

#ifndef offsetof
#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
#endif

static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
						  int node_id, __u64 flags)
{
	return NULL;
}
static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
{
}
@@ -24,14 +24,26 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <asm-generic/errno.h>
-#include "user_exit_info.h"
+#include "user_exit_info.bpf.h"
 #include "enum_defs.autogen.h"
 
+#define PF_IDLE			0x00000002	/* I am an IDLE thread */
+#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
+#define PF_KCOMPACTD		0x00010000	/* I am kcompactd */
+#define PF_KSWAPD		0x00020000	/* I am kswapd */
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_EXITING		0x00000004
 #define CLOCK_MONOTONIC		1
 
+#ifndef NR_CPUS
+#define NR_CPUS			1024
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE		(-1)
+#endif
+
 extern int LINUX_KERNEL_VERSION __kconfig;
 extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
 extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
@@ -107,6 +119,9 @@ void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __
 static inline __attribute__((format(printf, 1, 2)))
 void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 
+#define SCX_STRINGIFY(x) #x
+#define SCX_TOSTRING(x) SCX_STRINGIFY(x)
+
 /*
  * Helper macro for initializing the fmt and variadic argument inputs to both
  * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
@@ -141,13 +156,15 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
  * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
  * instead of an array of u64. Invoking this macro will cause the scheduler to
  * exit in an erroneous state, with diagnostic information being passed to the
- * user.
+ * user. It appends the file and line number to aid debugging.
  */
 #define scx_bpf_error(fmt, args...)					\
 ({									\
-	scx_bpf_bstr_preamble(fmt, args)				\
+	scx_bpf_bstr_preamble(						\
+		__FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args)	\
 	scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));	\
-	___scx_bpf_bstr_format_checker(fmt, ##args);			\
+	___scx_bpf_bstr_format_checker(					\
+		__FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args);	\
 })
 
 /*
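A quick illustration of the effect of this change (the call site below is hypothetical, not from the patch): the same invocation now reports where it came from.

	/* Illustrative only: validate a CPU id coming from user space. */
	static void check_cpu(s32 cpu, s32 nr_cpu_ids)
	{
		if (cpu < 0 || cpu >= nr_cpu_ids)
			scx_bpf_error("invalid cpu %d", cpu);
		/* the message is now automatically prefixed with "<file>:<line>: " */
	}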
@@ -229,6 +246,7 @@ BPF_PROG(name, ##args)
  * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
  * `MEMBER_VPTR(ptr, ->member)`.
  */
+#ifndef MEMBER_VPTR
 #define MEMBER_VPTR(base, member) (typeof((base) member) *)		\
 ({									\
 	u64 __base = (u64)&(base);					\
@@ -245,6 +263,7 @@ BPF_PROG(name, ##args)
 		     [max]"i"(sizeof(base) - sizeof((base) member)));	\
 	__addr;								\
 })
+#endif	/* MEMBER_VPTR */
 
 /**
  * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
@@ -260,6 +279,7 @@ BPF_PROG(name, ##args)
  * size of the array to compute the max, which will result in rejection by
  * the verifier.
  */
+#ifndef ARRAY_ELEM_PTR
 #define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)			\
 ({									\
 	u64 __base = (u64)arr;						\
@@ -274,7 +294,7 @@ BPF_PROG(name, ##args)
 		     [max]"r"(sizeof(arr[0]) * ((n) - 1)));		\
 	__addr;								\
 })
-
+#endif	/* ARRAY_ELEM_PTR */
 
 /*
  * BPF declarations and helpers
|
||||||
*/
|
*/
|
||||||
static inline bool is_migration_disabled(const struct task_struct *p)
|
static inline bool is_migration_disabled(const struct task_struct *p)
|
||||||
{
|
{
|
||||||
if (bpf_core_field_exists(p->migration_disabled))
|
/*
|
||||||
return p->migration_disabled;
|
* Testing p->migration_disabled in a BPF code is tricky because the
|
||||||
|
* migration is _always_ disabled while running the BPF code.
|
||||||
|
* The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) for BPF
|
||||||
|
* code execution disable and re-enable the migration of the current
|
||||||
|
* task, respectively. So, the _current_ task of the sched_ext ops is
|
||||||
|
* always migration-disabled. Moreover, p->migration_disabled could be
|
||||||
|
* two or greater when a sched_ext ops BPF code (e.g., ops.tick) is
|
||||||
|
* executed in the middle of the other BPF code execution.
|
||||||
|
*
|
||||||
|
* Therefore, we should decide that the _current_ task is
|
||||||
|
* migration-disabled only when its migration_disabled count is greater
|
||||||
|
* than one. In other words, when p->migration_disabled == 1, there is
|
||||||
|
* an ambiguity, so we should check if @p is the current task or not.
|
||||||
|
*/
|
||||||
|
if (bpf_core_field_exists(p->migration_disabled)) {
|
||||||
|
if (p->migration_disabled == 1)
|
||||||
|
return bpf_get_current_task_btf() != p;
|
||||||
|
else
|
||||||
|
return p->migration_disabled;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
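For illustration only (not part of the patch), why the current-task check matters: every sched_ext callback runs with migration disabled for the current task, so a count of exactly 1 must not be reported as "disabled". The ops name and counter below are assumptions.

	/* Illustrative only: p is the current task inside ops.tick(), so the
	 * BPF prolog already raised p->migration_disabled to 1. */
	static u64 nr_migration_disabled;

	void BPF_STRUCT_OPS(sketch_tick, struct task_struct *p)
	{
		if (is_migration_disabled(p))	/* false unless the count is > 1 */
			__sync_fetch_and_add(&nr_migration_disabled, 1);
	}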
@@ -476,7 +515,7 @@ static inline s64 time_delta(u64 after, u64 before)
  */
 static inline bool time_after(u64 a, u64 b)
 {
 	return (s64)(b - a) < 0;
 }
 
 /**
@@ -500,7 +539,7 @@ static inline bool time_before(u64 a, u64 b)
  */
 static inline bool time_after_eq(u64 a, u64 b)
 {
 	return (s64)(a - b) >= 0;
 }
 
 /**
@@ -547,9 +586,15 @@ static inline bool time_in_range_open(u64 a, u64 b, u64 c)
  */
 
 /* useful compiler attributes */
+#ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
 #define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#ifndef __maybe_unused
 #define __maybe_unused __attribute__((__unused__))
+#endif
 
 /*
  * READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
@@ -632,6 +677,26 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	__u.__val;							\
 })
 
+/*
+ * __calc_avg - Calculate exponential weighted moving average (EWMA) with
+ * @old and @new values. @decay represents how large the @old value remains.
+ * With a larger @decay value, the moving average changes slowly, exhibiting
+ * fewer fluctuations.
+ */
+#define __calc_avg(old, new, decay) ({					\
+	typeof(decay) thr = 1 << (decay);				\
+	typeof(old) ret;						\
+	if (((old) < thr) || ((new) < thr)) {				\
+		if (((old) == 1) && ((new) == 0))			\
+			ret = 0;					\
+		else							\
+			ret = ((old) - ((old) >> 1)) + ((new) >> 1);	\
+	} else {							\
+		ret = ((old) - ((old) >> (decay))) + ((new) >> (decay)); \
+	}								\
+	ret;								\
+})
+
 /*
  * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
  * @v: The value for which we're computing the base 2 logarithm.
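A worked example of the EWMA helper above, with illustrative numbers: for decay = 2 the history keeps 3/4 of its weight, so __calc_avg(800, 400, 2) = 800 - 200 + 100 = 700. A typical caller (sketch, not from the patch):

	/* Illustrative only: smooth a runtime sample; new sample contributes 1/4. */
	static u64 avg_runtime;

	static void update_avg_runtime(u64 sample)
	{
		avg_runtime = __calc_avg(avg_runtime, sample, 2);
	}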
@@ -662,6 +727,25 @@ static inline u32 log2_u64(u64 v)
 	return log2_u32(v) + 1;
 }
 
+/*
+ * sqrt_u64 - Calculate the square root of value @x using Newton's method.
+ */
+static inline u64 __sqrt_u64(u64 x)
+{
+	if (x == 0 || x == 1)
+		return x;
+
+	u64 r = ((1ULL << 32) > x) ? x : (1ULL << 32);
+
+	for (int i = 0; i < 8; ++i) {
+		u64 q = x / r;
+		if (r <= q)
+			break;
+		r = (r + q) >> 1;
+	}
+	return r;
+}
+
 /*
  * Return a value proportionally scaled to the task's weight.
  */
@@ -75,8 +75,9 @@ typedef int64_t s64;
 #include "enums.h"
 
 /* not available when building kernel tools/sched_ext */
-#if __has_include(<lib/sdt_task.h>)
-#include <lib/sdt_task.h>
+#if __has_include(<lib/sdt_task_defs.h>)
+#include "bpf_arena_common.h"
+#include <lib/sdt_task_defs.h>
 #endif
 
 #endif	/* __SCHED_EXT_COMMON_H */
@@ -38,6 +38,7 @@ void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__i
 void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
 bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
 
 #define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags)			\
 	(bpf_ksym_exists(scx_bpf_dsq_insert) ?					\
@@ -82,6 +83,10 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
 	 scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
 	 false))
 
+#define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz)			\
+	(bpf_ksym_exists(bpf_cpumask_populate) ?				\
+	 (bpf_cpumask_populate(cpumask, src, size__sz)) : -EOPNOTSUPP)
+
 #define scx_bpf_dispatch(p, dsq_id, slice, enq_flags) \
 	_Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")
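A hedged usage sketch for the new compat wrapper (wrapper function and names below are illustrative, not from the patch): it degrades to -EOPNOTSUPP on kernels without the bpf_cpumask_populate() kfunc, so callers should be prepared to fall back.

	/* Illustrative only: fill a cpumask from a raw bitmap if the kfunc exists. */
	static int populate_mask(struct cpumask *mask, void *bitmap, size_t sz)
	{
		int err = __COMPAT_bpf_cpumask_populate(mask, bitmap, sz);

		if (err == -EOPNOTSUPP) {
			/* older kernel without bpf_cpumask_populate(); fall back */
			err = 0;
		}
		return err;
	}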
@@ -0,0 +1,40 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Define struct user_exit_info which is shared between BPF and userspace parts
 * to communicate exit status and other information.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */

#ifndef __USER_EXIT_INFO_BPF_H
#define __USER_EXIT_INFO_BPF_H

#ifndef LSP
#include "vmlinux.h"
#endif
#include <bpf/bpf_core_read.h>

#include "user_exit_info_common.h"

#define UEI_DEFINE(__name)							\
	char RESIZABLE_ARRAY(data, __name##_dump);				\
	const volatile u32 __name##_dump_len;					\
	struct user_exit_info __name SEC(".data")

#define UEI_RECORD(__uei_name, __ei) ({						\
	bpf_probe_read_kernel_str(__uei_name.reason,				\
				  sizeof(__uei_name.reason), (__ei)->reason);	\
	bpf_probe_read_kernel_str(__uei_name.msg,				\
				  sizeof(__uei_name.msg), (__ei)->msg);		\
	bpf_probe_read_kernel_str(__uei_name##_dump,				\
				  __uei_name##_dump_len, (__ei)->dump);		\
	if (bpf_core_field_exists((__ei)->exit_code))				\
		__uei_name.exit_code = (__ei)->exit_code;			\
	/* use __sync to force memory barrier */				\
	__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,		\
				    (__ei)->kind);				\
})

#endif /* __USER_EXIT_INFO_BPF_H */
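For orientation, the way example schedulers typically consume these macros from the BPF side (sketch; the ops name is illustrative):

	/* Illustrative only: record exit info from ops.exit() for userspace to read. */
	UEI_DEFINE(uei);

	void BPF_STRUCT_OPS(sketch_exit, struct scx_exit_info *ei)
	{
		UEI_RECORD(uei, ei);
	}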
@@ -10,55 +10,11 @@
 #ifndef __USER_EXIT_INFO_H
 #define __USER_EXIT_INFO_H
 
-#ifdef LSP
-#define __bpf__
-#include "../vmlinux.h"
-#endif
-
-enum uei_sizes {
-	UEI_REASON_LEN		= 128,
-	UEI_MSG_LEN		= 1024,
-	UEI_DUMP_DFL_LEN	= 32768,
-};
-
-struct user_exit_info {
-	int		kind;
-	s64		exit_code;
-	char		reason[UEI_REASON_LEN];
-	char		msg[UEI_MSG_LEN];
-};
-
-#ifdef __bpf__
-
-#ifndef LSP
-#include "vmlinux.h"
-#endif
-#include <bpf/bpf_core_read.h>
-
-#define UEI_DEFINE(__name)							\
-	char RESIZABLE_ARRAY(data, __name##_dump);				\
-	const volatile u32 __name##_dump_len;					\
-	struct user_exit_info __name SEC(".data")
-
-#define UEI_RECORD(__uei_name, __ei) ({						\
-	bpf_probe_read_kernel_str(__uei_name.reason,				\
-				  sizeof(__uei_name.reason), (__ei)->reason);	\
-	bpf_probe_read_kernel_str(__uei_name.msg,				\
-				  sizeof(__uei_name.msg), (__ei)->msg);		\
-	bpf_probe_read_kernel_str(__uei_name##_dump,				\
-				  __uei_name##_dump_len, (__ei)->dump);		\
-	if (bpf_core_field_exists((__ei)->exit_code))				\
-		__uei_name.exit_code = (__ei)->exit_code;			\
-	/* use __sync to force memory barrier */				\
-	__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,		\
-				    (__ei)->kind);				\
-})
-
-#else	/* !__bpf__ */
-
 #include <stdio.h>
 #include <stdbool.h>
 
+#include "user_exit_info_common.h"
+
 /* no need to call the following explicitly if SCX_OPS_LOAD() is used */
 #define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({				\
 	u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
@@ -114,5 +70,4 @@ enum uei_ecode_mask {
 
 #define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
 
-#endif	/* __bpf__ */
 #endif	/* __USER_EXIT_INFO_H */
@@ -0,0 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Define struct user_exit_info which is shared between BPF and userspace parts
 * to communicate exit status and other information.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#ifndef __USER_EXIT_INFO_COMMON_H
#define __USER_EXIT_INFO_COMMON_H

#ifdef LSP
#include "../vmlinux.h"
#endif

enum uei_sizes {
	UEI_REASON_LEN		= 128,
	UEI_MSG_LEN		= 1024,
	UEI_DUMP_DFL_LEN	= 32768,
};

struct user_exit_info {
	int		kind;
	s64		exit_code;
	char		reason[UEI_REASON_LEN];
	char		msg[UEI_MSG_LEN];
};

#endif /* __USER_EXIT_INFO_COMMON_H */
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * A central FIFO sched_ext scheduler which demonstrates the followings:
+ * A central FIFO sched_ext scheduler which demonstrates the following:
  *
  * a. Making all scheduling decisions from one CPU:
  *
@@ -61,6 +61,7 @@ int main(int argc, char **argv)
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
 	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
+	assert(skel->rodata->nr_cpu_ids > 0);
 	assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
 
 	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
 	       .cgroup_move		= (void *)fcg_cgroup_move,
 	       .init			= (void *)fcg_init,
 	       .exit			= (void *)fcg_exit,
-	       .flags			= SCX_OPS_ENQ_EXITING,
+	       .flags			= SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
 	       .name			= "flatcg");
@@ -6,6 +6,7 @@
  */
 #include <stdio.h>
 #include <signal.h>
+#include <assert.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <limits.h>
@@ -137,6 +138,7 @@ int main(int argc, char **argv)
 	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+	assert(skel->rodata->nr_cpus > 0);
 	skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
 	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
@@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <signal.h>
+#include <assert.h>
 #include <libgen.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
@@ -41,6 +42,7 @@ static void sigint_handler(int simple)
 static void read_stats(struct scx_simple *skel, __u64 *stats)
 {
 	int nr_cpus = libbpf_num_possible_cpus();
+	assert(nr_cpus > 0);
 	__u64 cnts[2][nr_cpus];
 	__u32 idx;
 