tracing: tprobe-events: Register tracepoint when enable tprobe event

As same as fprobe, register tracepoint stub function only when enabling
tprobe events. The major changes are introducing a list of
tracepoint_user and its lock, and tprobe_event_module_nb, which is
another module notifier for module loading/unloading.  By spliting the
lock from event_mutex and a module notifier for trace_fprobe, it
solved AB-BA lock dependency issue between event_mutex and
tracepoint_module_list_mutex.

Link: https://lore.kernel.org/all/174343538901.843280.423773753642677941.stgit@devnote2/

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
This commit is contained in:
Masami Hiramatsu (Google) 2025-04-01 00:36:29 +09:00
parent 434f6703ce
commit 2867495dea
1 changed files with 218 additions and 166 deletions

View File

@ -7,7 +7,9 @@
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <linux/fprobe.h> #include <linux/fprobe.h>
#include <linux/list.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
@ -37,15 +39,21 @@ static struct dyn_event_operations trace_fprobe_ops = {
.match = trace_fprobe_match, .match = trace_fprobe_match,
}; };
/* List of tracepoint_user */
static LIST_HEAD(tracepoint_user_list);
static DEFINE_MUTEX(tracepoint_user_mutex);
/* While living tracepoint_user, @tpoint can be NULL and @refcount != 0. */
struct tracepoint_user { struct tracepoint_user {
struct list_head list;
const char *name;
struct tracepoint *tpoint; struct tracepoint *tpoint;
unsigned int refcount; unsigned int refcount;
}; };
static bool tracepoint_user_is_registered(struct tracepoint_user *tuser) /* NOTE: you must lock tracepoint_user_mutex. */
{ #define for_each_tracepoint_user(tuser) \
return tuser && tuser->tpoint; list_for_each_entry(tuser, &tracepoint_user_list, list)
}
static int tracepoint_user_register(struct tracepoint_user *tuser) static int tracepoint_user_register(struct tracepoint_user *tuser)
{ {
@ -75,58 +83,111 @@ static unsigned long tracepoint_user_ip(struct tracepoint_user *tuser)
return (unsigned long)tuser->tpoint->probestub; return (unsigned long)tuser->tpoint->probestub;
} }
static bool tracepoint_user_within_module(struct tracepoint_user *tuser, static void __tracepoint_user_free(struct tracepoint_user *tuser)
struct module *mod)
{ {
return within_module(tracepoint_user_ip(tuser), mod); if (!tuser)
return;
kfree(tuser->name);
kfree(tuser);
} }
static struct tracepoint_user *tracepoint_user_allocate(struct tracepoint *tpoint) DEFINE_FREE(tuser_free, struct tracepoint_user *, __tracepoint_user_free(_T))
static struct tracepoint_user *__tracepoint_user_init(const char *name, struct tracepoint *tpoint)
{ {
struct tracepoint_user *tuser; struct tracepoint_user *tuser __free(tuser_free) = NULL;
int ret;
tuser = kzalloc(sizeof(*tuser), GFP_KERNEL); tuser = kzalloc(sizeof(*tuser), GFP_KERNEL);
if (!tuser) if (!tuser)
return NULL; return NULL;
tuser->name = kstrdup(name, GFP_KERNEL);
if (!tuser->name)
return NULL;
if (tpoint) {
ret = tracepoint_user_register(tuser);
if (ret)
return ERR_PTR(ret);
}
tuser->tpoint = tpoint; tuser->tpoint = tpoint;
tuser->refcount = 1; tuser->refcount = 1;
INIT_LIST_HEAD(&tuser->list);
list_add(&tuser->list, &tracepoint_user_list);
return_ptr(tuser);
}
static struct tracepoint *find_tracepoint(const char *tp_name,
struct module **tp_mod);
/*
* Get tracepoint_user if exist, or allocate new one and register it.
* If tracepoint is on a module, get its refcounter too.
* This returns errno or NULL (not loaded yet) or tracepoint_user.
*/
static struct tracepoint_user *tracepoint_user_find_get(const char *name, struct module **pmod)
{
struct module *mod __free(module_put) = NULL;
struct tracepoint_user *tuser;
struct tracepoint *tpoint;
if (!name || !pmod)
return ERR_PTR(-EINVAL);
/* Get and lock the module which has tracepoint. */
tpoint = find_tracepoint(name, &mod);
guard(mutex)(&tracepoint_user_mutex);
/* Search existing tracepoint_user */
for_each_tracepoint_user(tuser) {
if (!strcmp(tuser->name, name)) {
tuser->refcount++;
*pmod = no_free_ptr(mod);
return tuser;
}
}
/* The corresponding tracepoint_user is not found. */
tuser = __tracepoint_user_init(name, tpoint);
if (!IS_ERR_OR_NULL(tuser))
*pmod = no_free_ptr(mod);
return tuser; return tuser;
} }
/* These must be called with event_mutex */
static void tracepoint_user_get(struct tracepoint_user *tuser)
{
tuser->refcount++;
}
static void tracepoint_user_put(struct tracepoint_user *tuser) static void tracepoint_user_put(struct tracepoint_user *tuser)
{ {
scoped_guard(mutex, &tracepoint_user_mutex) {
if (--tuser->refcount > 0) if (--tuser->refcount > 0)
return; return;
if (tracepoint_user_is_registered(tuser)) list_del(&tuser->list);
tracepoint_user_unregister(tuser); tracepoint_user_unregister(tuser);
kfree(tuser);
} }
static const char *tracepoint_user_lookup(struct tracepoint_user *tuser, char *buf) __tracepoint_user_free(tuser);
{
struct tracepoint *tpoint = tuser->tpoint;
if (!tpoint)
return NULL;
return kallsyms_lookup((unsigned long)tpoint->probestub, NULL, NULL, NULL, buf);
} }
DEFINE_FREE(tuser_put, struct tracepoint_user *,
if (!IS_ERR_OR_NULL(_T))
tracepoint_user_put(_T))
/* /*
* Fprobe event core functions * Fprobe event core functions
*/ */
/*
* @tprobe is true for tracepoint probe.
* @tuser can be NULL if the trace_fprobe is disabled or the tracepoint is not
* loaded with a module. If @tuser != NULL, this trace_fprobe is enabled.
*/
struct trace_fprobe { struct trace_fprobe {
struct dyn_event devent; struct dyn_event devent;
struct fprobe fp; struct fprobe fp;
const char *symbol; const char *symbol;
bool tprobe;
struct tracepoint_user *tuser; struct tracepoint_user *tuser;
struct trace_probe tp; struct trace_probe tp;
}; };
@ -157,7 +218,7 @@ static bool trace_fprobe_is_return(struct trace_fprobe *tf)
static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf) static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
{ {
return tf->tuser != NULL; return tf->tprobe;
} }
static const char *trace_fprobe_symbol(struct trace_fprobe *tf) static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
@ -207,56 +268,6 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
return fprobe_is_registered(&tf->fp); return fprobe_is_registered(&tf->fp);
} }
static struct tracepoint *find_tracepoint(const char *tp_name,
struct module **tp_mod);
/*
* Get tracepoint_user if exist, or allocate new one. If tracepoint is on a
* module, get its refcounter.
*/
static struct tracepoint_user *
trace_fprobe_get_tracepoint_user(const char *name, struct module **pmod)
{
struct tracepoint_user *tuser __free(kfree) = NULL;
struct tracepoint *tpoint;
struct trace_fprobe *tf;
struct dyn_event *dpos;
struct module *mod __free(module_put) = NULL;
int ret;
/*
* Find appropriate tracepoint and locking module.
* Note: tpoint can be NULL if it is unloaded (or failed to get module.)
*/
tpoint = find_tracepoint(name, &mod);
/* Search existing tracepoint_user */
for_each_trace_fprobe(tf, dpos) {
if (!trace_fprobe_is_tracepoint(tf))
continue;
if (!strcmp(tf->symbol, name)) {
tracepoint_user_get(tf->tuser);
*pmod = no_free_ptr(mod);
return tf->tuser;
}
}
/* Not found, allocate and register new tracepoint_user. */
tuser = tracepoint_user_allocate(tpoint);
if (!tuser)
return NULL;
if (tpoint) {
/* If the tracepoint is not loaded, tpoint can be NULL. */
ret = tracepoint_user_register(tuser);
if (ret)
return ERR_PTR(ret);
}
*pmod = no_free_ptr(mod);
return_ptr(tuser);
}
/* /*
* Note that we don't verify the fetch_insn code, since it does not come * Note that we don't verify the fetch_insn code, since it does not come
* from user space. * from user space.
@ -558,8 +569,8 @@ DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) f
static struct trace_fprobe *alloc_trace_fprobe(const char *group, static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *event, const char *event,
const char *symbol, const char *symbol,
struct tracepoint_user *tuser, int nargs, bool is_return,
int nargs, bool is_return) bool is_tracepoint)
{ {
struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
int ret = -ENOMEM; int ret = -ENOMEM;
@ -577,7 +588,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
else else
tf->fp.entry_handler = fentry_dispatcher; tf->fp.entry_handler = fentry_dispatcher;
tf->tuser = tuser; tf->tprobe = is_tracepoint;
ret = trace_probe_init(&tf->tp, event, group, false, nargs); ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0) if (ret < 0)
@ -751,12 +762,35 @@ static int unregister_fprobe_event(struct trace_fprobe *tf)
static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf) static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf)
{ {
unsigned long ip = tracepoint_user_ip(tf->tuser); struct tracepoint_user *tuser __free(tuser_put) = NULL;
struct module *mod __free(module_put) = NULL;
unsigned long ip;
int ret;
if (!ip) if (WARN_ON_ONCE(tf->tuser))
return -EINVAL;
/* If the tracepoint is in a module, it must be locked in this function. */
tuser = tracepoint_user_find_get(tf->symbol, &mod);
/* This tracepoint is not loaded yet */
if (IS_ERR(tuser))
return PTR_ERR(tuser);
if (!tuser)
return -ENOMEM;
/* Register fprobe only if the tracepoint is loaded. */
if (tuser->tpoint) {
ip = tracepoint_user_ip(tuser);
if (WARN_ON_ONCE(!ip))
return -ENOENT; return -ENOENT;
return register_fprobe_ips(&tf->fp, &ip, 1); ret = register_fprobe_ips(&tf->fp, &ip, 1);
if (ret < 0)
return ret;
}
tf->tuser = no_free_ptr(tuser);
return 0;
} }
/* Returns an error if the target function is not available, or 0 */ /* Returns an error if the target function is not available, or 0 */
@ -764,16 +798,10 @@ static int trace_fprobe_verify_target(struct trace_fprobe *tf)
{ {
int ret; int ret;
if (trace_fprobe_is_tracepoint(tf)) { /* Tracepoint should have a stub function. */
if (trace_fprobe_is_tracepoint(tf))
/* This tracepoint is not loaded yet */
if (!tracepoint_user_is_registered(tf->tuser))
return 0; return 0;
/* We assume all stab function is tracable. */
return tracepoint_user_ip(tf->tuser) ? 0 : -ENOENT;
}
/* /*
* Note: since we don't lock the module, even if this succeeded, * Note: since we don't lock the module, even if this succeeded,
* register_fprobe() later can fail. * register_fprobe() later can fail.
@ -803,14 +831,8 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
tf->fp.flags &= ~FPROBE_FL_DISABLED; tf->fp.flags &= ~FPROBE_FL_DISABLED;
if (trace_fprobe_is_tracepoint(tf)) { if (trace_fprobe_is_tracepoint(tf))
/* This tracepoint is not loaded yet */
if (!tracepoint_user_is_registered(tf->tuser))
return 0;
return __regsiter_tracepoint_fprobe(tf); return __regsiter_tracepoint_fprobe(tf);
}
/* TODO: handle filter, nofilter or symbol list */ /* TODO: handle filter, nofilter or symbol list */
return register_fprobe(&tf->fp, tf->symbol, NULL); return register_fprobe(&tf->fp, tf->symbol, NULL);
@ -819,11 +841,9 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
/* Internal unregister function - just handle fprobe and flags */ /* Internal unregister function - just handle fprobe and flags */
static void __unregister_trace_fprobe(struct trace_fprobe *tf) static void __unregister_trace_fprobe(struct trace_fprobe *tf)
{ {
if (trace_fprobe_is_registered(tf)) { if (trace_fprobe_is_registered(tf))
unregister_fprobe(&tf->fp); unregister_fprobe(&tf->fp);
memset(&tf->fp, 0, sizeof(tf->fp)); if (tf->tuser) {
}
if (trace_fprobe_is_tracepoint(tf)) {
tracepoint_user_put(tf->tuser); tracepoint_user_put(tf->tuser);
tf->tuser = NULL; tf->tuser = NULL;
} }
@ -1027,59 +1047,93 @@ static struct tracepoint *find_tracepoint_in_module(struct module *mod,
return data.tpoint; return data.tpoint;
} }
/* These are CONFIG_MODULES=y specific functions. */
static bool tracepoint_user_within_module(struct tracepoint_user *tuser,
struct module *mod)
{
return within_module(tracepoint_user_ip(tuser), mod);
}
static int tracepoint_user_register_again(struct tracepoint_user *tuser,
struct tracepoint *tpoint)
{
tuser->tpoint = tpoint;
return tracepoint_user_register(tuser);
}
static void tracepoint_user_unregister_clear(struct tracepoint_user *tuser)
{
tracepoint_user_unregister(tuser);
tuser->tpoint = NULL;
}
/* module callback for tracepoint_user */
static int __tracepoint_probe_module_cb(struct notifier_block *self, static int __tracepoint_probe_module_cb(struct notifier_block *self,
unsigned long val, void *data) unsigned long val, void *data)
{ {
struct tp_module *tp_mod = data; struct tp_module *tp_mod = data;
struct tracepoint_user *tuser; struct tracepoint_user *tuser;
struct tracepoint *tpoint;
if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING)
return NOTIFY_DONE;
mutex_lock(&tracepoint_user_mutex);
for_each_tracepoint_user(tuser) {
if (val == MODULE_STATE_COMING) {
/* This is not a tracepoint in this module. Skip it. */
tpoint = find_tracepoint_in_module(tp_mod->mod, tuser->name);
if (!tpoint)
continue;
WARN_ON_ONCE(tracepoint_user_register_again(tuser, tpoint));
} else if (val == MODULE_STATE_GOING &&
tracepoint_user_within_module(tuser, tp_mod->mod)) {
/* Unregister all tracepoint_user in this module. */
tracepoint_user_unregister_clear(tuser);
}
}
mutex_unlock(&tracepoint_user_mutex);
return NOTIFY_DONE;
}
static struct notifier_block tracepoint_module_nb = {
.notifier_call = __tracepoint_probe_module_cb,
};
/* module callback for tprobe events */
static int __tprobe_event_module_cb(struct notifier_block *self,
unsigned long val, void *data)
{
struct trace_fprobe *tf; struct trace_fprobe *tf;
struct dyn_event *pos; struct dyn_event *pos;
struct module *mod = data;
if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING) if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING)
return NOTIFY_DONE; return NOTIFY_DONE;
mutex_lock(&event_mutex); mutex_lock(&event_mutex);
for_each_trace_fprobe(tf, pos) { for_each_trace_fprobe(tf, pos) {
if (!trace_fprobe_is_tracepoint(tf)) /* Skip fprobe and disabled tprobe events. */
if (!trace_fprobe_is_tracepoint(tf) || !tf->tuser)
continue; continue;
if (val == MODULE_STATE_COMING) { /* Before this notification, tracepoint notifier has already done. */
if (val == MODULE_STATE_COMING &&
tracepoint_user_within_module(tf->tuser, mod)) {
unsigned long ip = tracepoint_user_ip(tf->tuser);
WARN_ON_ONCE(register_fprobe_ips(&tf->fp, &ip, 1));
} else if (val == MODULE_STATE_GOING &&
/* /*
* If any tracepoint used by tprobe is in the module, * tracepoint_user_within_module() does not work here because
* register the stub. * tracepoint_user is already unregistered and cleared tpoint.
* Instead, checking whether the fprobe is registered but
* tpoint is cleared(unregistered). Such unbalance probes
* must be adjusted anyway.
*/ */
struct tracepoint *tpoint; trace_fprobe_is_registered(tf) &&
!tf->tuser->tpoint) {
tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol);
/* This is not a tracepoint in this module. Skip it. */
if (!tpoint)
continue;
tuser = tf->tuser;
/* If the tracepoint is not registered yet, register it. */
if (!tracepoint_user_is_registered(tuser)) {
tuser->tpoint = tpoint;
if (WARN_ON_ONCE(tracepoint_user_register(tuser)))
continue;
}
/* Finally enable fprobe on this module. */
if (trace_probe_is_enabled(&tf->tp) && !trace_fprobe_is_registered(tf))
WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf));
} else if (val == MODULE_STATE_GOING) {
tuser = tf->tuser;
/* Unregister all tracepoint_user in this module. */
if (tracepoint_user_is_registered(tuser) &&
tracepoint_user_within_module(tuser, tp_mod->mod))
tracepoint_user_unregister(tuser);
/*
* Here we need to handle shared tracepoint_user case.
* Such tuser is unregistered, but trace_fprobe itself
* is registered. (Note this only handles tprobes.)
*/
if (!tracepoint_user_is_registered(tuser) &&
trace_fprobe_is_registered(tf))
unregister_fprobe(&tf->fp); unregister_fprobe(&tf->fp);
} }
} }
@ -1088,8 +1142,11 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self,
return NOTIFY_DONE; return NOTIFY_DONE;
} }
static struct notifier_block tracepoint_module_nb = { /* NOTE: this must be called after tracepoint callback */
.notifier_call = __tracepoint_probe_module_cb, static struct notifier_block tprobe_event_module_nb = {
.notifier_call = __tprobe_event_module_cb,
/* Make sure this is later than tracepoint module notifier. */
.priority = -10,
}; };
#endif /* CONFIG_MODULES */ #endif /* CONFIG_MODULES */
@ -1149,10 +1206,6 @@ static int parse_symbol_and_return(int argc, const char *argv[],
return 0; return 0;
} }
DEFINE_FREE(tuser_put, struct tracepoint_user *,
if (!IS_ERR_OR_NULL(_T))
tracepoint_user_put(_T))
static int trace_fprobe_create_internal(int argc, const char *argv[], static int trace_fprobe_create_internal(int argc, const char *argv[],
struct traceprobe_parse_context *ctx) struct traceprobe_parse_context *ctx)
{ {
@ -1181,7 +1234,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
* FETCHARG:TYPE : use TYPE instead of unsigned long. * FETCHARG:TYPE : use TYPE instead of unsigned long.
*/ */
struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
struct tracepoint_user *tuser __free(tuser_put) = NULL;
struct module *mod __free(module_put) = NULL; struct module *mod __free(module_put) = NULL;
int i, new_argc = 0, ret = 0; int i, new_argc = 0, ret = 0;
bool is_return = false; bool is_return = false;
@ -1244,22 +1296,20 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
else else
ctx->flags |= TPARG_FL_FENTRY; ctx->flags |= TPARG_FL_FENTRY;
ctx->funcname = NULL;
if (is_tracepoint) { if (is_tracepoint) {
/* Get tracepoint and lock its module until the end of the registration. */
struct tracepoint *tpoint;
ctx->flags |= TPARG_FL_TPOINT; ctx->flags |= TPARG_FL_TPOINT;
tuser = trace_fprobe_get_tracepoint_user(symbol, &mod); mod = NULL;
if (!tuser) tpoint = find_tracepoint(symbol, &mod);
return -ENOMEM; if (tpoint)
if (IS_ERR(tuser)) { ctx->funcname = kallsyms_lookup((unsigned long)tpoint->probestub,
trace_probe_log_set_index(1); NULL, NULL, NULL, sbuf);
trace_probe_log_err(0, NO_TRACEPOINT);
return PTR_ERR(tuser);
} }
ctx->funcname = tracepoint_user_lookup(tuser, sbuf);
/* If tracepoint is not loaded yet, use symbol name as funcname. */
if (!ctx->funcname) if (!ctx->funcname)
ctx->funcname = symbol; ctx->funcname = symbol;
} else
ctx->funcname = symbol;
argc -= 2; argv += 2; argc -= 2; argv += 2;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc, new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
@ -1281,14 +1331,13 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
return ret; return ret;
/* setup a probe */ /* setup a probe */
tf = alloc_trace_fprobe(group, event, symbol, tuser, argc, is_return); tf = alloc_trace_fprobe(group, event, symbol, argc, is_return, is_tracepoint);
if (IS_ERR(tf)) { if (IS_ERR(tf)) {
ret = PTR_ERR(tf); ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */ /* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM); WARN_ON_ONCE(ret != -ENOMEM);
return ret; return ret;
} }
tuser = NULL; /* Move tuser to tf. */
/* parse arguments */ /* parse arguments */
for (i = 0; i < argc; i++) { for (i = 0; i < argc; i++) {
@ -1506,6 +1555,9 @@ static __init int init_fprobe_trace_early(void)
ret = register_tracepoint_module_notifier(&tracepoint_module_nb); ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
if (ret) if (ret)
return ret; return ret;
ret = register_module_notifier(&tprobe_event_module_nb);
if (ret)
return ret;
#endif #endif
return 0; return 0;