mirror of https://github.com/torvalds/linux.git
249 lines
5.5 KiB
C
249 lines
5.5 KiB
C
#include "vmlinux.h"
|
|
#include "bpf_tracing_net.h"
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_tracing.h>
|
|
#include "bpf_misc.h"
|
|
#include "bpf_kfuncs.h"
|
|
#include <errno.h>
|
|
|
|
__u32 monitored_pid = 0;
|
|
|
|
int nr_active;
|
|
int nr_snd;
|
|
int nr_passive;
|
|
int nr_sched;
|
|
int nr_txsw;
|
|
int nr_ack;
|
|
|
|
struct sk_stg {
|
|
__u64 sendmsg_ns; /* record ts when sendmsg is called */
|
|
};
|
|
|
|
struct sk_tskey {
|
|
u64 cookie;
|
|
u32 tskey;
|
|
};
|
|
|
|
struct delay_info {
|
|
u64 sendmsg_ns; /* record ts when sendmsg is called */
|
|
u32 sched_delay; /* SCHED_CB - sendmsg_ns */
|
|
u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
|
|
u32 ack_delay; /* ACK_CB - SND_SW_CB */
|
|
};
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
|
|
__uint(map_flags, BPF_F_NO_PREALLOC);
|
|
__type(key, int);
|
|
__type(value, struct sk_stg);
|
|
} sk_stg_map SEC(".maps");
|
|
|
|
struct {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__type(key, struct sk_tskey);
|
|
__type(value, struct delay_info);
|
|
__uint(max_entries, 1024);
|
|
} time_map SEC(".maps");
|
|
|
|
static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */
|
|
|
|
extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
|
|
|
|
static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
|
|
{
|
|
int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
|
|
int opt = SK_BPF_CB_FLAGS;
|
|
int level = SOL_SOCKET;
|
|
|
|
if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
|
|
return 1;
|
|
|
|
if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
|
|
(!expected && tmp != new))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
|
|
{
|
|
if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
|
|
{
|
|
u8 opt[3] = {0};
|
|
int load_flags = 0;
|
|
int ret;
|
|
|
|
ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
|
|
if (ret != -EOPNOTSUPP)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
|
|
{
|
|
int ret;
|
|
|
|
ret = bpf_sock_ops_cb_flags_set(skops, 0);
|
|
if (ret != -EOPNOTSUPP)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* In the timestamping callbacks, we're not allowed to call the following
|
|
* BPF CALLs for the safety concern. Return false if expected.
|
|
*/
|
|
static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
|
|
const struct sock *sk)
|
|
{
|
|
if (bpf_test_access_sockopt(skops, sk))
|
|
return true;
|
|
|
|
if (bpf_test_access_load_hdr_opt(skops))
|
|
return true;
|
|
|
|
if (bpf_test_access_cb_flags_set(skops))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
|
|
{
|
|
struct bpf_sock_ops_kern *skops_kern;
|
|
u64 timestamp = bpf_ktime_get_ns();
|
|
struct skb_shared_info *shinfo;
|
|
struct delay_info dinfo = {0};
|
|
struct sk_tskey key = {0};
|
|
struct delay_info *val;
|
|
struct sk_buff *skb;
|
|
struct sk_stg *stg;
|
|
u64 prior_ts, delay;
|
|
|
|
if (bpf_test_access_bpf_calls(skops, sk))
|
|
return false;
|
|
|
|
skops_kern = bpf_cast_to_kern_ctx(skops);
|
|
skb = skops_kern->skb;
|
|
shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
|
|
|
|
key.cookie = bpf_get_socket_cookie(skops);
|
|
if (!key.cookie)
|
|
return false;
|
|
|
|
if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
|
|
stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
|
|
if (!stg)
|
|
return false;
|
|
dinfo.sendmsg_ns = stg->sendmsg_ns;
|
|
bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
|
|
key.tskey = shinfo->tskey;
|
|
if (!key.tskey)
|
|
return false;
|
|
bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
|
|
return true;
|
|
}
|
|
|
|
key.tskey = shinfo->tskey;
|
|
if (!key.tskey)
|
|
return false;
|
|
|
|
val = bpf_map_lookup_elem(&time_map, &key);
|
|
if (!val)
|
|
return false;
|
|
|
|
switch (skops->op) {
|
|
case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
|
|
val->sched_delay = timestamp - val->sendmsg_ns;
|
|
delay = val->sched_delay;
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
|
|
prior_ts = val->sched_delay + val->sendmsg_ns;
|
|
val->snd_sw_delay = timestamp - prior_ts;
|
|
delay = val->snd_sw_delay;
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_ACK_CB:
|
|
prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
|
|
val->ack_delay = timestamp - prior_ts;
|
|
delay = val->ack_delay;
|
|
break;
|
|
}
|
|
|
|
if (delay >= delay_tolerance_nsec)
|
|
return false;
|
|
|
|
/* Since it's the last one, remove from the map after latency check */
|
|
if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
|
|
bpf_map_delete_elem(&time_map, &key);
|
|
|
|
return true;
|
|
}
|
|
|
|
SEC("fentry/tcp_sendmsg_locked")
|
|
int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
|
|
size_t size)
|
|
{
|
|
__u32 pid = bpf_get_current_pid_tgid() >> 32;
|
|
u64 timestamp = bpf_ktime_get_ns();
|
|
u32 flag = sk->sk_bpf_cb_flags;
|
|
struct sk_stg *stg;
|
|
|
|
if (pid != monitored_pid || !flag)
|
|
return 0;
|
|
|
|
stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
|
|
BPF_SK_STORAGE_GET_F_CREATE);
|
|
if (!stg)
|
|
return 0;
|
|
|
|
stg->sendmsg_ns = timestamp;
|
|
nr_snd += 1;
|
|
return 0;
|
|
}
|
|
|
|
SEC("sockops")
|
|
int skops_sockopt(struct bpf_sock_ops *skops)
|
|
{
|
|
struct bpf_sock *bpf_sk = skops->sk;
|
|
const struct sock *sk;
|
|
|
|
if (!bpf_sk)
|
|
return 1;
|
|
|
|
sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
|
|
if (!sk)
|
|
return 1;
|
|
|
|
switch (skops->op) {
|
|
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
|
|
nr_active += !bpf_test_sockopt(skops, sk, 0);
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
|
|
if (bpf_test_delay(skops, sk))
|
|
nr_snd += 1;
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
|
|
if (bpf_test_delay(skops, sk))
|
|
nr_sched += 1;
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
|
|
if (bpf_test_delay(skops, sk))
|
|
nr_txsw += 1;
|
|
break;
|
|
case BPF_SOCK_OPS_TSTAMP_ACK_CB:
|
|
if (bpf_test_delay(skops, sk))
|
|
nr_ack += 1;
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
char _license[] SEC("license") = "GPL";
|