bpf-next-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQ6NaUOruQGUkvPdG4raS+Z+3y5EwUCaNNwBQAKCRAraS+Z+3y5
 E8heAQDdJTR9rwAL7gD79cldlHP5PTmjyidLIoFG/efaGSbN1AD9EdvrykDU4xOG
 aGaO8TooGUZf7vAL8tIFuMeydYvi/gM=
 =Qu4T
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Martin KaFai Lau says:

====================
pull-request: bpf-next 2025-09-23

We've added 9 non-merge commits during the last 33 day(s) which contain
a total of 10 files changed, 480 insertions(+), 53 deletions(-).

The main changes are:

1) A new bpf_xdp_pull_data kfunc that supports pulling data from
   a frag into the linear area of an xdp_buff, from Amery Hung.

   This includes changes in the xdp_native.bpf.c selftest, which
   Nimrod's future work depends on.

   It is a merge from a stable branch 'xdp_pull_data' which has
   also been merged to bpf-next.

   There is a conflict with recent changes in 'include/net/xdp.h'
   in the net-next tree that will need to be resolved.

2) A compiler warning fix when CONFIG_NET=n in the recent dynptr
   skb_meta support, from Jakub Sitnicki.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  selftests: drv-net: Pull data before parsing headers
  selftests/bpf: Test bpf_xdp_pull_data
  bpf: Support specifying linear xdp packet data size for BPF_PROG_TEST_RUN
  bpf: Make variables in bpf_prog_test_run_xdp less confusing
  bpf: Clear packet pointers after changing packet data in kfuncs
  bpf: Support pulling non-linear xdp data
  bpf: Allow bpf_xdp_shrink_data to shrink a frag from head and tail
  bpf: Clear pfmemalloc flag when freeing all fragments
  bpf: Return an error pointer for skb metadata when CONFIG_NET=n
====================

Link: https://patch.msgid.link/20250924050303.2466356-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2025-09-24 10:21:39 -07:00
commit 5e3fee34f6
10 changed files with 480 additions and 53 deletions

View File

@ -1822,7 +1822,7 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset) static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
{ {
return NULL; return ERR_PTR(-EOPNOTSUPP);
} }
#endif /* CONFIG_NET */ #endif /* CONFIG_NET */

View File

@ -136,6 +136,11 @@ static __always_inline u32 xdp_buff_get_skb_flags(const struct xdp_buff *xdp)
return xdp->flags; return xdp->flags;
} }
static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
{
xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
static __always_inline void static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{ {

View File

@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return ret; return ret;
} }
static inline void xsk_buff_del_tail(struct xdp_buff *tail) static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{ {
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
list_del(&xskb->list_node); list_del(&xskb->list_node);
} }
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
struct xdp_buff_xsk *frag;
frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
list_node);
return &frag->xdp;
}
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{ {
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
@ -389,10 +399,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return NULL; return NULL;
} }
static inline void xsk_buff_del_tail(struct xdp_buff *tail) static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{ {
} }
static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
return NULL;
}
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{ {
return NULL; return NULL;

View File

@ -12239,6 +12239,7 @@ enum special_kfunc_type {
KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp, KF_bpf_dynptr_from_xdp,
KF_bpf_dynptr_from_skb_meta, KF_bpf_dynptr_from_skb_meta,
KF_bpf_xdp_pull_data,
KF_bpf_dynptr_slice, KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone, KF_bpf_dynptr_clone,
@ -12289,10 +12290,12 @@ BTF_ID(func, bpf_rbtree_right)
BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta) BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else #else
BTF_ID_UNUSED BTF_ID_UNUSED
BTF_ID_UNUSED BTF_ID_UNUSED
BTF_ID_UNUSED BTF_ID_UNUSED
BTF_ID_UNUSED
#endif #endif
BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_slice_rdwr)
@ -12362,6 +12365,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable]; return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
} }
static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
}
static enum kfunc_ptr_arg_type static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta, struct bpf_kfunc_call_arg_meta *meta,
@ -14081,6 +14089,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
} }
} }
if (is_kfunc_pkt_changing(&meta))
clear_all_pkt_pointers(env);
nargs = btf_type_vlen(meta.func_proto); nargs = btf_type_vlen(meta.func_proto);
args = (const struct btf_param *)(meta.func_proto + 1); args = (const struct btf_param *)(meta.func_proto + 1);
for (i = 0; i < nargs; i++) { for (i = 0; i < nargs; i++) {
@ -17798,6 +17809,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
*/ */
if (ret == 0 && is_kfunc_sleepable(&meta)) if (ret == 0 && is_kfunc_sleepable(&meta))
mark_subprog_might_sleep(env, t); mark_subprog_might_sleep(env, t);
if (ret == 0 && is_kfunc_pkt_changing(&meta))
mark_subprog_changes_pkt_data(env, t);
} }
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL); return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);

View File

@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data; void *data;
if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom) if (user_size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
size = SKB_DATA_ALIGN(size); size = SKB_DATA_ALIGN(size);
@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
kattr->test.cpu || kattr->test.batch_size) kattr->test.cpu || kattr->test.batch_size)
return -EINVAL; return -EINVAL;
if (size < ETH_HLEN)
return -EINVAL;
data = bpf_test_init(kattr, kattr->test.data_size_in, data = bpf_test_init(kattr, kattr->test.data_size_in,
size, NET_SKB_PAD + NET_IP_ALIGN, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
{ {
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES); bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
u32 linear_sz = kattr->test.data_size_in;
u32 batch_size = kattr->test.batch_size; u32 batch_size = kattr->test.batch_size;
u32 retval = 0, duration, max_data_sz;
u32 size = kattr->test.data_size_in;
u32 headroom = XDP_PACKET_HEADROOM; u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat; u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue; struct netdev_rx_queue *rxqueue;
@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ctx) { if (ctx) {
/* There can't be user provided data before the meta data */ /* There can't be user provided data before the meta data */
if (ctx->data_meta || ctx->data_end != size || if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
ctx->data > ctx->data_end || ctx->data > ctx->data_end ||
unlikely(xdp_metalen_invalid(ctx->data)) || unlikely(xdp_metalen_invalid(ctx->data)) ||
(do_live && (kattr->test.data_out || kattr->test.ctx_out))) (do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx; goto free_ctx;
/* Meta data is allocated from the headroom */ /* Meta data is allocated from the headroom */
headroom -= ctx->data; headroom -= ctx->data;
meta_sz = ctx->data;
linear_sz = ctx->data_end;
} }
max_data_sz = PAGE_SIZE - headroom - tailroom; max_linear_sz = PAGE_SIZE - headroom - tailroom;
if (size > max_data_sz) { linear_sz = min_t(u32, linear_sz, max_linear_sz);
/* disallow live data mode for jumbo frames */ /* disallow live data mode for jumbo frames */
if (do_live) if (do_live && kattr->test.data_size_in > linear_sz)
goto free_ctx; goto free_ctx;
size = max_data_sz;
}
data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom); if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
return -EINVAL;
data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
if (IS_ERR(data)) { if (IS_ERR(data)) {
ret = PTR_ERR(data); ret = PTR_ERR(data);
goto free_ctx; goto free_ctx;
} }
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom; rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true); xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
sinfo = xdp_get_shared_info_from_buff(&xdp); sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp); ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret) if (ret)
goto free_data; goto free_data;
size = linear_sz;
if (unlikely(kattr->test.data_size_in > size)) { if (unlikely(kattr->test.data_size_in > size)) {
void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void __user *data_in = u64_to_user_ptr(kattr->test.data_in);

View File

@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
return 0; return 0;
} }
static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
enum xdp_mem_type mem_type, bool release) bool tail, bool release)
{ {
struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
xsk_buff_get_head(xdp);
if (release) { if (release) {
xsk_buff_del_tail(zc_frag); xsk_buff_del_frag(zc_frag);
__xdp_return(0, mem_type, false, zc_frag);
} else { } else {
if (tail)
zc_frag->data_end -= shrink; zc_frag->data_end -= shrink;
else
zc_frag->data += shrink;
} }
return zc_frag;
} }
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
int shrink) int shrink, bool tail)
{ {
enum xdp_mem_type mem_type = xdp->rxq->mem.type; enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink; bool release = skb_frag_size(frag) == shrink;
netmem_ref netmem = skb_frag_netmem(frag);
struct xdp_buff *zc_frag = NULL;
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) { if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release); netmem = 0;
goto out; zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
} }
if (release) if (release) {
__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL); __xdp_return(netmem, mem_type, false, zc_frag);
} else {
if (!tail)
skb_frag_off_add(frag, shrink);
skb_frag_size_sub(frag, shrink);
}
out:
return release; return release;
} }
@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink; len_free += shrink;
offset -= shrink; offset -= shrink;
if (bpf_xdp_shrink_data(xdp, frag, shrink)) { if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
n_frags_free++; n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
break;
}
} }
sinfo->nr_frags -= n_frags_free; sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free; sinfo->xdp_frags_size -= len_free;
if (unlikely(!sinfo->nr_frags)) { if (unlikely(!sinfo->nr_frags)) {
xdp_buff_clear_frags_flag(xdp); xdp_buff_clear_frags_flag(xdp);
xdp_buff_clear_frag_pfmemalloc(xdp);
xdp->data_end -= offset; xdp->data_end -= offset;
} }
@ -12205,6 +12213,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0; return 0;
} }
/**
* bpf_xdp_pull_data() - Pull in non-linear xdp data.
* @x: &xdp_md associated with the XDP buffer
* @len: length of data to be made directly accessible in the linear part
*
* Pull in data in case the XDP buffer associated with @x is non-linear and
* not all @len are in the linear data area.
*
* Direct packet access allows reading and writing linear XDP data through
* packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
* ends up in the linear part of the xdp_buff depends on the NIC and its
* configuration. When a frag-capable XDP program wants to directly access
* headers that may be in the non-linear area, call this kfunc to make sure
* the data is available in the linear area. Alternatively, use dynptr or
* bpf_xdp_{load,store}_bytes() to access data without pulling.
*
* This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
* headers in the non-linear data area.
*
* A call to this kfunc may reduce headroom. If there is not enough tailroom
* in the linear data area, metadata and data will be shifted down.
*
* A call to this kfunc is susceptible to change the buffer geometry.
* Therefore, at load time, all checks on pointers previously done by the
* verifier are invalidated and must be performed again, if the kfunc is used
* in combination with direct packet access.
*
* Return:
* * %0 - success
* * %-EINVAL - invalid len
*/
__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
{
struct xdp_buff *xdp = (struct xdp_buff *)x;
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
int i, delta, shift, headroom, tailroom, n_frags_free = 0;
void *data_hard_end = xdp_data_hard_end(xdp);
int data_len = xdp->data_end - xdp->data;
void *start;
if (len <= data_len)
return 0;
if (unlikely(len > xdp_get_buff_len(xdp)))
return -EINVAL;
start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
tailroom = data_hard_end - xdp->data_end;
delta = len - data_len;
if (unlikely(delta > tailroom + headroom))
return -EINVAL;
shift = delta - tailroom;
if (shift > 0) {
memmove(start - shift, start, xdp->data_end - start);
xdp->data_meta -= shift;
xdp->data -= shift;
xdp->data_end -= shift;
}
for (i = 0; i < sinfo->nr_frags && delta; i++) {
skb_frag_t *frag = &sinfo->frags[i];
u32 shrink = min_t(u32, delta, skb_frag_size(frag));
memcpy(xdp->data_end, skb_frag_address(frag), shrink);
xdp->data_end += shrink;
sinfo->xdp_frags_size -= shrink;
delta -= shrink;
if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
n_frags_free++;
}
if (unlikely(n_frags_free)) {
memmove(sinfo->frags, sinfo->frags + n_frags_free,
(sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
sinfo->nr_frags -= n_frags_free;
if (!sinfo->nr_frags) {
xdp_buff_clear_frags_flag(xdp);
xdp_buff_clear_frag_pfmemalloc(xdp);
}
}
return 0;
}
__bpf_kfunc_end_defs(); __bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@ -12232,6 +12332,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp) BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp) BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr) BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)

View File

@ -97,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */ /* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
/* Total size of data must match data_end - data_meta */ /* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) - 1, 0, 0, 0);
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0); sizeof(data) + 1, 0, 0, 0);

View File

@ -0,0 +1,179 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_pull_data.skel.h"
#define PULL_MAX (1 << 31)
#define PULL_PLUS_ONE (1 << 30)
#define XDP_PACKET_HEADROOM 256
/* Find headroom and tailroom occupied by struct xdp_frame and struct
* skb_shared_info so that we can calculate the maximum pull lengths for
* test cases. They might not be the real size of the structures due to
* cache alignment.
*/
static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
struct xdp_md ctx = {};
int prog_fd, err;
__u8 *buf;
buf = calloc(frame_sz, sizeof(__u8));
if (!ASSERT_OK_PTR(buf, "calloc buf"))
return -ENOMEM;
topts.data_in = buf;
topts.data_out = buf;
topts.data_size_in = frame_sz;
topts.data_size_out = frame_sz;
/* Pass a data_end larger than the linear space available to make sure
* bpf_prog_test_run_xdp() will fill the linear data area so that
* xdp_find_sizes can infer the size of struct skb_shared_info
*/
ctx.data_end = frame_sz;
topts.ctx_in = &ctx;
topts.ctx_out = &ctx;
topts.ctx_size_in = sizeof(ctx);
topts.ctx_size_out = sizeof(ctx);
prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run_opts");
free(buf);
return err;
}
/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024]
* so caller expecting XDP_PASS should always pass pull_len no less than 1024
*/
static void run_test(struct test_xdp_pull_data *skel, int retval,
int frame_sz, int buff_len, int meta_len, int data_len,
int pull_len)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
struct xdp_md ctx = {};
int prog_fd, err;
__u8 *buf;
buf = calloc(buff_len, sizeof(__u8));
if (!ASSERT_OK_PTR(buf, "calloc buf"))
return;
buf[meta_len + 1023] = 0xaa;
buf[meta_len + 1024] = 0xbb;
buf[meta_len + 1025] = 0xcc;
topts.data_in = buf;
topts.data_out = buf;
topts.data_size_in = buff_len;
topts.data_size_out = buff_len;
ctx.data = meta_len;
ctx.data_end = meta_len + data_len;
topts.ctx_in = &ctx;
topts.ctx_out = &ctx;
topts.ctx_size_in = sizeof(ctx);
topts.ctx_size_out = sizeof(ctx);
skel->bss->data_len = data_len;
if (pull_len & PULL_MAX) {
int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
int tailroom = frame_sz - XDP_PACKET_HEADROOM -
data_len - skel->bss->sinfo_sz;
pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
pull_len += headroom + tailroom + data_len;
}
skel->bss->pull_len = pull_len;
prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "bpf_prog_test_run_opts");
ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
if (retval == XDP_DROP)
goto out;
ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
/* Make sure data around xdp->data_end was not messed up by
* bpf_xdp_pull_data()
*/
ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
out:
free(buf);
}
static void test_xdp_pull_data_basic(void)
{
u32 pg_sz, max_meta_len, max_data_len;
struct test_xdp_pull_data *skel;
skel = test_xdp_pull_data__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
return;
pg_sz = sysconf(_SC_PAGE_SIZE);
if (find_xdp_sizes(skel, pg_sz))
goto out;
max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
/* linear xdp pkt, pull 0 byte */
run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
/* multi-buf pkt, pull results in linear xdp pkt */
run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
/* multi-buf pkt, pull 1 byte to linear data area */
run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
/* multi-buf pkt, pull 0 byte to linear data area */
run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
/* multi-buf pkt, empty linear data area, pull requires memmove */
run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
/* multi-buf pkt, no headroom */
run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
/* multi-buf pkt, no tailroom, pull requires memmove */
run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
/* Test cases with invalid pull length */
/* linear xdp pkt, pull more than total data len */
run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
/* multi-buf pkt with no space left in linear data area */
run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, empty linear data area */
run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no headroom */
run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
PULL_MAX | PULL_PLUS_ONE);
/* multi-buf pkt, no tailroom */
run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
PULL_MAX | PULL_PLUS_ONE);
out:
test_xdp_pull_data__destroy(skel);
}
void test_xdp_pull_data(void)
{
if (test__start_subtest("xdp_pull_data"))
test_xdp_pull_data_basic();
}

View File

@ -0,0 +1,48 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
int xdpf_sz;
int sinfo_sz;
int data_len;
int pull_len;
#define XDP_PACKET_HEADROOM 256
SEC("xdp.frags")
int xdp_find_sizes(struct xdp_md *ctx)
{
xdpf_sz = sizeof(struct xdp_frame);
sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
(ctx->data_end - ctx->data);
return XDP_PASS;
}
SEC("xdp.frags")
int xdp_pull_data_prog(struct xdp_md *ctx)
{
__u8 *data_end = (void *)(long)ctx->data_end;
__u8 *data = (void *)(long)ctx->data;
__u8 *val_p;
int err;
if (data_len != data_end - data)
return XDP_DROP;
err = bpf_xdp_pull_data(ctx, pull_len);
if (err)
return XDP_DROP;
val_p = (void *)(long)ctx->data + 1024;
if (val_p + 1 > (void *)(long)ctx->data_end)
return XDP_DROP;
if (*val_p != 0xbb)
return XDP_DROP;
return XDP_PASS;
}
char _license[] SEC("license") = "GPL";

View File

@ -14,6 +14,8 @@
#define MAX_PAYLOAD_LEN 5000 #define MAX_PAYLOAD_LEN 5000
#define MAX_HDR_LEN 64 #define MAX_HDR_LEN 64
extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
enum { enum {
XDP_MODE = 0, XDP_MODE = 0,
XDP_PORT = 1, XDP_PORT = 1,
@ -68,30 +70,57 @@ static void record_stats(struct xdp_md *ctx, __u32 stat_type)
static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
{ {
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL; struct udphdr *udph = NULL;
struct ethhdr *eth = data; void *data, *data_end;
struct ethhdr *eth;
int err;
err = bpf_xdp_pull_data(ctx, sizeof(*eth));
if (err)
return NULL;
data_end = (void *)(long)ctx->data_end;
data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end) if (data + sizeof(*eth) > data_end)
return NULL; return NULL;
if (eth->h_proto == bpf_htons(ETH_P_IP)) { if (eth->h_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = data + sizeof(*eth); struct iphdr *iph;
err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
sizeof(*udph));
if (err)
return NULL;
data_end = (void *)(long)ctx->data_end;
data = (void *)(long)ctx->data;
iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end || if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP) iph->protocol != IPPROTO_UDP)
return NULL; return NULL;
udph = (void *)eth + sizeof(*iph) + sizeof(*eth); udph = data + sizeof(*iph) + sizeof(*eth);
} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct ipv6hdr *ipv6h = data + sizeof(*eth); struct ipv6hdr *ipv6h;
err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
sizeof(*udph));
if (err)
return NULL;
data_end = (void *)(long)ctx->data_end;
data = (void *)(long)ctx->data;
ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end || if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP) ipv6h->nexthdr != IPPROTO_UDP)
return NULL; return NULL;
udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); udph = data + sizeof(*ipv6h) + sizeof(*eth);
} else { } else {
return NULL; return NULL;
} }
@ -145,17 +174,34 @@ static void swap_machdr(void *data)
static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
{ {
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL; struct udphdr *udph = NULL;
struct ethhdr *eth = data; void *data, *data_end;
struct ethhdr *eth;
int err;
err = bpf_xdp_pull_data(ctx, sizeof(*eth));
if (err)
return XDP_PASS;
data_end = (void *)(long)ctx->data_end;
data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end) if (data + sizeof(*eth) > data_end)
return XDP_PASS; return XDP_PASS;
if (eth->h_proto == bpf_htons(ETH_P_IP)) { if (eth->h_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = data + sizeof(*eth); struct iphdr *iph;
__be32 tmp_ip = iph->saddr; __be32 tmp_ip;
err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
sizeof(*udph));
if (err)
return XDP_PASS;
data_end = (void *)(long)ctx->data_end;
data = (void *)(long)ctx->data;
iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end || if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP) iph->protocol != IPPROTO_UDP)
@ -169,8 +215,10 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS; return XDP_PASS;
record_stats(ctx, STATS_RX); record_stats(ctx, STATS_RX);
eth = data;
swap_machdr((void *)eth); swap_machdr((void *)eth);
tmp_ip = iph->saddr;
iph->saddr = iph->daddr; iph->saddr = iph->daddr;
iph->daddr = tmp_ip; iph->daddr = tmp_ip;
@ -179,8 +227,18 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_TX; return XDP_TX;
} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct ipv6hdr *ipv6h = data + sizeof(*eth);
struct in6_addr tmp_ipv6; struct in6_addr tmp_ipv6;
struct ipv6hdr *ipv6h;
err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
sizeof(*udph));
if (err)
return XDP_PASS;
data_end = (void *)(long)ctx->data_end;
data = (void *)(long)ctx->data;
ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end || if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP) ipv6h->nexthdr != IPPROTO_UDP)
@ -194,6 +252,7 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS; return XDP_PASS;
record_stats(ctx, STATS_RX); record_stats(ctx, STATS_RX);
eth = data;
swap_machdr((void *)eth); swap_machdr((void *)eth);
__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));