mirror of https://github.com/torvalds/linux.git
Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'
Daniel Borkmann says: ==================== netkit: Support for io_uring zero-copy and AF_XDP Containers use virtual netdevs to route traffic from a physical netdev in the host namespace. They do not have access to the physical netdev in the host and thus can't use memory providers or AF_XDP that require reconfiguring/restarting queues in the physical netdev. This patchset adds the concept of queue leasing to virtual netdevs that allow containers to use memory providers and AF_XDP at native speed. Leased queues are bound to a real queue in a physical netdev and act as a proxy. Memory providers and AF_XDP operations take an ifindex and queue id, so containers would pass in an ifindex for a virtual netdev and a queue id of a leased queue, which then gets proxied to the underlying real queue. We have implemented support for this concept in netkit and tested the latter against Nvidia ConnectX-6 (mlx5) as well as Broadcom BCM957504 (bnxt_en) 100G NICs. For more details see the individual patches. ==================== Link: https://patch.msgid.link/20260402231031.447597-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
1508922588
|
|
@ -339,6 +339,15 @@ attribute-sets:
|
|||
doc: XSK information for this queue, if any.
|
||||
type: nest
|
||||
nested-attributes: xsk-info
|
||||
-
|
||||
name: lease
|
||||
doc: |
|
||||
A queue from a virtual device can have a lease which refers to
|
||||
another queue from a physical device. This is useful for memory
|
||||
providers and AF_XDP operations which take an ifindex and queue id
|
||||
to allow applications to bind against virtual devices in containers.
|
||||
type: nest
|
||||
nested-attributes: lease
|
||||
-
|
||||
name: qstats
|
||||
doc: |
|
||||
|
|
@ -537,6 +546,26 @@ attribute-sets:
|
|||
name: id
|
||||
-
|
||||
name: type
|
||||
-
|
||||
name: lease
|
||||
attributes:
|
||||
-
|
||||
name: ifindex
|
||||
doc: The netdev ifindex to lease the queue from.
|
||||
type: u32
|
||||
checks:
|
||||
min: 1
|
||||
-
|
||||
name: queue
|
||||
doc: The netdev queue to lease from.
|
||||
type: nest
|
||||
nested-attributes: queue-id
|
||||
-
|
||||
name: netns-id
|
||||
doc: The network namespace id of the netdev.
|
||||
type: s32
|
||||
checks:
|
||||
min: 0
|
||||
-
|
||||
name: dmabuf
|
||||
attributes:
|
||||
|
|
@ -686,6 +715,7 @@ operations:
|
|||
- dmabuf
|
||||
- io-uring
|
||||
- xsk
|
||||
- lease
|
||||
dump:
|
||||
request:
|
||||
attributes:
|
||||
|
|
@ -797,6 +827,22 @@ operations:
|
|||
reply:
|
||||
attributes:
|
||||
- id
|
||||
-
|
||||
name: queue-create
|
||||
doc: |
|
||||
Create a new queue for the given netdevice. Whether this operation
|
||||
is supported depends on the device and the driver.
|
||||
attribute-set: queue
|
||||
flags: [admin-perm]
|
||||
do:
|
||||
request:
|
||||
attributes:
|
||||
- ifindex
|
||||
- type
|
||||
- lease
|
||||
reply: &queue-create-op
|
||||
attributes:
|
||||
- id
|
||||
|
||||
kernel-family:
|
||||
headers: ["net/netdev_netlink.h"]
|
||||
|
|
|
|||
|
|
@ -825,6 +825,13 @@ definitions:
|
|||
entries:
|
||||
- name: none
|
||||
- name: default
|
||||
-
|
||||
name: netkit-pairing
|
||||
type: enum
|
||||
enum-name: netkit-pairing
|
||||
entries:
|
||||
- name: pair
|
||||
- name: single
|
||||
-
|
||||
name: ovpn-mode
|
||||
enum-name: ovpn-mode
|
||||
|
|
@ -2299,6 +2306,10 @@ attribute-sets:
|
|||
-
|
||||
name: tailroom
|
||||
type: u16
|
||||
-
|
||||
name: pairing
|
||||
type: u32
|
||||
enum: netkit-pairing
|
||||
-
|
||||
name: linkinfo-ovpn-attrs
|
||||
name-prefix: ifla-ovpn-
|
||||
|
|
|
|||
|
|
@ -329,6 +329,12 @@ by setting ``request_ops_lock`` to true. Code comments and docs refer
|
|||
to drivers which have ops called under the instance lock as "ops locked".
|
||||
See also the documentation of the ``lock`` member of struct net_device.
|
||||
|
||||
There is also a case of taking two per-netdev locks in sequence when netdev
|
||||
queues are leased, that is, the netdev-scope lock is taken for both the
|
||||
virtual and the physical device. To prevent deadlocks, the virtual device's
|
||||
lock must always be acquired before the physical device's (see
|
||||
``netdev_nl_queue_create_doit``).
|
||||
|
||||
In the future, there will be an option for individual
|
||||
drivers to opt out of using ``rtnl_lock`` and instead perform their control
|
||||
operations directly under the netdev instance lock.
|
||||
|
|
|
|||
|
|
@ -9,11 +9,21 @@
|
|||
#include <linux/bpf_mprog.h>
|
||||
#include <linux/indirect_call_wrapper.h>
|
||||
|
||||
#include <net/netdev_lock.h>
|
||||
#include <net/netdev_queues.h>
|
||||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include <net/netkit.h>
|
||||
#include <net/dst.h>
|
||||
#include <net/tcx.h>
|
||||
|
||||
#define DRV_NAME "netkit"
|
||||
#define NETKIT_DRV_NAME "netkit"
|
||||
|
||||
#define NETKIT_NUM_RX_QUEUES_MAX 1024
|
||||
#define NETKIT_NUM_TX_QUEUES_MAX 1
|
||||
|
||||
#define NETKIT_NUM_RX_QUEUES_REAL 1
|
||||
#define NETKIT_NUM_TX_QUEUES_REAL 1
|
||||
|
||||
struct netkit {
|
||||
__cacheline_group_begin(netkit_fastpath);
|
||||
|
|
@ -26,6 +36,7 @@ struct netkit {
|
|||
|
||||
__cacheline_group_begin(netkit_slowpath);
|
||||
enum netkit_mode mode;
|
||||
enum netkit_pairing pair;
|
||||
bool primary;
|
||||
u32 headroom;
|
||||
__cacheline_group_end(netkit_slowpath);
|
||||
|
|
@ -36,6 +47,8 @@ struct netkit_link {
|
|||
struct net_device *dev;
|
||||
};
|
||||
|
||||
static struct rtnl_link_ops netkit_link_ops;
|
||||
|
||||
static __always_inline int
|
||||
netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
|
||||
enum netkit_action ret)
|
||||
|
|
@ -135,6 +148,10 @@ static int netkit_open(struct net_device *dev)
|
|||
struct netkit *nk = netkit_priv(dev);
|
||||
struct net_device *peer = rtnl_dereference(nk->peer);
|
||||
|
||||
if (nk->pair == NETKIT_DEVICE_SINGLE) {
|
||||
netif_carrier_on(dev);
|
||||
return 0;
|
||||
}
|
||||
if (!peer)
|
||||
return -ENOTCONN;
|
||||
if (peer->flags & IFF_UP) {
|
||||
|
|
@ -194,16 +211,17 @@ static void netkit_set_headroom(struct net_device *dev, int headroom)
|
|||
|
||||
rcu_read_lock();
|
||||
peer = rcu_dereference(nk->peer);
|
||||
if (unlikely(!peer))
|
||||
goto out;
|
||||
if (!peer) {
|
||||
nk->headroom = headroom;
|
||||
dev->needed_headroom = headroom;
|
||||
} else {
|
||||
nk2 = netkit_priv(peer);
|
||||
nk->headroom = headroom;
|
||||
headroom = max(nk->headroom, nk2->headroom);
|
||||
|
||||
nk2 = netkit_priv(peer);
|
||||
nk->headroom = headroom;
|
||||
headroom = max(nk->headroom, nk2->headroom);
|
||||
|
||||
peer->needed_headroom = headroom;
|
||||
dev->needed_headroom = headroom;
|
||||
out:
|
||||
peer->needed_headroom = headroom;
|
||||
dev->needed_headroom = headroom;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
|
@ -219,9 +237,96 @@ static void netkit_get_stats(struct net_device *dev,
|
|||
stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
|
||||
}
|
||||
|
||||
static bool netkit_xsk_supported_at_phys(const struct net_device *dev)
|
||||
{
|
||||
if (!dev->netdev_ops->ndo_bpf ||
|
||||
!dev->netdev_ops->ndo_xdp_xmit ||
|
||||
!dev->netdev_ops->ndo_xsk_wakeup)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp)
|
||||
{
|
||||
struct netkit *nk = netkit_priv(dev);
|
||||
struct netdev_bpf xdp_lower;
|
||||
struct netdev_rx_queue *rxq;
|
||||
struct net_device *phys;
|
||||
bool create = false;
|
||||
int ret = -EBUSY;
|
||||
|
||||
switch (xdp->command) {
|
||||
case XDP_SETUP_XSK_POOL:
|
||||
if (nk->pair == NETKIT_DEVICE_PAIR)
|
||||
return -EOPNOTSUPP;
|
||||
if (xdp->xsk.queue_id >= dev->real_num_rx_queues)
|
||||
return -EINVAL;
|
||||
|
||||
rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id);
|
||||
if (!rxq->lease)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
phys = rxq->lease->dev;
|
||||
if (!netkit_xsk_supported_at_phys(phys))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
create = xdp->xsk.pool;
|
||||
memcpy(&xdp_lower, xdp, sizeof(xdp_lower));
|
||||
xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease);
|
||||
break;
|
||||
case XDP_SETUP_PROG:
|
||||
return -EOPNOTSUPP;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
netdev_lock(phys);
|
||||
if (create &&
|
||||
(phys->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
if (!create || !dev_get_min_mp_channel_count(phys))
|
||||
ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower);
|
||||
out:
|
||||
netdev_unlock(phys);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ndo_xsk_wakeup callback of netkit: relays the wakeup to the device that
 * owns the queue leased by netkit rx queue @queue_id.
 *
 * Returns -EINVAL for an out-of-range @queue_id, -EOPNOTSUPP when the
 * queue has no lease, otherwise the result of the leased device's
 * ndo_xsk_wakeup.
 */
static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct netdev_rx_queue *leased;
	struct net_device *phys;
	u32 phys_queue_id;

	if (queue_id >= dev->real_num_rx_queues)
		return -EINVAL;

	leased = READ_ONCE(__netif_get_rx_queue(dev, queue_id)->lease);
	if (unlikely(!leased))
		return -EOPNOTSUPP;

	/* No need to re-check for ndo_xsk_wakeup here: netkit_xsk verified
	 * full xsk support before a pool could be installed, and the phys
	 * xsk support cannot change without tearing down the device, which
	 * clears the lease first.
	 */
	phys = leased->dev;
	phys_queue_id = get_netdev_rx_queue_index(leased);

	return phys->netdev_ops->ndo_xsk_wakeup(phys, phys_queue_id, flags);
}
|
||||
|
||||
/* ndo_init callback of netkit: applies netdev_lockdep_set_classes() to
 * @dev. Always returns 0.
 */
static int netkit_init(struct net_device *dev)
{
	netdev_lockdep_set_classes(dev);

	return 0;
}
|
||||
|
||||
static void netkit_uninit(struct net_device *dev);
|
||||
|
||||
static const struct net_device_ops netkit_netdev_ops = {
|
||||
.ndo_init = netkit_init,
|
||||
.ndo_open = netkit_open,
|
||||
.ndo_stop = netkit_close,
|
||||
.ndo_start_xmit = netkit_xmit,
|
||||
|
|
@ -232,19 +337,104 @@ static const struct net_device_ops netkit_netdev_ops = {
|
|||
.ndo_get_peer_dev = netkit_peer_dev,
|
||||
.ndo_get_stats64 = netkit_get_stats,
|
||||
.ndo_uninit = netkit_uninit,
|
||||
.ndo_bpf = netkit_xsk,
|
||||
.ndo_xsk_wakeup = netkit_xsk_wakeup,
|
||||
.ndo_features_check = passthru_features_check,
|
||||
};
|
||||
|
||||
static void netkit_get_drvinfo(struct net_device *dev,
|
||||
struct ethtool_drvinfo *info)
|
||||
{
|
||||
strscpy(info->driver, DRV_NAME, sizeof(info->driver));
|
||||
strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver));
|
||||
}
|
||||
|
||||
static const struct ethtool_ops netkit_ethtool_ops = {
|
||||
.get_drvinfo = netkit_get_drvinfo,
|
||||
};
|
||||
|
||||
static int netkit_queue_create(struct net_device *dev,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct netkit *nk = netkit_priv(dev);
|
||||
u32 rxq_count_old, rxq_count_new;
|
||||
int err;
|
||||
|
||||
rxq_count_old = dev->real_num_rx_queues;
|
||||
rxq_count_new = rxq_count_old + 1;
|
||||
|
||||
/* In paired mode, only the non-primary (peer) device can
|
||||
* create leased queues since the primary is the management
|
||||
* side. In single device mode, leasing is always allowed.
|
||||
*/
|
||||
if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) {
|
||||
NL_SET_ERR_MSG(extack,
|
||||
"netkit can only lease against the peer device");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
err = netif_set_real_num_rx_queues(dev, rxq_count_new);
|
||||
if (err) {
|
||||
if (rxq_count_new > dev->num_rx_queues)
|
||||
NL_SET_ERR_MSG(extack,
|
||||
"netkit maximum queue limit reached");
|
||||
else
|
||||
NL_SET_ERR_MSG_FMT(extack,
|
||||
"netkit cannot create more queues err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
return rxq_count_old;
|
||||
}
|
||||
|
||||
static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = {
|
||||
.ndo_queue_create = netkit_queue_create,
|
||||
};
|
||||
|
||||
static struct net_device *netkit_alloc(struct nlattr *tb[],
|
||||
const char *ifname,
|
||||
unsigned char name_assign_type,
|
||||
unsigned int num_tx_queues,
|
||||
unsigned int num_rx_queues)
|
||||
{
|
||||
const struct rtnl_link_ops *ops = &netkit_link_ops;
|
||||
struct net_device *dev;
|
||||
|
||||
if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX ||
|
||||
num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
dev = alloc_netdev_mqs(ops->priv_size, ifname,
|
||||
name_assign_type, ops->setup,
|
||||
num_tx_queues, num_rx_queues);
|
||||
if (dev) {
|
||||
dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL;
|
||||
dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL;
|
||||
}
|
||||
return dev;
|
||||
}
|
||||
|
||||
static void netkit_queue_unlease(struct net_device *dev)
|
||||
{
|
||||
struct netdev_rx_queue *rxq, *rxq_lease;
|
||||
struct net_device *dev_lease;
|
||||
int i;
|
||||
|
||||
if (dev->real_num_rx_queues == 1)
|
||||
return;
|
||||
|
||||
netdev_lock(dev);
|
||||
for (i = 1; i < dev->real_num_rx_queues; i++) {
|
||||
rxq = __netif_get_rx_queue(dev, i);
|
||||
rxq_lease = rxq->lease;
|
||||
dev_lease = rxq_lease->dev;
|
||||
|
||||
netdev_lock(dev_lease);
|
||||
netdev_rx_queue_unlease(rxq, rxq_lease);
|
||||
netdev_unlock(dev_lease);
|
||||
}
|
||||
netdev_unlock(dev);
|
||||
}
|
||||
|
||||
static void netkit_setup(struct net_device *dev)
|
||||
{
|
||||
static const netdev_features_t netkit_features_hw_vlan =
|
||||
|
|
@ -275,8 +465,9 @@ static void netkit_setup(struct net_device *dev)
|
|||
dev->priv_flags |= IFF_DISABLE_NETPOLL;
|
||||
dev->lltx = true;
|
||||
|
||||
dev->ethtool_ops = &netkit_ethtool_ops;
|
||||
dev->netdev_ops = &netkit_netdev_ops;
|
||||
dev->netdev_ops = &netkit_netdev_ops;
|
||||
dev->ethtool_ops = &netkit_ethtool_ops;
|
||||
dev->queue_mgmt_ops = &netkit_queue_mgmt_ops;
|
||||
|
||||
dev->features |= netkit_features;
|
||||
dev->hw_features = netkit_features;
|
||||
|
|
@ -325,8 +516,6 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct rtnl_link_ops netkit_link_ops;
|
||||
|
||||
static int netkit_new_link(struct net_device *dev,
|
||||
struct rtnl_newlink_params *params,
|
||||
struct netlink_ext_ack *extack)
|
||||
|
|
@ -335,15 +524,17 @@ static int netkit_new_link(struct net_device *dev,
|
|||
enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
|
||||
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
|
||||
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr;
|
||||
enum netkit_pairing pair = NETKIT_DEVICE_PAIR;
|
||||
enum netkit_action policy_prim = NETKIT_PASS;
|
||||
enum netkit_action policy_peer = NETKIT_PASS;
|
||||
bool seen_peer = false, seen_scrub = false;
|
||||
struct nlattr **data = params->data;
|
||||
enum netkit_mode mode = NETKIT_L3;
|
||||
unsigned char ifname_assign_type;
|
||||
struct nlattr **tb = params->tb;
|
||||
u16 headroom = 0, tailroom = 0;
|
||||
struct ifinfomsg *ifmp = NULL;
|
||||
struct net_device *peer;
|
||||
struct net_device *peer = NULL;
|
||||
char ifname[IFNAMSIZ];
|
||||
struct netkit *nk;
|
||||
int err;
|
||||
|
|
@ -380,6 +571,13 @@ static int netkit_new_link(struct net_device *dev,
|
|||
headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
|
||||
if (data[IFLA_NETKIT_TAILROOM])
|
||||
tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
|
||||
if (data[IFLA_NETKIT_PAIRING])
|
||||
pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]);
|
||||
|
||||
seen_scrub = data[IFLA_NETKIT_SCRUB];
|
||||
seen_peer = data[IFLA_NETKIT_PEER_INFO] ||
|
||||
data[IFLA_NETKIT_PEER_SCRUB] ||
|
||||
data[IFLA_NETKIT_PEER_POLICY];
|
||||
}
|
||||
|
||||
if (ifmp && tbp[IFLA_IFNAME]) {
|
||||
|
|
@ -392,45 +590,47 @@ static int netkit_new_link(struct net_device *dev,
|
|||
if (mode != NETKIT_L2 &&
|
||||
(tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
|
||||
return -EOPNOTSUPP;
|
||||
if (pair == NETKIT_DEVICE_SINGLE &&
|
||||
(tb != tbp || seen_peer || seen_scrub ||
|
||||
policy_prim != NETKIT_PASS))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
|
||||
&netkit_link_ops, tbp, extack);
|
||||
if (IS_ERR(peer))
|
||||
return PTR_ERR(peer);
|
||||
if (pair == NETKIT_DEVICE_PAIR) {
|
||||
peer = rtnl_create_link(peer_net, ifname, ifname_assign_type,
|
||||
&netkit_link_ops, tbp, extack);
|
||||
if (IS_ERR(peer))
|
||||
return PTR_ERR(peer);
|
||||
|
||||
netif_inherit_tso_max(peer, dev);
|
||||
if (headroom) {
|
||||
peer->needed_headroom = headroom;
|
||||
dev->needed_headroom = headroom;
|
||||
netif_inherit_tso_max(peer, dev);
|
||||
if (headroom)
|
||||
peer->needed_headroom = headroom;
|
||||
if (tailroom)
|
||||
peer->needed_tailroom = tailroom;
|
||||
if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
|
||||
eth_hw_addr_random(peer);
|
||||
if (ifmp && dev->ifindex)
|
||||
peer->ifindex = ifmp->ifi_index;
|
||||
|
||||
nk = netkit_priv(peer);
|
||||
nk->primary = false;
|
||||
nk->policy = policy_peer;
|
||||
nk->scrub = scrub_peer;
|
||||
nk->mode = mode;
|
||||
nk->pair = pair;
|
||||
nk->headroom = headroom;
|
||||
bpf_mprog_bundle_init(&nk->bundle);
|
||||
|
||||
err = register_netdevice(peer);
|
||||
if (err < 0)
|
||||
goto err_register_peer;
|
||||
netif_carrier_off(peer);
|
||||
if (mode == NETKIT_L2)
|
||||
dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
|
||||
|
||||
err = rtnl_configure_link(peer, NULL, 0, NULL);
|
||||
if (err < 0)
|
||||
goto err_configure_peer;
|
||||
}
|
||||
if (tailroom) {
|
||||
peer->needed_tailroom = tailroom;
|
||||
dev->needed_tailroom = tailroom;
|
||||
}
|
||||
|
||||
if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
|
||||
eth_hw_addr_random(peer);
|
||||
if (ifmp && dev->ifindex)
|
||||
peer->ifindex = ifmp->ifi_index;
|
||||
|
||||
nk = netkit_priv(peer);
|
||||
nk->primary = false;
|
||||
nk->policy = policy_peer;
|
||||
nk->scrub = scrub_peer;
|
||||
nk->mode = mode;
|
||||
nk->headroom = headroom;
|
||||
bpf_mprog_bundle_init(&nk->bundle);
|
||||
|
||||
err = register_netdevice(peer);
|
||||
if (err < 0)
|
||||
goto err_register_peer;
|
||||
netif_carrier_off(peer);
|
||||
if (mode == NETKIT_L2)
|
||||
dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
|
||||
|
||||
err = rtnl_configure_link(peer, NULL, 0, NULL);
|
||||
if (err < 0)
|
||||
goto err_configure_peer;
|
||||
|
||||
if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
|
||||
eth_hw_addr_random(dev);
|
||||
|
|
@ -438,15 +638,23 @@ static int netkit_new_link(struct net_device *dev,
|
|||
nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
|
||||
else
|
||||
strscpy(dev->name, "nk%d", IFNAMSIZ);
|
||||
if (headroom)
|
||||
dev->needed_headroom = headroom;
|
||||
if (tailroom)
|
||||
dev->needed_tailroom = tailroom;
|
||||
|
||||
nk = netkit_priv(dev);
|
||||
nk->primary = true;
|
||||
nk->policy = policy_prim;
|
||||
nk->scrub = scrub_prim;
|
||||
nk->mode = mode;
|
||||
nk->pair = pair;
|
||||
nk->headroom = headroom;
|
||||
bpf_mprog_bundle_init(&nk->bundle);
|
||||
|
||||
if (pair == NETKIT_DEVICE_SINGLE)
|
||||
xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK);
|
||||
|
||||
err = register_netdevice(dev);
|
||||
if (err < 0)
|
||||
goto err_configure_peer;
|
||||
|
|
@ -455,10 +663,12 @@ static int netkit_new_link(struct net_device *dev,
|
|||
dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);
|
||||
|
||||
rcu_assign_pointer(netkit_priv(dev)->peer, peer);
|
||||
rcu_assign_pointer(netkit_priv(peer)->peer, dev);
|
||||
if (peer)
|
||||
rcu_assign_pointer(netkit_priv(peer)->peer, dev);
|
||||
return 0;
|
||||
err_configure_peer:
|
||||
unregister_netdevice(peer);
|
||||
if (peer)
|
||||
unregister_netdevice(peer);
|
||||
return err;
|
||||
err_register_peer:
|
||||
free_netdev(peer);
|
||||
|
|
@ -518,6 +728,8 @@ static struct net_device *netkit_dev_fetch(struct net *net, u32 ifindex, u32 whi
|
|||
nk = netkit_priv(dev);
|
||||
if (!nk->primary)
|
||||
return ERR_PTR(-EACCES);
|
||||
if (nk->pair == NETKIT_DEVICE_SINGLE)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
if (which == BPF_NETKIT_PEER) {
|
||||
dev = rcu_dereference_rtnl(nk->peer);
|
||||
if (!dev)
|
||||
|
|
@ -844,6 +1056,7 @@ static void netkit_release_all(struct net_device *dev)
|
|||
/* ndo_uninit callback of netkit: calls netkit_release_all() first and
 * netkit_queue_unlease() second.
 */
static void netkit_uninit(struct net_device *dev)
{
	netkit_release_all(dev);

	netkit_queue_unlease(dev);
}
|
||||
|
||||
static void netkit_del_link(struct net_device *dev, struct list_head *head)
|
||||
|
|
@ -856,7 +1069,15 @@ static void netkit_del_link(struct net_device *dev, struct list_head *head)
|
|||
if (peer) {
|
||||
nk = netkit_priv(peer);
|
||||
RCU_INIT_POINTER(nk->peer, NULL);
|
||||
unregister_netdevice_queue(peer, head);
|
||||
/* Guard against the peer already being in an unregister
|
||||
* list (e.g. same-namespace teardown where the peer is
|
||||
* in the caller's dev_kill_list). list_move_tail() on an
|
||||
* already-queued device would otherwise corrupt that
|
||||
* list's iteration. This situation can occur via netkit
|
||||
* notifier, hence guard against this scenario.
|
||||
*/
|
||||
if (!unregister_netdevice_queued(peer))
|
||||
unregister_netdevice_queue(peer, head);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -879,6 +1100,7 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
|
|||
{ IFLA_NETKIT_PEER_INFO, "peer info" },
|
||||
{ IFLA_NETKIT_HEADROOM, "headroom" },
|
||||
{ IFLA_NETKIT_TAILROOM, "tailroom" },
|
||||
{ IFLA_NETKIT_PAIRING, "pairing" },
|
||||
};
|
||||
|
||||
if (!nk->primary) {
|
||||
|
|
@ -898,9 +1120,11 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
|
|||
}
|
||||
|
||||
if (data[IFLA_NETKIT_POLICY]) {
|
||||
err = -EOPNOTSUPP;
|
||||
attr = data[IFLA_NETKIT_POLICY];
|
||||
policy = nla_get_u32(attr);
|
||||
err = netkit_check_policy(policy, attr, extack);
|
||||
if (nk->pair == NETKIT_DEVICE_PAIR)
|
||||
err = netkit_check_policy(policy, attr, extack);
|
||||
if (err)
|
||||
return err;
|
||||
WRITE_ONCE(nk->policy, policy);
|
||||
|
|
@ -921,6 +1145,50 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void netkit_check_lease_unregister(struct net_device *dev)
|
||||
{
|
||||
LIST_HEAD(list_kill);
|
||||
u32 q_idx;
|
||||
|
||||
if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING ||
|
||||
!dev->dev.parent)
|
||||
return;
|
||||
|
||||
netdev_lock_ops(dev);
|
||||
for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) {
|
||||
struct net_device *tmp = dev;
|
||||
struct netdev_rx_queue *rxq;
|
||||
u32 tmp_q_idx = q_idx;
|
||||
|
||||
rxq = __netif_get_rx_queue_lease(&tmp, &tmp_q_idx,
|
||||
NETIF_PHYS_TO_VIRT);
|
||||
if (rxq && tmp != dev &&
|
||||
tmp->netdev_ops == &netkit_netdev_ops) {
|
||||
/* A single phys device can have multiple queues leased
|
||||
* to one netkit device. We can only queue that netkit
|
||||
* device once to the list_kill. Queues of that phys
|
||||
* device can be leased with different individual netkit
|
||||
* devices, hence we batch via list_kill.
|
||||
*/
|
||||
if (unregister_netdevice_queued(tmp))
|
||||
continue;
|
||||
netkit_del_link(tmp, &list_kill);
|
||||
}
|
||||
}
|
||||
netdev_unlock_ops(dev);
|
||||
unregister_netdevice_many(&list_kill);
|
||||
}
|
||||
|
||||
static int netkit_notifier(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||
|
||||
if (event == NETDEV_UNREGISTER)
|
||||
netkit_check_lease_unregister(dev);
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static size_t netkit_get_size(const struct net_device *dev)
|
||||
{
|
||||
return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
|
||||
|
|
@ -931,6 +1199,7 @@ static size_t netkit_get_size(const struct net_device *dev)
|
|||
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
|
||||
nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
|
||||
nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
|
||||
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */
|
||||
0;
|
||||
}
|
||||
|
||||
|
|
@ -951,6 +1220,8 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
|
|||
return -EMSGSIZE;
|
||||
if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
|
||||
return -EMSGSIZE;
|
||||
if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair))
|
||||
return -EMSGSIZE;
|
||||
|
||||
if (peer) {
|
||||
nk = netkit_priv(peer);
|
||||
|
|
@ -972,13 +1243,15 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
|
|||
[IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 },
|
||||
[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
|
||||
[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
|
||||
[IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE),
|
||||
[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
|
||||
.reject_message = "Primary attribute is read-only" },
|
||||
};
|
||||
|
||||
static struct rtnl_link_ops netkit_link_ops = {
|
||||
.kind = DRV_NAME,
|
||||
.kind = NETKIT_DRV_NAME,
|
||||
.priv_size = sizeof(struct netkit),
|
||||
.alloc = netkit_alloc,
|
||||
.setup = netkit_setup,
|
||||
.newlink = netkit_new_link,
|
||||
.dellink = netkit_del_link,
|
||||
|
|
@ -992,26 +1265,39 @@ static struct rtnl_link_ops netkit_link_ops = {
|
|||
.maxtype = IFLA_NETKIT_MAX,
|
||||
};
|
||||
|
||||
static __init int netkit_init(void)
|
||||
static struct notifier_block netkit_netdev_notifier = {
|
||||
.notifier_call = netkit_notifier,
|
||||
};
|
||||
|
||||
static __init int netkit_mod_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT ||
|
||||
(int)NETKIT_PASS != (int)TCX_PASS ||
|
||||
(int)NETKIT_DROP != (int)TCX_DROP ||
|
||||
(int)NETKIT_REDIRECT != (int)TCX_REDIRECT);
|
||||
|
||||
return rtnl_link_register(&netkit_link_ops);
|
||||
ret = rtnl_link_register(&netkit_link_ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = register_netdevice_notifier(&netkit_netdev_notifier);
|
||||
if (ret)
|
||||
rtnl_link_unregister(&netkit_link_ops);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __exit void netkit_exit(void)
|
||||
static __exit void netkit_mod_exit(void)
|
||||
{
|
||||
unregister_netdevice_notifier(&netkit_netdev_notifier);
|
||||
rtnl_link_unregister(&netkit_link_ops);
|
||||
}
|
||||
|
||||
module_init(netkit_init);
|
||||
module_exit(netkit_exit);
|
||||
module_init(netkit_mod_init);
|
||||
module_exit(netkit_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("BPF-programmable network device");
|
||||
MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>");
|
||||
MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
|
||||
MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME);
|
||||
|
|
|
|||
|
|
@ -2561,7 +2561,14 @@ struct net_device {
|
|||
* Also protects some fields in:
|
||||
* struct napi_struct, struct netdev_queue, struct netdev_rx_queue
|
||||
*
|
||||
* Ordering: take after rtnl_lock.
|
||||
* Ordering:
|
||||
*
|
||||
* - take after rtnl_lock
|
||||
*
|
||||
* - for the case of netdev queue leasing, the netdev-scope lock is
|
||||
* taken for both the virtual and the physical device; to prevent
|
||||
* deadlocks, the virtual device's lock must always be acquired
|
||||
* before the physical device's (see netdev_nl_queue_create_doit)
|
||||
*/
|
||||
struct mutex lock;
|
||||
|
||||
|
|
@ -3413,6 +3420,8 @@ static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
|
|||
int register_netdevice(struct net_device *dev);
|
||||
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
|
||||
void unregister_netdevice_many(struct list_head *head);
|
||||
bool unregister_netdevice_queued(const struct net_device *dev);
|
||||
|
||||
static inline void unregister_netdevice(struct net_device *dev)
|
||||
{
|
||||
unregister_netdevice_queue(dev, NULL);
|
||||
|
|
|
|||
|
|
@ -150,6 +150,11 @@ enum {
|
|||
* When NIC-wide config is changed the callback will
|
||||
* be invoked for all queues.
|
||||
*
|
||||
* @ndo_queue_create: Create a new RX queue on a virtual device that will
|
||||
* be paired with a physical device's queue via leasing.
|
||||
* Return the new queue id on success, negative error
|
||||
* on failure.
|
||||
*
|
||||
* @supported_params: Bitmask of supported parameters, see QCFG_*.
|
||||
*
|
||||
* Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
|
||||
|
|
@ -178,6 +183,8 @@ struct netdev_queue_mgmt_ops {
|
|||
struct netlink_ext_ack *extack);
|
||||
struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev,
|
||||
int idx);
|
||||
int (*ndo_queue_create)(struct net_device *dev,
|
||||
struct netlink_ext_ack *extack);
|
||||
|
||||
unsigned int supported_params;
|
||||
};
|
||||
|
|
@ -185,7 +192,7 @@ struct netdev_queue_mgmt_ops {
|
|||
void netdev_queue_config(struct net_device *dev, int rxq,
|
||||
struct netdev_queue_config *qcfg);
|
||||
|
||||
bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx);
|
||||
bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx);
|
||||
|
||||
/**
|
||||
* DOC: Lockless queue stopping / waking helpers.
|
||||
|
|
@ -373,6 +380,14 @@ static inline unsigned int netif_xmit_timeout_ms(struct netdev_queue *txq)
|
|||
get_desc, start_thrs); \
|
||||
})
|
||||
|
||||
struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx);
|
||||
|
||||
#endif
|
||||
struct device *netdev_queue_get_dma_dev(struct net_device *dev,
|
||||
unsigned int idx,
|
||||
enum netdev_queue_type type);
|
||||
bool netdev_can_create_queue(const struct net_device *dev,
|
||||
struct netlink_ext_ack *extack);
|
||||
bool netdev_can_lease_queue(const struct net_device *dev,
|
||||
struct netlink_ext_ack *extack);
|
||||
bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
|
||||
enum netdev_queue_type type,
|
||||
struct netlink_ext_ack *extack);
|
||||
#endif /* _LINUX_NET_QUEUES_H */
|
||||
|
|
|
|||
|
|
@ -31,6 +31,14 @@ struct netdev_rx_queue {
|
|||
struct napi_struct *napi;
|
||||
struct netdev_queue_config qcfg;
|
||||
struct pp_memory_provider_params mp_params;
|
||||
|
||||
/* If a queue is leased, then the lease pointer is always
|
||||
* valid. From the physical device it points to the virtual
|
||||
* queue, and from the virtual device it points to the
|
||||
* physical queue.
|
||||
*/
|
||||
struct netdev_rx_queue *lease;
|
||||
netdevice_tracker lease_tracker;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/*
|
||||
|
|
@ -59,6 +67,23 @@ get_netdev_rx_queue_index(struct netdev_rx_queue *queue)
|
|||
return index;
|
||||
}
|
||||
|
||||
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
|
||||
enum netif_lease_dir {
|
||||
NETIF_VIRT_TO_PHYS,
|
||||
NETIF_PHYS_TO_VIRT,
|
||||
};
|
||||
|
||||
#endif
|
||||
struct netdev_rx_queue *
|
||||
__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq,
|
||||
enum netif_lease_dir dir);
|
||||
|
||||
struct netdev_rx_queue *
|
||||
netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq);
|
||||
void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
|
||||
struct net_device *dev);
|
||||
|
||||
int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq);
|
||||
void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
|
||||
struct netdev_rx_queue *rxq_src);
|
||||
void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
|
||||
struct netdev_rx_queue *rxq_src);
|
||||
#endif /* _LINUX_NETDEV_RX_QUEUE_H */
|
||||
|
|
|
|||
|
|
@ -23,14 +23,10 @@ bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr);
|
|||
void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov);
|
||||
void net_mp_niov_clear_page_pool(struct net_iov *niov);
|
||||
|
||||
int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
|
||||
struct pp_memory_provider_params *p);
|
||||
int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
const struct pp_memory_provider_params *p,
|
||||
struct netlink_ext_ack *extack);
|
||||
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
|
||||
struct pp_memory_provider_params *old_p);
|
||||
void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
const struct pp_memory_provider_params *old_p);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1296,6 +1296,11 @@ enum netkit_mode {
|
|||
NETKIT_L3,
|
||||
};
|
||||
|
||||
/* Values for the IFLA_NETKIT_PAIRING attribute.
 * NOTE(review): presumably selects whether a netkit device is created as
 * a pair or as a single device - confirm against the netkit driver.
 */
enum netkit_pairing {
	NETKIT_DEVICE_PAIR,
	NETKIT_DEVICE_SINGLE,
};
|
||||
|
||||
/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
|
||||
* the BPF program if attached. This also means the latter can
|
||||
* consume the two fields if they were populated earlier.
|
||||
|
|
@ -1320,6 +1325,7 @@ enum {
|
|||
IFLA_NETKIT_PEER_SCRUB,
|
||||
IFLA_NETKIT_HEADROOM,
|
||||
IFLA_NETKIT_TAILROOM,
|
||||
IFLA_NETKIT_PAIRING,
|
||||
__IFLA_NETKIT_MAX,
|
||||
};
|
||||
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ enum {
|
|||
NETDEV_A_QUEUE_DMABUF,
|
||||
NETDEV_A_QUEUE_IO_URING,
|
||||
NETDEV_A_QUEUE_XSK,
|
||||
NETDEV_A_QUEUE_LEASE,
|
||||
|
||||
__NETDEV_A_QUEUE_MAX,
|
||||
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
|
||||
|
|
@ -202,6 +203,15 @@ enum {
|
|||
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
|
||||
};
|
||||
|
||||
/* Attributes of the "lease" nest carried in NETDEV_A_QUEUE_LEASE. */
enum {
	NETDEV_A_LEASE_IFINDEX = 1,	/* ifindex of the device on the other end */
	NETDEV_A_LEASE_QUEUE,		/* nested queue id/type of the peer queue */
	NETDEV_A_LEASE_NETNS_ID,	/* netns id locating the peer device */

	__NETDEV_A_LEASE_MAX,
	NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
};
|
||||
|
||||
enum {
|
||||
NETDEV_A_DMABUF_IFINDEX = 1,
|
||||
NETDEV_A_DMABUF_QUEUES,
|
||||
|
|
@ -228,6 +238,7 @@ enum {
|
|||
NETDEV_CMD_BIND_RX,
|
||||
NETDEV_CMD_NAPI_SET,
|
||||
NETDEV_CMD_BIND_TX,
|
||||
NETDEV_CMD_QUEUE_CREATE,
|
||||
|
||||
__NETDEV_CMD_MAX,
|
||||
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
|
||||
|
|
|
|||
|
|
@ -552,8 +552,11 @@ static void io_close_queue(struct io_zcrx_ifq *ifq)
|
|||
}
|
||||
|
||||
if (netdev) {
|
||||
if (ifq->if_rxq != -1)
|
||||
net_mp_close_rxq(netdev, ifq->if_rxq, &p);
|
||||
if (ifq->if_rxq != -1) {
|
||||
netdev_lock(netdev);
|
||||
netif_mp_close_rxq(netdev, ifq->if_rxq, &p);
|
||||
netdev_unlock(netdev);
|
||||
}
|
||||
netdev_put(netdev, &netdev_tracker);
|
||||
}
|
||||
ifq->if_rxq = -1;
|
||||
|
|
@ -826,7 +829,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
}
|
||||
netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL);
|
||||
|
||||
ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq);
|
||||
ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq,
|
||||
NETDEV_QUEUE_TYPE_RX);
|
||||
if (!ifq->dev) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto netdev_put_unlock;
|
||||
|
|
@ -841,7 +845,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
mp_param.rx_page_size = 1U << ifq->niov_shift;
|
||||
mp_param.mp_ops = &io_uring_pp_zc_ops;
|
||||
mp_param.mp_priv = ifq;
|
||||
ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
|
||||
ret = netif_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
|
||||
if (ret)
|
||||
goto netdev_put_unlock;
|
||||
netdev_unlock(ifq->netdev);
|
||||
|
|
|
|||
|
|
@ -1121,6 +1121,14 @@ netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
|
|||
return __netdev_put_lock_ops_compat(dev, net);
|
||||
}
|
||||
|
||||
struct net_device *
|
||||
netdev_put_lock(struct net_device *dev, struct net *net,
|
||||
netdevice_tracker *tracker)
|
||||
{
|
||||
netdev_tracker_free(dev, tracker);
|
||||
return __netdev_put_lock(dev, net);
|
||||
}
|
||||
|
||||
struct net_device *
|
||||
netdev_xa_find_lock(struct net *net, struct net_device *dev,
|
||||
unsigned long *index)
|
||||
|
|
@ -12342,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
|
|||
|
||||
for (i = 0; i < dev->real_num_rx_queues; i++) {
|
||||
struct netdev_rx_queue *rxq = &dev->_rx[i];
|
||||
struct pp_memory_provider_params *p = &rxq->mp_params;
|
||||
|
||||
if (p->mp_ops && p->mp_ops->uninstall)
|
||||
p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
|
||||
__netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -12378,6 +12384,12 @@ static void netif_close_many_and_unlock_cond(struct list_head *close_head)
|
|||
#endif
|
||||
}
|
||||
|
||||
bool unregister_netdevice_queued(const struct net_device *dev)
|
||||
{
|
||||
ASSERT_RTNL();
|
||||
return !list_empty(&dev->unreg_list);
|
||||
}
|
||||
|
||||
void unregister_netdevice_many_notify(struct list_head *head,
|
||||
u32 portid, const struct nlmsghdr *nlh)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ struct net;
|
|||
struct netlink_ext_ack;
|
||||
struct netdev_queue_config;
|
||||
struct cpumask;
|
||||
struct pp_memory_provider_params;
|
||||
|
||||
/* Random bits of netdevice that don't need to be exposed */
|
||||
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
|
||||
|
|
@ -31,6 +32,8 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
|
|||
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
|
||||
|
||||
struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
|
||||
struct net_device *netdev_put_lock(struct net_device *dev, struct net *net,
|
||||
netdevice_tracker *tracker);
|
||||
struct net_device *
|
||||
netdev_xa_find_lock(struct net *net, struct net_device *dev,
|
||||
unsigned long *index);
|
||||
|
|
@ -96,6 +99,15 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
|
|||
struct netdev_queue_config *qcfg,
|
||||
struct netlink_ext_ack *extack);
|
||||
|
||||
bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
|
||||
bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
|
||||
|
||||
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
|
||||
const struct pp_memory_provider_params *p);
|
||||
|
||||
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
|
||||
struct netdev_rx_queue *virt_rxq);
|
||||
|
||||
/* netdev management, shared between various uAPI entry points */
|
||||
struct netdev_name_node {
|
||||
struct hlist_node hlist;
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
|
|||
|
||||
rxq_idx = get_netdev_rx_queue_index(rxq);
|
||||
|
||||
__net_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
|
||||
netif_mp_close_rxq(binding->dev, rxq_idx, &mp_params);
|
||||
}
|
||||
|
||||
percpu_ref_kill(&binding->ref);
|
||||
|
|
@ -163,7 +163,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
|
|||
u32 xa_idx;
|
||||
int err;
|
||||
|
||||
err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
|
||||
err = netif_mp_open_rxq(dev, rxq_idx, &mp_params, extack);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
|
@ -176,7 +176,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
|
|||
return 0;
|
||||
|
||||
err_close_rxq:
|
||||
__net_mp_close_rxq(dev, rxq_idx, &mp_params);
|
||||
netif_mp_close_rxq(dev, rxq_idx, &mp_params);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,12 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range
|
|||
};
|
||||
|
||||
/* Common nested types */
|
||||
const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = {
|
||||
[NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
|
||||
[NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
|
||||
[NETDEV_A_LEASE_NETNS_ID] = NLA_POLICY_MIN(NLA_S32, 0),
|
||||
};
|
||||
|
||||
const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
|
||||
[NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
|
||||
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
|
||||
|
|
@ -107,6 +113,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1]
|
|||
[NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
|
||||
};
|
||||
|
||||
/* NETDEV_CMD_QUEUE_CREATE - do */
|
||||
static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = {
|
||||
[NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
|
||||
[NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
|
||||
[NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy),
|
||||
};
|
||||
|
||||
/* Ops table for netdev */
|
||||
static const struct genl_split_ops netdev_nl_ops[] = {
|
||||
{
|
||||
|
|
@ -205,6 +218,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
|
|||
.maxattr = NETDEV_A_DMABUF_FD,
|
||||
.flags = GENL_CMD_CAP_DO,
|
||||
},
|
||||
{
|
||||
.cmd = NETDEV_CMD_QUEUE_CREATE,
|
||||
.doit = netdev_nl_queue_create_doit,
|
||||
.policy = netdev_queue_create_nl_policy,
|
||||
.maxattr = NETDEV_A_QUEUE_LEASE,
|
||||
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
|
||||
},
|
||||
};
|
||||
|
||||
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#include <net/netdev_netlink.h>
|
||||
|
||||
/* Common nested types */
|
||||
extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1];
|
||||
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
|
||||
extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
|
||||
|
||||
|
|
@ -36,6 +37,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
|
|||
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
|
||||
int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
|
||||
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info);
|
||||
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info);
|
||||
|
||||
enum {
|
||||
NETDEV_NLGRP_MGMT,
|
||||
|
|
|
|||
|
|
@ -386,12 +386,63 @@ static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev,
|
||||
u32 q_idx, u32 q_type)
|
||||
{
|
||||
struct net_device *orig_netdev = netdev;
|
||||
struct nlattr *nest_lease, *nest_queue;
|
||||
struct netdev_rx_queue *rxq;
|
||||
struct net *net, *peer_net;
|
||||
|
||||
rxq = __netif_get_rx_queue_lease(&netdev, &q_idx,
|
||||
NETIF_PHYS_TO_VIRT);
|
||||
if (!rxq || orig_netdev == netdev)
|
||||
return 0;
|
||||
|
||||
nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE);
|
||||
if (!nest_lease)
|
||||
goto nla_put_failure;
|
||||
|
||||
nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE);
|
||||
if (!nest_queue)
|
||||
goto nla_put_failure;
|
||||
if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx))
|
||||
goto nla_put_failure;
|
||||
if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type))
|
||||
goto nla_put_failure;
|
||||
nla_nest_end(rsp, nest_queue);
|
||||
|
||||
if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX,
|
||||
READ_ONCE(netdev->ifindex)))
|
||||
goto nla_put_failure;
|
||||
|
||||
rcu_read_lock();
|
||||
peer_net = dev_net_rcu(netdev);
|
||||
net = dev_net_rcu(orig_netdev);
|
||||
if (!net_eq(net, peer_net)) {
|
||||
s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC);
|
||||
|
||||
if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id))
|
||||
goto nla_put_failure_unlock;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
nla_nest_end(rsp, nest_lease);
|
||||
return 0;
|
||||
|
||||
nla_put_failure_unlock:
|
||||
rcu_read_unlock();
|
||||
nla_put_failure:
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int
|
||||
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
|
||||
u32 q_idx, u32 q_type, const struct genl_info *info)
|
||||
{
|
||||
struct pp_memory_provider_params *params;
|
||||
struct netdev_rx_queue *rxq;
|
||||
struct net_device *orig_netdev = netdev;
|
||||
struct netdev_rx_queue *rxq, *rxq_lease;
|
||||
struct netdev_queue *txq;
|
||||
void *hdr;
|
||||
|
||||
|
|
@ -409,17 +460,22 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
|
|||
rxq = __netif_get_rx_queue(netdev, q_idx);
|
||||
if (nla_put_napi_id(rsp, rxq->napi))
|
||||
goto nla_put_failure;
|
||||
if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type))
|
||||
goto nla_put_failure;
|
||||
|
||||
rxq_lease = netif_get_rx_queue_lease_locked(&netdev, &q_idx);
|
||||
if (rxq_lease)
|
||||
rxq = rxq_lease;
|
||||
params = &rxq->mp_params;
|
||||
if (params->mp_ops &&
|
||||
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
|
||||
goto nla_put_failure;
|
||||
goto nla_put_failure_lease;
|
||||
#ifdef CONFIG_XDP_SOCKETS
|
||||
if (rxq->pool)
|
||||
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
|
||||
goto nla_put_failure;
|
||||
goto nla_put_failure_lease;
|
||||
#endif
|
||||
|
||||
netif_put_rx_queue_lease_locked(orig_netdev, netdev);
|
||||
break;
|
||||
case NETDEV_QUEUE_TYPE_TX:
|
||||
txq = netdev_get_tx_queue(netdev, q_idx);
|
||||
|
|
@ -437,6 +493,8 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
|
|||
|
||||
return 0;
|
||||
|
||||
nla_put_failure_lease:
|
||||
netif_put_rx_queue_lease_locked(orig_netdev, netdev);
|
||||
nla_put_failure:
|
||||
genlmsg_cancel(rsp, hdr);
|
||||
return -EMSGSIZE;
|
||||
|
|
@ -918,7 +976,8 @@ netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap,
|
|||
for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) {
|
||||
struct device *rxq_dma_dev;
|
||||
|
||||
rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx);
|
||||
rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx,
|
||||
NETDEV_QUEUE_TYPE_RX);
|
||||
if (dma_dev && rxq_dma_dev != dma_dev) {
|
||||
NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)",
|
||||
rxq_idx, prev_rxq_idx);
|
||||
|
|
@ -1095,7 +1154,7 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
|
|||
goto err_unlock_netdev;
|
||||
}
|
||||
|
||||
dma_dev = netdev_queue_get_dma_dev(netdev, 0);
|
||||
dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX);
|
||||
binding = net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE,
|
||||
dmabuf_fd, priv, info->extack);
|
||||
if (IS_ERR(binding)) {
|
||||
|
|
@ -1120,6 +1179,173 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info)
|
|||
return err;
|
||||
}
|
||||
|
||||
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1;
|
||||
const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1;
|
||||
int err, ifindex, ifindex_lease, queue_id, queue_id_lease;
|
||||
struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
|
||||
struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)];
|
||||
struct netdev_rx_queue *rxq, *rxq_lease;
|
||||
struct net_device *dev, *dev_lease;
|
||||
netdevice_tracker dev_tracker;
|
||||
s32 netns_lease = -1;
|
||||
struct nlattr *nest;
|
||||
struct sk_buff *rsp;
|
||||
struct net *net;
|
||||
void *hdr;
|
||||
|
||||
if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) ||
|
||||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
|
||||
GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE))
|
||||
return -EINVAL;
|
||||
if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) !=
|
||||
NETDEV_QUEUE_TYPE_RX) {
|
||||
NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
|
||||
|
||||
nest = info->attrs[NETDEV_A_QUEUE_LEASE];
|
||||
err = nla_parse_nested(ltb, lmaxtype, nest,
|
||||
netdev_lease_nl_policy, info->extack);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) ||
|
||||
NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE))
|
||||
return -EINVAL;
|
||||
if (ltb[NETDEV_A_LEASE_NETNS_ID]) {
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]);
|
||||
}
|
||||
|
||||
ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]);
|
||||
|
||||
nest = ltb[NETDEV_A_LEASE_QUEUE];
|
||||
err = nla_parse_nested(qtb, qmaxtype, nest,
|
||||
netdev_queue_id_nl_policy, info->extack);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) ||
|
||||
NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE))
|
||||
return -EINVAL;
|
||||
if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
|
||||
NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]);
|
||||
|
||||
rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
||||
if (!rsp)
|
||||
return -ENOMEM;
|
||||
|
||||
hdr = genlmsg_iput(rsp, info);
|
||||
if (!hdr) {
|
||||
err = -EMSGSIZE;
|
||||
goto err_genlmsg_free;
|
||||
}
|
||||
|
||||
/* Locking order is always from the virtual to the physical device
|
||||
* since this is also the same order when applications open the
|
||||
* memory provider later on.
|
||||
*/
|
||||
dev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
|
||||
if (!dev) {
|
||||
err = -ENODEV;
|
||||
goto err_genlmsg_free;
|
||||
}
|
||||
if (!netdev_can_create_queue(dev, info->extack)) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock_dev;
|
||||
}
|
||||
|
||||
net = genl_info_net(info);
|
||||
if (netns_lease >= 0) {
|
||||
net = get_net_ns_by_id(net, netns_lease);
|
||||
if (!net) {
|
||||
err = -ENONET;
|
||||
goto err_unlock_dev;
|
||||
}
|
||||
}
|
||||
|
||||
dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker,
|
||||
GFP_KERNEL);
|
||||
if (!dev_lease) {
|
||||
err = -ENODEV;
|
||||
goto err_put_netns;
|
||||
}
|
||||
if (!netdev_can_lease_queue(dev_lease, info->extack)) {
|
||||
netdev_put(dev_lease, &dev_tracker);
|
||||
err = -EINVAL;
|
||||
goto err_put_netns;
|
||||
}
|
||||
|
||||
dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker);
|
||||
if (!dev_lease) {
|
||||
err = -ENODEV;
|
||||
goto err_put_netns;
|
||||
}
|
||||
if (queue_id_lease >= dev_lease->real_num_rx_queues) {
|
||||
err = -ERANGE;
|
||||
NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]);
|
||||
goto err_unlock_dev_lease;
|
||||
}
|
||||
if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX,
|
||||
info->extack)) {
|
||||
err = -EBUSY;
|
||||
goto err_unlock_dev_lease;
|
||||
}
|
||||
|
||||
rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease);
|
||||
rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1);
|
||||
|
||||
/* Leasing queues from different physical devices is currently
|
||||
* not supported. Capabilities such as XDP features and DMA
|
||||
* device may differ between physical devices, and computing
|
||||
* a correct intersection for the virtual device is not yet
|
||||
* implemented.
|
||||
*/
|
||||
if (rxq->lease && rxq->lease->dev != dev_lease) {
|
||||
err = -EOPNOTSUPP;
|
||||
NL_SET_ERR_MSG(info->extack,
|
||||
"Leasing queues from different devices not supported");
|
||||
goto err_unlock_dev_lease;
|
||||
}
|
||||
|
||||
queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack);
|
||||
if (queue_id < 0) {
|
||||
err = queue_id;
|
||||
goto err_unlock_dev_lease;
|
||||
}
|
||||
rxq = __netif_get_rx_queue(dev, queue_id);
|
||||
|
||||
netdev_rx_queue_lease(rxq, rxq_lease);
|
||||
|
||||
nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id);
|
||||
genlmsg_end(rsp, hdr);
|
||||
|
||||
netdev_unlock(dev_lease);
|
||||
netdev_unlock(dev);
|
||||
if (netns_lease >= 0)
|
||||
put_net(net);
|
||||
|
||||
return genlmsg_reply(rsp, info);
|
||||
|
||||
err_unlock_dev_lease:
|
||||
netdev_unlock(dev_lease);
|
||||
err_put_netns:
|
||||
if (netns_lease >= 0)
|
||||
put_net(net);
|
||||
err_unlock_dev:
|
||||
netdev_unlock(dev);
|
||||
err_genlmsg_free:
|
||||
nlmsg_free(rsp);
|
||||
return err;
|
||||
}
|
||||
|
||||
void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
|
||||
{
|
||||
INIT_LIST_HEAD(&priv->bindings);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,13 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <net/netdev_queues.h>
|
||||
#include <net/netdev_rx_queue.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
|
||||
/**
|
||||
* netdev_queue_get_dma_dev() - get dma device for zero-copy operations
|
||||
* @dev: net_device
|
||||
* @idx: queue index
|
||||
*
|
||||
* Get dma device for zero-copy operations to be used for this queue.
|
||||
* When such device is not available or valid, the function will return NULL.
|
||||
*
|
||||
* Return: Device or NULL on error
|
||||
*/
|
||||
struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx)
|
||||
#include "dev.h"
|
||||
|
||||
static struct device *
|
||||
__netdev_queue_get_dma_dev(struct net_device *dev, unsigned int idx)
|
||||
{
|
||||
const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops;
|
||||
struct device *dma_dev;
|
||||
|
|
@ -25,3 +20,93 @@ struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx)
|
|||
return dma_dev && dma_dev->dma_mask ? dma_dev : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* netdev_queue_get_dma_dev() - get dma device for zero-copy operations
|
||||
* @dev: net_device
|
||||
* @idx: queue index
|
||||
* @type: queue type (RX or TX)
|
||||
*
|
||||
* Get dma device for zero-copy operations to be used for this queue. If
|
||||
* the queue is an RX queue leased from a physical queue, we retrieve the
|
||||
* physical queue's dma device. When the dma device is not available or
|
||||
* valid, the function will return NULL.
|
||||
*
|
||||
* Return: Device or NULL on error
|
||||
*/
|
||||
struct device *netdev_queue_get_dma_dev(struct net_device *dev,
|
||||
unsigned int idx,
|
||||
enum netdev_queue_type type)
|
||||
{
|
||||
struct net_device *orig_dev = dev;
|
||||
struct device *dma_dev;
|
||||
|
||||
/* Only RX side supports queue leasing today. */
|
||||
if (type != NETDEV_QUEUE_TYPE_RX || !netif_rxq_is_leased(dev, idx))
|
||||
return __netdev_queue_get_dma_dev(dev, idx);
|
||||
|
||||
if (!netif_get_rx_queue_lease_locked(&dev, &idx))
|
||||
return NULL;
|
||||
|
||||
dma_dev = __netdev_queue_get_dma_dev(dev, idx);
|
||||
netif_put_rx_queue_lease_locked(orig_dev, dev);
|
||||
return dma_dev;
|
||||
}
|
||||
|
||||
bool netdev_can_create_queue(const struct net_device *dev,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
if (dev->dev.parent) {
|
||||
NL_SET_ERR_MSG(extack, "Device is not a virtual device");
|
||||
return false;
|
||||
}
|
||||
if (!dev->queue_mgmt_ops ||
|
||||
!dev->queue_mgmt_ops->ndo_queue_create) {
|
||||
NL_SET_ERR_MSG(extack, "Device does not support queue creation");
|
||||
return false;
|
||||
}
|
||||
if (dev->real_num_rx_queues < 1 ||
|
||||
dev->real_num_tx_queues < 1) {
|
||||
NL_SET_ERR_MSG(extack, "Device must have at least one real queue");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool netdev_can_lease_queue(const struct net_device *dev,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
if (!dev->dev.parent) {
|
||||
NL_SET_ERR_MSG(extack, "Lease device is a virtual device");
|
||||
return false;
|
||||
}
|
||||
if (!netif_device_present(dev)) {
|
||||
NL_SET_ERR_MSG(extack, "Lease device has been removed from the system");
|
||||
return false;
|
||||
}
|
||||
if (!dev->queue_mgmt_ops) {
|
||||
NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool netdev_queue_busy(struct net_device *dev, unsigned int idx,
|
||||
enum netdev_queue_type type,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
if (xsk_get_pool_from_qid(dev, idx)) {
|
||||
NL_SET_ERR_MSG(extack, "Device queue in use by AF_XDP");
|
||||
return true;
|
||||
}
|
||||
if (type == NETDEV_QUEUE_TYPE_TX)
|
||||
return false;
|
||||
if (netif_rxq_is_leased(dev, idx)) {
|
||||
NL_SET_ERR_MSG(extack, "Device queue in use due to queue leasing");
|
||||
return true;
|
||||
}
|
||||
if (netif_rxq_has_mp(dev, idx)) {
|
||||
NL_SET_ERR_MSG(extack, "Device queue in use by memory provider");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,15 +10,109 @@
|
|||
#include "dev.h"
|
||||
#include "page_pool_priv.h"
|
||||
|
||||
/* See also page_pool_is_unreadable() */
|
||||
bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx)
|
||||
void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst,
|
||||
struct netdev_rx_queue *rxq_src)
|
||||
{
|
||||
struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx);
|
||||
netdev_assert_locked(rxq_src->dev);
|
||||
netdev_assert_locked(rxq_dst->dev);
|
||||
|
||||
return !!rxq->mp_params.mp_ops;
|
||||
netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL);
|
||||
|
||||
WRITE_ONCE(rxq_src->lease, rxq_dst);
|
||||
WRITE_ONCE(rxq_dst->lease, rxq_src);
|
||||
}
|
||||
|
||||
void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
|
||||
struct netdev_rx_queue *rxq_src)
|
||||
{
|
||||
netdev_assert_locked(rxq_dst->dev);
|
||||
netdev_assert_locked(rxq_src->dev);
|
||||
|
||||
netif_rxq_cleanup_unlease(rxq_src, rxq_dst);
|
||||
|
||||
WRITE_ONCE(rxq_src->lease, NULL);
|
||||
WRITE_ONCE(rxq_dst->lease, NULL);
|
||||
|
||||
netdev_put(rxq_src->dev, &rxq_src->lease_tracker);
|
||||
}
|
||||
|
||||
bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx)
|
||||
{
|
||||
if (rxq_idx < dev->real_num_rx_queues)
|
||||
return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Virtual devices eligible for leasing have no dev->dev.parent, while
|
||||
* physical devices always have one. Use this to enforce the correct
|
||||
* lease traversal direction.
|
||||
*/
|
||||
static bool netif_lease_dir_ok(const struct net_device *dev,
|
||||
enum netif_lease_dir dir)
|
||||
{
|
||||
if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent)
|
||||
return true;
|
||||
if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct netdev_rx_queue *
|
||||
__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx,
|
||||
enum netif_lease_dir dir)
|
||||
{
|
||||
struct net_device *orig_dev = *dev;
|
||||
struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx);
|
||||
|
||||
if (rxq->lease) {
|
||||
if (!netif_lease_dir_ok(orig_dev, dir))
|
||||
return NULL;
|
||||
rxq = rxq->lease;
|
||||
*rxq_idx = get_netdev_rx_queue_index(rxq);
|
||||
*dev = rxq->dev;
|
||||
}
|
||||
return rxq;
|
||||
}
|
||||
|
||||
struct netdev_rx_queue *
|
||||
netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx)
|
||||
{
|
||||
struct net_device *orig_dev = *dev;
|
||||
struct netdev_rx_queue *rxq;
|
||||
|
||||
/* Locking order is always from the virtual to the physical device
|
||||
* see netdev_nl_queue_create_doit().
|
||||
*/
|
||||
netdev_ops_assert_locked(orig_dev);
|
||||
rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS);
|
||||
if (rxq && orig_dev != *dev)
|
||||
netdev_lock(*dev);
|
||||
return rxq;
|
||||
}
|
||||
|
||||
/*
 * Counterpart of netif_get_rx_queue_lease_locked(): unlock @dev if the
 * lease lookup moved away from @orig_dev. When both are the same device
 * no extra lock was taken, so there is nothing to release.
 */
void netif_put_rx_queue_lease_locked(struct net_device *orig_dev,
				     struct net_device *dev)
{
	if (orig_dev == dev)
		return;

	netdev_unlock(dev);
}
|
||||
|
||||
/* See also page_pool_is_unreadable() */
|
||||
bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx)
|
||||
{
|
||||
if (rxq_idx < dev->real_num_rx_queues)
|
||||
return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops;
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL(netif_rxq_has_unreadable_mp);
|
||||
|
||||
bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx)
|
||||
{
|
||||
if (rxq_idx < dev->real_num_rx_queues)
|
||||
return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int netdev_rx_queue_reconfig(struct net_device *dev,
|
||||
unsigned int rxq_idx,
|
||||
struct netdev_queue_config *qcfg_old,
|
||||
|
|
@ -108,9 +202,9 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
|
|||
}
|
||||
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
|
||||
|
||||
int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
const struct pp_memory_provider_params *p,
|
||||
struct netlink_ext_ack *extack)
|
||||
static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
const struct pp_memory_provider_params *p,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
|
||||
struct netdev_queue_config qcfg[2];
|
||||
|
|
@ -120,12 +214,6 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
|||
if (!qops)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (rxq_idx >= dev->real_num_rx_queues) {
|
||||
NL_SET_ERR_MSG(extack, "rx queue index out of range");
|
||||
return -ERANGE;
|
||||
}
|
||||
rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
|
||||
|
||||
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
|
||||
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
|
||||
return -EINVAL;
|
||||
|
|
@ -172,27 +260,48 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
struct pp_memory_provider_params *p)
|
||||
int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
|
||||
const struct pp_memory_provider_params *p,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct net_device *orig_dev = dev;
|
||||
int ret;
|
||||
|
||||
netdev_lock(dev);
|
||||
ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL);
|
||||
netdev_unlock(dev);
|
||||
if (!netdev_need_ops_lock(dev))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (rxq_idx >= dev->real_num_rx_queues) {
|
||||
NL_SET_ERR_MSG(extack, "rx queue index out of range");
|
||||
return -ERANGE;
|
||||
}
|
||||
rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
|
||||
|
||||
if (!netif_rxq_is_leased(dev, rxq_idx))
|
||||
return __netif_mp_open_rxq(dev, rxq_idx, p, extack);
|
||||
|
||||
if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
|
||||
NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
|
||||
return -EBUSY;
|
||||
}
|
||||
if (!dev->dev.parent) {
|
||||
NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
|
||||
ret = -EOPNOTSUPP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
|
||||
out:
|
||||
netif_put_rx_queue_lease_locked(orig_dev, dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
|
||||
const struct pp_memory_provider_params *old_p)
|
||||
static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
|
||||
const struct pp_memory_provider_params *old_p)
|
||||
{
|
||||
struct netdev_queue_config qcfg[2];
|
||||
struct netdev_rx_queue *rxq;
|
||||
int err;
|
||||
|
||||
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
|
||||
return;
|
||||
|
||||
rxq = __netif_get_rx_queue(dev, ifq_idx);
|
||||
|
||||
/* Callers holding a netdev ref may get here after we already
|
||||
|
|
@ -214,10 +323,47 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
|
|||
WARN_ON(err && err != -ENETDOWN);
|
||||
}
|
||||
|
||||
void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx,
|
||||
struct pp_memory_provider_params *old_p)
|
||||
void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
|
||||
const struct pp_memory_provider_params *old_p)
|
||||
{
|
||||
netdev_lock(dev);
|
||||
__net_mp_close_rxq(dev, ifq_idx, old_p);
|
||||
netdev_unlock(dev);
|
||||
struct net_device *orig_dev = dev;
|
||||
|
||||
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
|
||||
return;
|
||||
if (!netif_rxq_is_leased(dev, ifq_idx))
|
||||
return __netif_mp_close_rxq(dev, ifq_idx, old_p);
|
||||
|
||||
if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
|
||||
return;
|
||||
|
||||
__netif_mp_close_rxq(dev, ifq_idx, old_p);
|
||||
netif_put_rx_queue_lease_locked(orig_dev, dev);
|
||||
}
|
||||
|
||||
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
|
||||
const struct pp_memory_provider_params *p)
|
||||
{
|
||||
if (p->mp_ops && p->mp_ops->uninstall)
|
||||
p->mp_ops->uninstall(p->mp_priv, rxq);
|
||||
}
|
||||
|
||||
/* Clean up memory provider state when a queue lease is torn down. If
|
||||
* a memory provider was installed on the physical queue via the lease,
|
||||
* close it now. The memory provider is a property of the queue itself,
|
||||
* and it was _guaranteed_ to be installed on the physical queue via
|
||||
* the lease redirection. The extra __netif_mp_close_rxq is needed
|
||||
* since the physical queue can outlive the virtual queue in the lease
|
||||
* case, so it needs to be reconfigured to clear the memory provider.
|
||||
*/
|
||||
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
|
||||
struct netdev_rx_queue *virt_rxq)
|
||||
{
|
||||
struct pp_memory_provider_params *p = &phys_rxq->mp_params;
|
||||
unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq);
|
||||
|
||||
if (!p->mp_ops)
|
||||
return;
|
||||
|
||||
__netif_mp_uninstall_rxq(virt_rxq, p);
|
||||
__netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include <net/netdev_queues.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "netlink.h"
|
||||
|
|
@ -109,7 +109,7 @@ ethnl_set_channels_validate(struct ethnl_req_info *req_info,
|
|||
static int
|
||||
ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
|
||||
{
|
||||
unsigned int from_channel, old_total, i;
|
||||
unsigned int old_combined, old_rx, old_tx, i;
|
||||
bool mod = false, mod_combined = false;
|
||||
struct net_device *dev = req_info->dev;
|
||||
struct ethtool_channels channels = {};
|
||||
|
|
@ -118,8 +118,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
|
|||
int ret;
|
||||
|
||||
dev->ethtool_ops->get_channels(dev, &channels);
|
||||
old_total = channels.combined_count +
|
||||
max(channels.rx_count, channels.tx_count);
|
||||
old_combined = channels.combined_count;
|
||||
old_rx = channels.rx_count;
|
||||
old_tx = channels.tx_count;
|
||||
|
||||
ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT],
|
||||
&mod);
|
||||
|
|
@ -169,14 +170,19 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Disabling channels, query zero-copy AF_XDP sockets */
|
||||
from_channel = channels.combined_count +
|
||||
min(channels.rx_count, channels.tx_count);
|
||||
for (i = from_channel; i < old_total; i++)
|
||||
if (xsk_get_pool_from_qid(dev, i)) {
|
||||
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets");
|
||||
/* ensure channels are not busy at the moment */
|
||||
for (i = channels.combined_count + channels.rx_count;
|
||||
i < old_combined + old_rx; i++) {
|
||||
if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX,
|
||||
info->extack))
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
for (i = channels.combined_count + channels.tx_count;
|
||||
i < old_combined + old_tx; i++) {
|
||||
if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX,
|
||||
info->extack))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = dev->ethtool_ops->set_channels(dev, &channels);
|
||||
return ret < 0 ? ret : 1;
|
||||
|
|
|
|||
|
|
@ -27,12 +27,12 @@
|
|||
#include <linux/net.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/ethtool_netlink.h>
|
||||
#include <net/devlink.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include <net/flow_offload.h>
|
||||
#include <net/netdev_lock.h>
|
||||
#include <linux/ethtool_netlink.h>
|
||||
#include <net/netdev_queues.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
|
@ -2250,7 +2250,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
|
|||
void __user *useraddr)
|
||||
{
|
||||
struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
|
||||
u16 from_channel, to_channel;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
|
|
@ -2284,13 +2283,17 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Disabling channels, query zero-copy AF_XDP sockets */
|
||||
from_channel = channels.combined_count +
|
||||
min(channels.rx_count, channels.tx_count);
|
||||
to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count);
|
||||
for (i = from_channel; i < to_channel; i++)
|
||||
if (xsk_get_pool_from_qid(dev, i))
|
||||
/* Disabling channels, query busy queues (AF_XDP, queue leasing) */
|
||||
for (i = channels.combined_count + channels.rx_count;
|
||||
i < curr.combined_count + curr.rx_count; i++) {
|
||||
if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX, NULL))
|
||||
return -EINVAL;
|
||||
}
|
||||
for (i = channels.combined_count + channels.tx_count;
|
||||
i < curr.combined_count + curr.tx_count; i++) {
|
||||
if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX, NULL))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = dev->ethtool_ops->set_channels(dev, &channels);
|
||||
if (!ret)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@
|
|||
#include <linux/netdevice.h>
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <net/netdev_queues.h>
|
||||
#include <net/xdp_sock_drv.h>
|
||||
#include <net/busy_poll.h>
|
||||
#include <net/netdev_lock.h>
|
||||
|
|
@ -117,10 +119,18 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid);
|
|||
|
||||
/*
 * Clear the XSK buffer pool pointer registered for @queue_id on @dev.
 *
 * When @queue_id is a valid active RX queue index, the RX queue lease is
 * looked up first, so the pool pointers are cleared on the device/queue
 * pair the lookup yields. A failing lookup only produces a one-time
 * warning; the clears still run with whatever @dev/@id hold afterwards.
 *
 * Both the RX and the TX slot at the resulting index are reset, each only
 * if the index is within the respective allocated queue count.
 */
void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
	/* Kept for the put side; @dev may be redirected by the lease lookup. */
	struct net_device *orig_dev = dev;
	/* Widened copy: the lease helper works on an unsigned int index. */
	unsigned int id = queue_id;

	if (id < dev->real_num_rx_queues)
		WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id));

	if (id < dev->num_rx_queues)
		dev->_rx[id].pool = NULL;
	if (id < dev->num_tx_queues)
		dev->_tx[id].pool = NULL;

	netif_put_rx_queue_lease_locked(orig_dev, dev);
}
|
||||
|
||||
/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
|
||||
|
|
@ -130,17 +140,30 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
|
|||
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
|
||||
u16 queue_id)
|
||||
{
|
||||
if (queue_id >= max_t(unsigned int,
|
||||
dev->real_num_rx_queues,
|
||||
dev->real_num_tx_queues))
|
||||
struct net_device *orig_dev = dev;
|
||||
unsigned int id = queue_id;
|
||||
int ret = 0;
|
||||
|
||||
if (id >= max(dev->real_num_rx_queues,
|
||||
dev->real_num_tx_queues))
|
||||
return -EINVAL;
|
||||
|
||||
if (queue_id < dev->real_num_rx_queues)
|
||||
dev->_rx[queue_id].pool = pool;
|
||||
if (queue_id < dev->real_num_tx_queues)
|
||||
dev->_tx[queue_id].pool = pool;
|
||||
if (id < dev->real_num_rx_queues) {
|
||||
if (!netif_get_rx_queue_lease_locked(&dev, &id))
|
||||
return -EBUSY;
|
||||
if (xsk_get_pool_from_qid(dev, id)) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (id < dev->real_num_rx_queues)
|
||||
dev->_rx[id].pool = pool;
|
||||
if (id < dev->real_num_tx_queues)
|
||||
dev->_tx[id].pool = pool;
|
||||
out:
|
||||
netif_put_rx_queue_lease_locked(orig_dev, dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
|
||||
|
|
@ -330,12 +353,36 @@ static bool xsk_is_bound(struct xdp_sock *xs)
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool xsk_dev_queue_valid(const struct xdp_sock *xs,
|
||||
const struct xdp_rxq_info *info)
|
||||
{
|
||||
struct net_device *dev = xs->dev;
|
||||
u32 queue_index = xs->queue_id;
|
||||
struct netdev_rx_queue *rxq;
|
||||
|
||||
if (info->dev == dev &&
|
||||
info->queue_index == queue_index)
|
||||
return true;
|
||||
|
||||
if (queue_index < dev->real_num_rx_queues) {
|
||||
rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease);
|
||||
if (!rxq)
|
||||
return false;
|
||||
|
||||
dev = rxq->dev;
|
||||
queue_index = get_netdev_rx_queue_index(rxq);
|
||||
|
||||
return info->dev == dev &&
|
||||
info->queue_index == queue_index;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
|
||||
{
|
||||
if (!xsk_is_bound(xs))
|
||||
return -ENXIO;
|
||||
|
||||
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
|
||||
if (!xsk_dev_queue_valid(xs, xdp->rxq))
|
||||
return -EINVAL;
|
||||
|
||||
if (len > __xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
|
||||
|
|
|
|||
|
|
@ -160,6 +160,7 @@ enum {
|
|||
NETDEV_A_QUEUE_DMABUF,
|
||||
NETDEV_A_QUEUE_IO_URING,
|
||||
NETDEV_A_QUEUE_XSK,
|
||||
NETDEV_A_QUEUE_LEASE,
|
||||
|
||||
__NETDEV_A_QUEUE_MAX,
|
||||
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
|
||||
|
|
@ -202,6 +203,15 @@ enum {
|
|||
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
|
||||
};
|
||||
|
||||
/*
 * Attribute IDs of the "lease" attribute set, which the netlink spec nests
 * under the queue's lease attribute (NETDEV_A_QUEUE_LEASE). Numbering starts
 * at 1; __NETDEV_A_LEASE_MAX is the one-past-the-end sentinel and
 * NETDEV_A_LEASE_MAX the highest valid ID.
 */
enum {
	NETDEV_A_LEASE_IFINDEX = 1,
	NETDEV_A_LEASE_QUEUE,
	NETDEV_A_LEASE_NETNS_ID,

	__NETDEV_A_LEASE_MAX,
	NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
};
|
||||
|
||||
enum {
|
||||
NETDEV_A_DMABUF_IFINDEX = 1,
|
||||
NETDEV_A_DMABUF_QUEUES,
|
||||
|
|
@ -228,6 +238,7 @@ enum {
|
|||
NETDEV_CMD_BIND_RX,
|
||||
NETDEV_CMD_NAPI_SET,
|
||||
NETDEV_CMD_BIND_TX,
|
||||
NETDEV_CMD_QUEUE_CREATE,
|
||||
|
||||
__NETDEV_CMD_MAX,
|
||||
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ TEST_PROGS = \
|
|||
loopback.sh \
|
||||
nic_timestamp.py \
|
||||
nk_netns.py \
|
||||
nk_qlease.py \
|
||||
pp_alloc_fail.py \
|
||||
rss_api.py \
|
||||
rss_ctx.py \
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ try:
|
|||
# Import one by one to avoid pylint false positives
|
||||
from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
|
||||
from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
|
||||
NlError, RtnlFamily, DevlinkFamily, PSPFamily
|
||||
NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink
|
||||
from net.lib.py import CmdExitFailure
|
||||
from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
|
||||
fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, \
|
||||
|
|
@ -36,7 +36,7 @@ try:
|
|||
|
||||
__all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
|
||||
"EthtoolFamily", "NetdevFamily", "NetshaperFamily",
|
||||
"NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
|
||||
"NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink",
|
||||
"CmdExitFailure",
|
||||
"bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
|
||||
"fd_read_timeout", "ip", "rand_port", "rand_ports",
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue