mirror of https://github.com/torvalds/linux.git
net-sysfs: use rps_tag_ptr and remove metadata from rps_dev_flow_table
Instead of storing @log at the beginning of struct rps_dev_flow_table,
use the 5 low-order bits of the rps_tag_ptr to store the log of the
table size. This removes a potential cache line miss (for light
traffic).

This also lets us switch to a single high-order allocation instead of
vmalloc() when CONFIG_RFS_ACCEL is not set.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260302181432.1836150-8-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent b2cc61857e
commit a435163d31
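Note for context: rps_tag_ptr, rps_tag_to_log() and rps_tag_to_table() come from <net/rps-types.h>, introduced earlier in this series; their definitions are not part of this diff. A minimal sketch of the encoding the patch relies on (a page-aligned table, so the 5 low-order bits of the pointer are free; the mask name below is hypothetical):

```c
/* Sketch only; the real definitions live in <net/rps-types.h>
 * and may differ in detail.
 */
typedef unsigned long rps_tag_ptr;

#define RPS_TAG_LOG_MASK 0x1fUL	/* 5 low-order bits carry the log */

/* log2 of the number of slots in the per-queue flow table */
static inline u8 rps_tag_to_log(rps_tag_ptr tag)
{
	return tag & RPS_TAG_LOG_MASK;
}

/* Strip the tag bits to recover the flow array itself */
static inline struct rps_dev_flow *rps_tag_to_table(rps_tag_ptr tag)
{
	return (struct rps_dev_flow *)(tag & ~RPS_TAG_LOG_MASK);
}
```

This matches how store_rps_dev_flow_table_cnt() below builds the tag: it first checks that the fresh allocation has zero low-order bits, then ORs in ilog2(mask) + 1.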
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -8,13 +8,14 @@
 #include <net/xdp.h>
 #include <net/page_pool/types.h>
 #include <net/netdev_queues.h>
+#include <net/rps-types.h>
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
 	struct xdp_rxq_info xdp_rxq;
 #ifdef CONFIG_RPS
 	struct rps_map __rcu *rps_map;
-	struct rps_dev_flow_table __rcu *rps_flow_table;
+	rps_tag_ptr rps_flow_table;
 #endif
 	struct kobject kobj;
 	const struct attribute_group **groups;
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -39,16 +39,6 @@ struct rps_dev_flow {
 };
 #define RPS_NO_FILTER 0xffff
 
-/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
-	u8 log;
-	struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
-    ((_num) * sizeof(struct rps_dev_flow)))
-
 /*
  * The rps_sock_flow_table contains mappings of flows to the last CPU
  * on which they were processed by the application (set in recvmsg).
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4968,16 +4968,16 @@ EXPORT_SYMBOL(rps_needed);
 struct static_key_false rfs_needed __read_mostly;
 EXPORT_SYMBOL(rfs_needed);
 
-static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
+static u32 rfs_slot(u32 hash, rps_tag_ptr tag_ptr)
 {
-	return hash_32(hash, flow_table->log);
+	return hash_32(hash, rps_tag_to_log(tag_ptr));
 }
 
 #ifdef CONFIG_RFS_ACCEL
 /**
  * rps_flow_is_active - check whether the flow is recently active.
  * @rflow: Specific flow to check activity.
- * @flow_table: per-queue flowtable that @rflow belongs to.
+ * @log: ilog2(hashsize).
  * @cpu: CPU saved in @rflow.
  *
  * If the CPU has processed many packets since the flow's last activity
@@ -4986,7 +4986,7 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
  * Return: true if flow was recently active.
  */
 static bool rps_flow_is_active(struct rps_dev_flow *rflow,
-			       struct rps_dev_flow_table *flow_table,
+			       u8 log,
 			       unsigned int cpu)
 {
 	unsigned int flow_last_active;
@@ -4999,7 +4999,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow,
 	flow_last_active = READ_ONCE(rflow->last_qtail);
 
 	return (int)(sd_input_head - flow_last_active) <
-	       (int)(10 << flow_table->log);
+	       (int)(10 << log);
 }
 #endif
 
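Aside: hash_32(hash, bits) keeps the top bits of a 32-bit multiplicative hash, so rfs_slot() above maps a flow hash to a slot index in [0, 2^log). A self-contained userspace illustration (the constant is GOLDEN_RATIO_32 from include/linux/hash.h; demo_hash_32 is a hypothetical stand-in, not kernel code):

```c
#include <stdint.h>
#include <stdio.h>

/* Userspace re-implementation of the kernel's hash_32(), for illustration. */
static uint32_t demo_hash_32(uint32_t val, unsigned int bits)
{
	return (val * 0x61C88647u) >> (32 - bits);	/* GOLDEN_RATIO_32 */
}

int main(void)
{
	/* With log = 7, every flow hash lands in a slot in [0, 128). */
	printf("slot=%u\n", demo_hash_32(0xdeadbeef, 7));
	return 0;
}
```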
@@ -5011,9 +5011,10 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	u32 head;
 #ifdef CONFIG_RFS_ACCEL
 	struct netdev_rx_queue *rxqueue;
-	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *flow_table;
 	struct rps_dev_flow *old_rflow;
 	struct rps_dev_flow *tmp_rflow;
+	rps_tag_ptr q_tag_ptr;
 	unsigned int tmp_cpu;
 	u16 rxq_index;
 	u32 flow_id;
@@ -5028,16 +5029,18 @@
 		goto out;
 
 	rxqueue = dev->_rx + rxq_index;
-	flow_table = rcu_dereference(rxqueue->rps_flow_table);
-	if (!flow_table)
+	q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table);
+	if (!q_tag_ptr)
 		goto out;
 
-	flow_id = rfs_slot(hash, flow_table);
-	tmp_rflow = &flow_table->flows[flow_id];
+	flow_id = rfs_slot(hash, q_tag_ptr);
+	flow_table = rps_tag_to_table(q_tag_ptr);
+	tmp_rflow = flow_table + flow_id;
 	tmp_cpu = READ_ONCE(tmp_rflow->cpu);
 
 	if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) {
-		if (rps_flow_is_active(tmp_rflow, flow_table,
+		if (rps_flow_is_active(tmp_rflow,
+				       rps_tag_to_log(q_tag_ptr),
 				       tmp_cpu)) {
 			if (hash != READ_ONCE(tmp_rflow->hash) ||
 			    next_cpu == tmp_cpu)
@@ -5076,8 +5079,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		       struct rps_dev_flow **rflowp)
 {
 	struct netdev_rx_queue *rxqueue = dev->_rx;
-	struct rps_dev_flow_table *flow_table;
-	rps_tag_ptr global_tag_ptr;
+	rps_tag_ptr global_tag_ptr, q_tag_ptr;
 	struct rps_map *map;
 	int cpu = -1;
 	u32 tcpu;
@@ -5098,9 +5100,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
 	/* Avoid computing hash if RFS/RPS is not active for this rxqueue */
 
-	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table);
 	map = rcu_dereference(rxqueue->rps_map);
-	if (!flow_table && !map)
+	if (!q_tag_ptr && !map)
 		goto done;
 
 	skb_reset_network_header(skb);
@@ -5109,8 +5111,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		goto done;
 
 	global_tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
-	if (flow_table && global_tag_ptr) {
+	if (q_tag_ptr && global_tag_ptr) {
 		struct rps_sock_flow_table *sock_flow_table;
+		struct rps_dev_flow *flow_table;
 		struct rps_dev_flow *rflow;
 		u32 next_cpu;
 		u32 flow_id;
@@ -5130,7 +5133,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		/* OK, now we know there is a match,
 		 * we can look at the local (per receive queue) flow table
 		 */
-		rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
+		flow_id = rfs_slot(hash, q_tag_ptr);
+		flow_table = rps_tag_to_table(q_tag_ptr);
+		rflow = flow_table + flow_id;
 		tcpu = rflow->cpu;
 
 		/*
@@ -5190,19 +5195,23 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 			 u32 flow_id, u16 filter_id)
 {
 	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
-	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *flow_table;
 	struct rps_dev_flow *rflow;
+	rps_tag_ptr q_tag_ptr;
 	bool expire = true;
+	u8 log;
 
 	rcu_read_lock();
-	flow_table = rcu_dereference(rxqueue->rps_flow_table);
-	if (flow_table && flow_id < (1UL << flow_table->log)) {
+	q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table);
+	log = rps_tag_to_log(q_tag_ptr);
+	if (q_tag_ptr && flow_id < (1UL << log)) {
 		unsigned int cpu;
 
-		rflow = &flow_table->flows[flow_id];
+		flow_table = rps_tag_to_table(q_tag_ptr);
+		rflow = flow_table + flow_id;
 		cpu = READ_ONCE(rflow->cpu);
 		if (READ_ONCE(rflow->filter) == filter_id &&
-		    rps_flow_is_active(rflow, flow_table, cpu))
+		    rps_flow_is_active(rflow, log, cpu))
 			expire = false;
 	}
 	rcu_read_unlock();
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1060,14 +1060,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 					   char *buf)
 {
-	struct rps_dev_flow_table *flow_table;
 	unsigned long val = 0;
+	rps_tag_ptr tag_ptr;
 
-	rcu_read_lock();
-	flow_table = rcu_dereference(queue->rps_flow_table);
-	if (flow_table)
-		val = 1UL << flow_table->log;
-	rcu_read_unlock();
+	tag_ptr = READ_ONCE(queue->rps_flow_table);
+	if (tag_ptr)
+		val = 1UL << rps_tag_to_log(tag_ptr);
 
 	return sysfs_emit(buf, "%lu\n", val);
 }
@@ -1075,8 +1073,10 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 					    const char *buf, size_t len)
 {
-	struct rps_dev_flow_table *table, *old_table;
+	rps_tag_ptr otag, tag_ptr = 0UL;
+	struct rps_dev_flow *table;
 	unsigned long mask, count;
+	size_t sz;
 	int rc;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -1093,38 +1093,36 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		 */
 		while ((mask | (mask >> 1)) != mask)
 			mask |= (mask >> 1);
-		/* On 64 bit arches, must check mask fits in table->mask (u32),
-		 * and on 32bit arches, must check
-		 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
-		 */
-#if BITS_PER_LONG > 32
-		if (mask > (unsigned long)(u32)mask)
+
+		/* Do not accept too large tables. */
+		if (mask > (INT_MAX / sizeof(*table) - 1))
 			return -EINVAL;
-#else
-		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
-		    / sizeof(struct rps_dev_flow)) {
-			/* Enforce a limit to prevent overflow */
-			return -EINVAL;
-		}
-#endif
-		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
+
+		sz = max_t(size_t, sizeof(*table) * (mask + 1),
+			   PAGE_SIZE);
+		if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
+		    is_power_of_2(sizeof(*table)))
+			table = kvmalloc(sz, GFP_KERNEL);
+		else
+			table = vmalloc(sz);
 		if (!table)
 			return -ENOMEM;
 
-		table->log = ilog2(mask) + 1;
-		for (count = 0; count <= mask; count++) {
-			table->flows[count].cpu = RPS_NO_CPU;
-			table->flows[count].filter = RPS_NO_FILTER;
+		tag_ptr = (rps_tag_ptr)table;
+		if (rps_tag_to_log(tag_ptr)) {
+			pr_err_once("store_rps_dev_flow_table_cnt() got a non page aligned allocation.\n");
+			kvfree(table);
+			return -ENOMEM;
+		}
+		tag_ptr |= (ilog2(mask) + 1);
+		for (count = 0; count <= mask; count++) {
+			table[count].cpu = RPS_NO_CPU;
+			table[count].filter = RPS_NO_FILTER;
 		}
-	} else {
-		table = NULL;
 	}
 
-	old_table = unrcu_pointer(xchg(&queue->rps_flow_table,
-				       RCU_INITIALIZER(table)));
-
-	if (old_table)
-		kvfree_rcu_mightsleep(old_table);
+	otag = xchg(&queue->rps_flow_table, tag_ptr);
+	if (otag)
+		kvfree_rcu_mightsleep(rps_tag_to_table(otag));
 
 	return len;
 }
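A worked example of the sizing arithmetic above, as a standalone userspace sketch (it assumes mask starts as count - 1, which happens in the unchanged part of this function): writing 100 to rps_flow_cnt rounds mask up to 127, so the table gets 128 entries and the tag's log field is ilog2(127) + 1 = 7.

```c
#include <stdio.h>

int main(void)
{
	unsigned long count = 100;	/* e.g. the value written to rps_flow_cnt */
	unsigned long mask = count - 1;
	unsigned int log = 0;

	/* Same rounding loop as the patch: mask becomes 2^n - 1. */
	while ((mask | (mask >> 1)) != mask)
		mask |= (mask >> 1);

	while ((1UL << log) <= mask)	/* computes ilog2(mask) + 1 */
		log++;

	printf("mask=%lu entries=%lu log=%u\n", mask, mask + 1, log);
	/* prints: mask=127 entries=128 log=7 */
	return 0;
}
```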
@@ -1150,7 +1148,7 @@ static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 #ifdef CONFIG_RPS
-	struct rps_dev_flow_table *old_table;
+	rps_tag_ptr tag_ptr;
 	struct rps_map *map;
 
 	map = rcu_dereference_protected(queue->rps_map, 1);
@@ -1159,9 +1157,9 @@ static void rx_queue_release(struct kobject *kobj)
 		kfree_rcu(map, rcu);
 	}
 
-	old_table = unrcu_pointer(xchg(&queue->rps_flow_table, NULL));
-	if (old_table)
-		kvfree_rcu_mightsleep(old_table);
+	tag_ptr = xchg(&queue->rps_flow_table, 0UL);
+	if (tag_ptr)
+		kvfree_rcu_mightsleep(rps_tag_to_table(tag_ptr));
 #endif
 
 	memset(kobj, 0, sizeof(*kobj));