mirror of https://github.com/torvalds/linux.git
net: add NUMA awareness to skb_attempt_defer_free()
Instead of sharing sd->defer_list & sd->defer_count with many cpus, add one pair for each NUMA node.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250928084934.3266948-4-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit 5628f3fe3b
parent 844c9db7f7
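The change is easiest to picture as a two-dimensional layout: each cpu keeps nr_node_ids (defer_list, defer_count) slots instead of one, and a cpu freeing an skb picks the slot that matches its own NUMA node. Below is a minimal userspace C sketch of that indexing, not the kernel implementation; DEMO_NR_CPUS, DEMO_NR_NODES, struct demo_defer_slot and demo_node_of_cpu() are made-up stand-ins for nr_cpu_ids, nr_node_ids, struct skb_defer_node and the kernel's topology helpers.

/*
 * Sketch of the per-(cpu, node) defer slots introduced by this patch.
 * Illustrative only: names and the cpu->node mapping are assumptions.
 */
#include <stdatomic.h>
#include <stdio.h>

#define DEMO_NR_CPUS	4
#define DEMO_NR_NODES	2

struct demo_defer_slot {
	atomic_long defer_count;	/* stands in for skb_defer_node's counter */
};

/* Flat [cpu][node] array, like __alloc_percpu() of nr_node_ids slots per cpu. */
static struct demo_defer_slot slots[DEMO_NR_CPUS][DEMO_NR_NODES];

/* Assumed cpu->node mapping for the demo: cpus 0-1 on node 0, cpus 2-3 on node 1. */
static int demo_node_of_cpu(int cpu)
{
	return cpu < DEMO_NR_CPUS / 2 ? 0 : 1;
}

/*
 * Mirrors the lookup done in skb_attempt_defer_free():
 *	sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
 * The slot belongs to the skb's allocating cpu but is selected by the
 * freeing cpu's node, so freers on different nodes touch different slots.
 */
static struct demo_defer_slot *demo_slot(int alloc_cpu, int freeing_cpu)
{
	return &slots[alloc_cpu][demo_node_of_cpu(freeing_cpu)];
}

int main(void)
{
	/* skb allocated on cpu 1, freed from cpu 0 (node 0) and cpu 3 (node 1). */
	atomic_fetch_add(&demo_slot(1, 0)->defer_count, 1);
	atomic_fetch_add(&demo_slot(1, 3)->defer_count, 1);

	for (int node = 0; node < DEMO_NR_NODES; node++)
		printf("cpu 1, node %d: %ld deferred skbs\n", node,
		       (long)atomic_load(&slots[1][node].defer_count));
	return 0;
}

The intended effect is that two cpus on different nodes deferring skbs back to the same allocating cpu now increment different counters on different cache lines, rather than contending on a single sd->defer_count.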
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3536,10 +3536,6 @@ struct softnet_data {
 
 	struct numa_drop_counters drop_counters;
 
-	/* Another possibly contended cache line */
-	struct llist_head defer_list ____cacheline_aligned_in_smp;
-	atomic_long_t defer_count;
-
 	int defer_ipi_scheduled ____cacheline_aligned_in_smp;
 	call_single_data_t defer_csd;
 };
--- a/include/net/hotdata.h
+++ b/include/net/hotdata.h
@@ -2,10 +2,16 @@
 #ifndef _NET_HOTDATA_H
 #define _NET_HOTDATA_H
 
+#include <linux/llist.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
 #include <net/protocol.h>
 
+struct skb_defer_node {
+	struct llist_head defer_list;
+	atomic_long_t defer_count;
+} ____cacheline_aligned_in_smp;
+
 /* Read mostly data used in network fast paths. */
 struct net_hotdata {
 #if IS_ENABLED(CONFIG_INET)
@@ -30,6 +36,7 @@ struct net_hotdata {
 	struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 	u32 rps_cpu_mask;
 #endif
+	struct skb_defer_node __percpu *skb_defer_nodes;
 	int gro_normal_batch;
 	int netdev_budget;
 	int netdev_budget_usecs;
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5180,8 +5180,9 @@ static void napi_schedule_rps(struct softnet_data *sd)
 	__napi_schedule_irqoff(&mysd->backlog);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
+void kick_defer_list_purge(unsigned int cpu)
 {
+	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
 	unsigned long flags;
 
 	if (use_backlog_threads()) {
@@ -6715,20 +6716,26 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 }
 EXPORT_SYMBOL(napi_complete_done);
 
-static void skb_defer_free_flush(struct softnet_data *sd)
+static void skb_defer_free_flush(void)
 {
 	struct llist_node *free_list;
 	struct sk_buff *skb, *next;
+	struct skb_defer_node *sdn;
+	int node;
 
-	if (llist_empty(&sd->defer_list))
-		return;
-	atomic_long_set(&sd->defer_count, 0);
-	free_list = llist_del_all(&sd->defer_list);
+	for_each_node(node) {
+		sdn = this_cpu_ptr(net_hotdata.skb_defer_nodes) + node;
 
-	llist_for_each_entry_safe(skb, next, free_list, ll_node) {
-		napi_consume_skb(skb, 1);
+		if (llist_empty(&sdn->defer_list))
+			continue;
+		atomic_long_set(&sdn->defer_count, 0);
+		free_list = llist_del_all(&sdn->defer_list);
+
+		llist_for_each_entry_safe(skb, next, free_list, ll_node) {
+			napi_consume_skb(skb, 1);
+		}
 	}
 }
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
 
@@ -6854,7 +6861,7 @@ static void __napi_busy_loop(unsigned int napi_id,
 		if (work > 0)
 			__NET_ADD_STATS(dev_net(napi->dev),
 					LINUX_MIB_BUSYPOLLRXPACKETS, work);
-		skb_defer_free_flush(this_cpu_ptr(&softnet_data));
+		skb_defer_free_flush();
 		bpf_net_ctx_clear(bpf_net_ctx);
 		local_bh_enable();
 
@@ -7713,7 +7720,7 @@ static void napi_threaded_poll_loop(struct napi_struct *napi)
 			local_irq_disable();
 			net_rps_action_and_irq_enable(sd);
 		}
-		skb_defer_free_flush(sd);
+		skb_defer_free_flush();
 		bpf_net_ctx_clear(bpf_net_ctx);
 		local_bh_enable();
 
@@ -7755,7 +7762,7 @@ static __latent_entropy void net_rx_action(void)
 	for (;;) {
 		struct napi_struct *n;
 
-		skb_defer_free_flush(sd);
+		skb_defer_free_flush();
 
 		if (list_empty(&list)) {
 			if (list_empty(&repoll)) {
@@ -12989,7 +12996,6 @@ static int __init net_dev_init(void)
 		sd->cpu = i;
 #endif
 		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
-		init_llist_head(&sd->defer_list);
 
 		gro_init(&sd->backlog.gro);
 		sd->backlog.poll = process_backlog;
@@ -12999,6 +13005,11 @@ static int __init net_dev_init(void)
 		if (net_page_pool_create(i))
 			goto out;
 	}
+	net_hotdata.skb_defer_nodes =
+		__alloc_percpu(sizeof(struct skb_defer_node) * nr_node_ids,
+			       __alignof__(struct skb_defer_node));
+	if (!net_hotdata.skb_defer_nodes)
+		goto out;
 	if (use_backlog_threads())
 		smpboot_register_percpu_thread(&backlog_threads);
 
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -357,7 +357,7 @@ static inline void napi_assert_will_not_race(const struct napi_struct *napi)
 	WARN_ON(READ_ONCE(napi->list_owner) != -1);
 }
 
-void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
+void kick_defer_list_purge(unsigned int cpu);
 
 #define XMIT_RECURSION_LIMIT 8
 
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -7185,9 +7185,9 @@ static void kfree_skb_napi_cache(struct sk_buff *skb)
  */
 void skb_attempt_defer_free(struct sk_buff *skb)
 {
+	struct skb_defer_node *sdn;
 	unsigned long defer_count;
 	int cpu = skb->alloc_cpu;
-	struct softnet_data *sd;
 	unsigned int defer_max;
 	bool kick;
 
@@ -7201,14 +7201,15 @@ nodefer: kfree_skb_napi_cache(skb);
 	DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
 	DEBUG_NET_WARN_ON_ONCE(skb->destructor);
 
-	sd = &per_cpu(softnet_data, cpu);
+	sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id();
+
 	defer_max = READ_ONCE(net_hotdata.sysctl_skb_defer_max);
-	defer_count = atomic_long_inc_return(&sd->defer_count);
+	defer_count = atomic_long_inc_return(&sdn->defer_count);
 
 	if (defer_count >= defer_max)
 		goto nodefer;
 
-	llist_add(&skb->ll_node, &sd->defer_list);
+	llist_add(&skb->ll_node, &sdn->defer_list);
 
 	/* Send an IPI every time queue reaches half capacity. */
 	kick = (defer_count - 1) == (defer_max >> 1);
@@ -7217,7 +7218,7 @@ nodefer: kfree_skb_napi_cache(skb);
 	 * if we are unlucky enough (this seems very unlikely).
 	 */
 	if (unlikely(kick))
-		kick_defer_list_purge(sd, cpu);
+		kick_defer_list_purge(cpu);
 }
 
 static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,