mirror of https://github.com/torvalds/linux.git
idpf: do not linearize big TSO packets
idpf has a limit on the number of scatter-gather frags that can be used per segment.

Currently, idpf_tx_start() checks whether the limit is hit and forces a linearization of the whole packet. This requires high-order allocations that can fail under memory pressure: a full-size BIG TCP packet would require an order-7 allocation on x86_64 :/

We can instead perform the check earlier, from idpf_features_check(), and force GSO for TSO packets that exceed the limit, removing the cost of a big copy. This means that any linearization will eventually happen on sizes smaller than one MSS.

__idpf_chk_linearize() is renamed to idpf_chk_tso_segment() and moved to idpf_lib.c.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Cc: Jacob Keller <jacob.e.keller@intel.com>
Cc: Madhu Chittim <madhu.chittim@intel.com>
Cc: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Reviewed-by: Joshua Hay <joshua.a.hay@intel.com>
Tested-by: Brian Vazquez <brianvv@google.com>
Acked-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://patch.msgid.link/20250818195934.757936-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
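To make the order-7 figure concrete: with 4 KiB pages, an order-n allocation is 2^n physically contiguous pages, so linearizing a full-size BIG TCP packet means finding 128 contiguous pages. Below is a minimal user-space sketch of that arithmetic; the ~512 KiB packet size is an assumption inferred from the order-7 claim, and alloc_order() is an illustrative helper, not a kernel function.

```c
#include <stdio.h>

/* Illustrative only: smallest buddy-allocator order whose block
 * (4 KiB << order) can hold one contiguous buffer of `bytes`. */
static unsigned int alloc_order(size_t bytes)
{
	unsigned int order = 0;
	size_t block = 4096;	/* x86_64 page size assumption */

	while (block < bytes) {
		block <<= 1;
		order++;
	}
	return order;
}

int main(void)
{
	printf("64 KiB  -> order %u\n", alloc_order(64 * 1024));	/* 4 */
	printf("512 KiB -> order %u\n", alloc_order(512 * 1024));	/* 7 */
	return 0;
}
```

Under memory fragmentation, order-7 blocks are often simply unavailable, which is why forcing software GSO (many MSS-sized skbs) is the safer path.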
parent cd31182c80
commit 02614eee26
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -148,6 +148,7 @@ enum idpf_vport_state {
  * @link_speed_mbps: Link speed in mbps
  * @vport_idx: Relative vport index
  * @max_tx_hdr_size: Max header length hardware can support
+ * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
  * @state: See enum idpf_vport_state
  * @netstats: Packet and byte stats
  * @stats_lock: Lock to protect stats update
@@ -159,6 +160,7 @@ struct idpf_netdev_priv {
 	u32 link_speed_mbps;
 	u16 vport_idx;
 	u16 max_tx_hdr_size;
+	u16 tx_max_bufs;
 	enum idpf_vport_state state;
 	struct rtnl_link_stats64 netstats;
 	spinlock_t stats_lock;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -776,6 +776,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
 	np->vport_idx = vport->idx;
 	np->vport_id = vport->vport_id;
 	np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter);
+	np->tx_max_bufs = idpf_get_max_tx_bufs(adapter);
 
 	spin_lock_init(&np->stats_lock);
 
@@ -2271,6 +2272,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
+/**
+ * idpf_chk_tso_segment - Check skb is not using too many buffers
+ * @skb: send buffer
+ * @max_bufs: maximum number of buffers
+ *
+ * For TSO we need to count the TSO header and segment payload separately. As
+ * such we need to check cases where we have max_bufs-1 fragments or more as we
+ * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
+ * for the segment payload in the first descriptor, and another max_bufs-1 for
+ * the fragments.
+ *
+ * Returns true if the packet needs to be software segmented by the core stack.
+ */
+static bool idpf_chk_tso_segment(const struct sk_buff *skb,
+				 unsigned int max_bufs)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than max_bufs - 1 */
+	nr_frags = shinfo->nr_frags;
+	if (nr_frags < (max_bufs - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of max_bufs-2 fragments totals at least gso_size.
+	 */
+	nr_frags -= max_bufs - 2;
+	frag = &shinfo->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1. We use
+	 * this as the worst case scenario in which the frag ahead of us only
+	 * provides one byte which is why we are limited to max_bufs-2
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - shinfo->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &shinfo->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > IDPF_TX_MAX_DESC_DATA) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(IDPF_TX_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+				stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
+			} while (stale_size > IDPF_TX_MAX_DESC_DATA);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
 /**
  * idpf_features_check - Validate packet conforms to limits
  * @skb: skb buffer
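The sliding-window walk above enforces one invariant: since the TSO header and the tail of the previous fragment already consume two descriptors, every run of max_bufs - 2 consecutive frags must supply at least gso_size bytes, or one segment could need more than max_bufs DMA transactions. A simplified user-space model of that invariant follows; the function and parameter names are hypothetical, and it ignores the splitting of frags larger than IDPF_TX_MAX_DESC_DATA that the real code handles.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical model: does any window of (max_bufs - 2) consecutive
 * frags carry fewer than gso_size bytes? If so, the core stack must
 * segment the packet in software. */
static bool needs_sw_segmentation(const unsigned int *frag_sz,
				  unsigned int nr_frags,
				  unsigned int max_bufs,
				  unsigned int gso_size)
{
	unsigned int win = max_bufs - 2;
	unsigned long sum = 0;

	/* with fewer than max_bufs - 1 frags the limit cannot be hit */
	if (nr_frags < max_bufs - 1)
		return false;

	for (unsigned int i = 0; i < nr_frags; i++) {
		sum += frag_sz[i];		/* extend window to frag i */
		if (i + 1 < win)
			continue;		/* window not full yet */
		if (sum < gso_size)
			return true;		/* window too sparse */
		sum -= frag_sz[i + 1 - win];	/* drop the stale frag */
	}

	return false;
}

int main(void)
{
	/* max_bufs = 8, MSS = 1448: eight 100-byte frags fail, because
	 * any six-frag window holds only 600 bytes. */
	unsigned int tiny[8] = { 100, 100, 100, 100, 100, 100, 100, 100 };

	printf("%d\n", needs_sw_segmentation(tiny, 8, 8, 1448)); /* 1 */
	return 0;
}
```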
@@ -2292,12 +2379,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb,
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
 		return features;
 
-	/* We cannot support GSO if the MSS is going to be less than
-	 * 88 bytes. If it is then we need to drop support for GSO.
-	 */
-	if (skb_is_gso(skb) &&
-	    (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS))
-		features &= ~NETIF_F_GSO_MASK;
+	if (skb_is_gso(skb)) {
+		/* We cannot support GSO if the MSS is going to be less than
+		 * 88 bytes. If it is then we need to drop support for GSO.
+		 */
+		if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)
+			features &= ~NETIF_F_GSO_MASK;
+		else if (idpf_chk_tso_segment(skb, np->tx_max_bufs))
+			features &= ~NETIF_F_GSO_MASK;
+	}
 
 	/* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */
 	len = skb_network_offset(skb);
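Clearing NETIF_F_GSO_MASK here is sufficient because the core stack checks the per-skb features right before transmit and falls back to software segmentation. A loose sketch of that behavior, condensed from the validate_xmit_skb() path in net/core/dev.c (error handling and segment-list handling omitted; this sketch is for orientation and is not part of the patch):

```c
/* Condensed sketch of the core-stack fallback (not driver code):
 * netif_skb_features() consults ndo_features_check(), i.e.
 * idpf_features_check() here, so once the GSO bits are cleared the
 * skb is segmented into MSS-sized skbs before ndo_start_xmit(). */
static struct sk_buff *gso_fallback_sketch(struct sk_buff *skb)
{
	netdev_features_t features = netif_skb_features(skb);

	if (netif_needs_gso(skb, features))
		return skb_gso_segment(skb, features); /* one skb per MSS */

	return skb;
}
```

Each resulting segment then reaches idpf_tx_start() small enough that any residual linearization in idpf_chk_linearize() copies less than one MSS.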
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -16,8 +16,28 @@ struct idpf_tx_stash {
 #define idpf_tx_buf_compl_tag(buf)	(*(u32 *)&(buf)->priv)
 LIBETH_SQE_CHECK_PRIV(u32);
 
-static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
-			       unsigned int count);
+/**
+ * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
+ * @skb: send buffer
+ * @max_bufs: maximum scatter gather buffers for single packet
+ * @count: number of buffers this packet needs
+ *
+ * Make sure we don't exceed maximum scatter gather buffers for a single
+ * packet.
+ * The TSO case has already been handled by idpf_features_check().
+ */
+static bool idpf_chk_linearize(const struct sk_buff *skb,
+			       unsigned int max_bufs,
+			       unsigned int count)
+{
+	if (likely(count <= max_bufs))
+		return false;
+
+	if (skb_is_gso(skb))
+		return false;
+
+	return true;
+}
 
 /**
  * idpf_buf_lifo_push - push a buffer pointer onto stack
@@ -2627,111 +2647,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
 	return 1;
 }
 
-/**
- * __idpf_chk_linearize - Check skb is not using too many buffers
- * @skb: send buffer
- * @max_bufs: maximum number of buffers
- *
- * For TSO we need to count the TSO header and segment payload separately. As
- * such we need to check cases where we have max_bufs-1 fragments or more as we
- * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1
- * for the segment payload in the first descriptor, and another max_bufs-1 for
- * the fragments.
- */
-static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
-{
-	const struct skb_shared_info *shinfo = skb_shinfo(skb);
-	const skb_frag_t *frag, *stale;
-	int nr_frags, sum;
-
-	/* no need to check if number of frags is less than max_bufs - 1 */
-	nr_frags = shinfo->nr_frags;
-	if (nr_frags < (max_bufs - 1))
-		return false;
-
-	/* We need to walk through the list and validate that each group
-	 * of max_bufs-2 fragments totals at least gso_size.
-	 */
-	nr_frags -= max_bufs - 2;
-	frag = &shinfo->frags[0];
-
-	/* Initialize size to the negative value of gso_size minus 1. We use
-	 * this as the worst case scenario in which the frag ahead of us only
-	 * provides one byte which is why we are limited to max_bufs-2
-	 * descriptors for a single transmit as the header and previous
-	 * fragment are already consuming 2 descriptors.
-	 */
-	sum = 1 - shinfo->gso_size;
-
-	/* Add size of frags 0 through 4 to create our initial sum */
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-	sum += skb_frag_size(frag++);
-
-	/* Walk through fragments adding latest fragment, testing it, and
-	 * then removing stale fragments from the sum.
-	 */
-	for (stale = &shinfo->frags[0];; stale++) {
-		int stale_size = skb_frag_size(stale);
-
-		sum += skb_frag_size(frag++);
-
-		/* The stale fragment may present us with a smaller
-		 * descriptor than the actual fragment size. To account
-		 * for that we need to remove all the data on the front and
-		 * figure out what the remainder would be in the last
-		 * descriptor associated with the fragment.
-		 */
-		if (stale_size > IDPF_TX_MAX_DESC_DATA) {
-			int align_pad = -(skb_frag_off(stale)) &
-					(IDPF_TX_MAX_READ_REQ_SIZE - 1);
-
-			sum -= align_pad;
-			stale_size -= align_pad;
-
-			do {
-				sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
-				stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED;
-			} while (stale_size > IDPF_TX_MAX_DESC_DATA);
-		}
-
-		/* if sum is negative we failed to make sufficient progress */
-		if (sum < 0)
-			return true;
-
-		if (!nr_frags--)
-			break;
-
-		sum -= stale_size;
-	}
-
-	return false;
-}
-
-/**
- * idpf_chk_linearize - Check if skb exceeds max descriptors per packet
- * @skb: send buffer
- * @max_bufs: maximum scatter gather buffers for single packet
- * @count: number of buffers this packet needs
- *
- * Make sure we don't exceed maximum scatter gather buffers for a single
- * packet. We have to do some special checking around the boundary (max_bufs-1)
- * if TSO is on since we need to count the TSO header and payload separately.
- * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
- * header, 1 for segment payload, and then 7 for the fragments.
- */
-static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
-			       unsigned int count)
-{
-	if (likely(count < max_bufs))
-		return false;
-
-	if (skb_is_gso(skb))
-		return __idpf_chk_linearize(skb, max_bufs);
-
-	return count > max_bufs;
-}
-
 /**
  * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring