RDMA v6.19 merge window pull request

- Minor driver bug fixes and updates to cxgb4, rxe, rdmavt, bnxt_re, mlx5
 
 - Many bug fix patches for irdma
 
 - WQ_PERCPU annotations and system_dfl_wq changes
 
 - Improved mlx5 support for "other eswitches" and multiple PFs
 
 - 1600Gbps link speed reporting support. Four Digits Now!
 
 - New driver bng_en for latest generation Broadcom NICs
 
 - Bonding support for hns
 
 - Adjust mlx5's hmm based ODP to work with the very large address space
   created by the new 5 level paging default on x86
 
 - Lockdep fixups in rxe and siw
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCaS9AIgAKCRCFwuHvBreF
 YXS2AP99rRv3hue5jLELuDqyPOORTzVfpKfAOcR2V3l30AP3mwEA1j/jontz1Ak+
 oTNrL7Tv1rAxopB24yTLuFmIXEXHCAE=
 =3PJv
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has another new RDMA driver 'bng_en' for latest generation
  Broadcom NICs. There might be one more new driver still to come.

   Otherwise it is a fairly quiet cycle. Summary:

   - Minor driver bug fixes and updates to cxgb4, rxe, rdmavt, bnxt_re,
     mlx5

   - Many bug fix patches for irdma

   - WQ_PERCPU annotations and system_dfl_wq changes

   - Improved mlx5 support for "other eswitches" and multiple PFs

   - 1600Gbps link speed reporting support. Four Digits Now!

   - New driver bng_en for latest generation Broadcom NICs

   - Bonding support for hns

   - Adjust mlx5's hmm based ODP to work with the very large address
     space created by the new 5 level paging default on x86

   - Lockdep fixups in rxe and siw"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (65 commits)
  RDMA/rxe: reclassify sockets in order to avoid false positives from lockdep
  RDMA/siw: reclassify sockets in order to avoid false positives from lockdep
  RDMA/bng_re: Remove prefetch instruction
  RDMA/core: Reduce cond_resched() frequency in __ib_umem_release
  RDMA/irdma: Fix SRQ shadow area address initialization
  RDMA/irdma: Remove doorbell elision logic
  RDMA/irdma: Do not set IBK_LOCAL_DMA_LKEY for GEN3+
  RDMA/irdma: Do not directly rely on IB_PD_UNSAFE_GLOBAL_RKEY
  RDMA/irdma: Add missing mutex destroy
  RDMA/irdma: Fix SIGBUS in AEQ destroy
  RDMA/irdma: Add a missing kfree of struct irdma_pci_f for GEN2
  RDMA/irdma: Fix data race in irdma_free_pble
  RDMA/irdma: Fix data race in irdma_sc_ccq_arm
  RDMA/mlx5: Add support for 1600_8x lane speed
  RDMA/core: Add new IB rate for XDR (8x) support
  IB/mlx5: Reduce IMR KSM size when 5-level paging is enabled
  RDMA/bnxt_re: Pass correct flag for dma mr creation
  RDMA/bnxt_re: Fix the inline size for GenP7 devices
  RDMA/hns: Support reset recovery for bond
  RDMA/hns: Support link state reporting for bond
  ...
Linus Torvalds 2025-12-04 18:54:37 -08:00
commit 55aa394a5e
89 changed files with 5066 additions and 473 deletions

View File

@ -5243,6 +5243,13 @@ W: http://www.broadcom.com
F: drivers/infiniband/hw/bnxt_re/
F: include/uapi/rdma/bnxt_re-abi.h
BROADCOM 800 GIGABIT ROCE DRIVER
M: Siva Reddy Kallam <siva.kallam@broadcom.com>
L: linux-rdma@vger.kernel.org
S: Supported
W: http://www.broadcom.com
F: drivers/infiniband/hw/bng_re/
BROADCOM NVRAM DRIVER
M: Rafał Miłecki <zajec5@gmail.com>
L: linux-mips@vger.kernel.org

View File

@ -80,6 +80,7 @@ config INFINIBAND_VIRT_DMA
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
if !UML
source "drivers/infiniband/hw/bnxt_re/Kconfig"
source "drivers/infiniband/hw/bng_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/erdma/Kconfig"

View File

@ -34,7 +34,6 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */
@ -1057,6 +1056,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
struct cm_id_private *cm_id_priv;
enum ib_cm_state old_state;
unsigned long timeout;
struct cm_work *work;
int ret;
@ -1167,10 +1167,9 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
timeout = msecs_to_jiffies((cm_id_priv->max_cm_retries * cm_id_priv->timeout_ms * 5) / 4);
do {
ret = wait_for_completion_timeout(&cm_id_priv->comp,
msecs_to_jiffies(
CM_DESTROY_ID_WAIT_TIMEOUT));
ret = wait_for_completion_timeout(&cm_id_priv->comp, timeout);
if (!ret) /* timeout happened */
cm_destroy_id_wait_timeout(cm_id, old_state);
} while (!ret);
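Worked example for the new timeout (the values here are hypothetical, the formula is taken from the hunk above): with max_cm_retries = 15 and timeout_ms = 4000, each wait_for_completion_timeout() pass now waits msecs_to_jiffies((15 * 4000 * 5) / 4) = 75000 ms, roughly 75 seconds scaled to that ID's own retry budget, instead of the fixed 10000 ms CM_DESTROY_ID_WAIT_TIMEOUT it replaces.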
@ -4518,7 +4517,7 @@ static int __init ib_cm_init(void)
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
cm.wq = alloc_workqueue("ib_cm", 0, 1);
cm.wq = alloc_workqueue("ib_cm", WQ_PERCPU, 1);
if (!cm.wq) {
ret = -ENOMEM;
goto error2;
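The WQ_PERCPU changes in this file and in device.c below are mechanical annotations; a minimal sketch of the pattern (illustrative, not taken from the patch — only the flag usage mirrors the hunks) looks like:

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static int example_init(void)
{
        /* A bare 0 in the flags argument has historically meant a bound,
         * per-CPU workqueue; passing WQ_PERCPU states that choice
         * explicitly, so behaviour is preserved even if the meaning of
         * the bare default changes later. */
        example_wq = alloc_workqueue("example_wq", WQ_PERCPU, 1);
        if (!example_wq)
                return -ENOMEM;
        return 0;
}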

View File

@ -4475,6 +4475,8 @@ int rdma_connect_locked(struct rdma_cm_id *id,
container_of(id, struct rdma_id_private, id);
int ret;
lockdep_assert_held(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;

View File

@ -3021,7 +3021,7 @@ static int __init ib_core_init(void)
{
int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
ib_wq = alloc_workqueue("infiniband", WQ_PERCPU, 0);
if (!ib_wq)
return -ENOMEM;
@ -3031,7 +3031,7 @@ static int __init ib_core_init(void)
goto err;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_PERCPU, 0);
if (!ib_comp_wq)
goto err_unbound;

View File

@ -175,7 +175,7 @@ void rdma_restrack_new(struct rdma_restrack_entry *res,
EXPORT_SYMBOL(rdma_restrack_new);
/**
* rdma_restrack_add() - add object to the reource tracking database
* rdma_restrack_add() - add object to the resource tracking database
* @res: resource entry
*/
void rdma_restrack_add(struct rdma_restrack_entry *res)
@ -277,7 +277,7 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
EXPORT_SYMBOL(rdma_restrack_put);
/**
* rdma_restrack_del() - delete object from the reource tracking database
* rdma_restrack_del() - delete object from the resource tracking database
* @res: resource entry
*/
void rdma_restrack_del(struct rdma_restrack_entry *res)

View File

@ -366,7 +366,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
xa_lock(&ctx_table);
if (xa_load(&ctx_table, ctx->id) == ctx)
queue_work(system_unbound_wq, &ctx->close_work);
queue_work(system_dfl_wq, &ctx->close_work);
xa_unlock(&ctx_table);
}
return 0;

View File

@ -45,6 +45,8 @@
#include "uverbs.h"
#define RESCHED_LOOP_CNT_THRESHOLD 0x1000
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
bool make_dirty = umem->writable && dirty;
@ -55,10 +57,14 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
DMA_BIDIRECTIONAL, 0);
for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i)
for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
unpin_user_page_range_dirty_lock(sg_page(sg),
DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
cond_resched();
}
sg_free_append_table(&umem->sgt_append);
}
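The hunk above makes the unpin loop yield periodically, but only once every RESCHED_LOOP_CNT_THRESHOLD (0x1000) scatterlist entries. The same throttling pattern in isolation (illustrative only, not part of the patch):

#include <linux/sched.h>

#define EXAMPLE_RESCHED_THRESHOLD 0x1000   /* mirrors RESCHED_LOOP_CNT_THRESHOLD */

static void example_release_many(unsigned long nr_items)
{
        unsigned long i;

        for (i = 0; i < nr_items; i++) {
                /* per-item release work goes here */

                /* Check for rescheduling only every EXAMPLE_RESCHED_THRESHOLD
                 * iterations: cheap for short loops, still bounded latency
                 * for very long ones. */
                if (i && !(i % EXAMPLE_RESCHED_THRESHOLD))
                        cond_resched();
        }
}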

View File

@ -148,6 +148,7 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
case IB_RATE_400_GBPS: return 160;
case IB_RATE_600_GBPS: return 240;
case IB_RATE_800_GBPS: return 320;
case IB_RATE_1600_GBPS: return 640;
default: return -1;
}
}
@ -178,6 +179,7 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
case 160: return IB_RATE_400_GBPS;
case 240: return IB_RATE_600_GBPS;
case 320: return IB_RATE_800_GBPS;
case 640: return IB_RATE_1600_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
@ -208,6 +210,7 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_400_GBPS: return 425000;
case IB_RATE_600_GBPS: return 637500;
case IB_RATE_800_GBPS: return 850000;
case IB_RATE_1600_GBPS: return 1700000;
default: return -1;
}
}
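Consistency check on the new 1600 Gbps rows (derived only from the neighbouring entries, not from an external spec): the multiplier column is the rate divided by the 2.5 Gbps SDR base, so 1600 / 2.5 = 640 is exactly double the 800 Gbps row's 320; likewise the Mbps column doubles the 800 Gbps value, 2 * 850000 = 1700000.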

View File

@ -13,5 +13,6 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re/
obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
obj-$(CONFIG_INFINIBAND_IONIC) += ionic/

View File

@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_BNG_RE
tristate "Broadcom Next generation RoCE HCA support"
depends on 64BIT
depends on INET && DCB && BNGE
help
This driver supports Broadcom Next generation
50/100/200/400/800 gigabit RoCE HCAs. The module
will be called bng_re. To compile this driver
as a module, choose M here.
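Usage note: with the dependencies above satisfied (64BIT, INET, DCB and the BNGE Ethernet driver), CONFIG_INFINIBAND_BNG_RE=m builds the bng_re.ko module assembled from the objects listed in the Makefile below.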

View File

@ -0,0 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnge -I $(srctree)/drivers/infiniband/hw/bnxt_re
obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re.o
bng_re-y := bng_dev.o bng_fw.o \
bng_res.o bng_sp.o \
bng_debugfs.o

View File

@ -0,0 +1,39 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/debugfs.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include "bng_res.h"
#include "bng_fw.h"
#include "bnge.h"
#include "bnge_auxr.h"
#include "bng_re.h"
#include "bng_debugfs.h"
static struct dentry *bng_re_debugfs_root;
void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev)
{
struct pci_dev *pdev = rdev->aux_dev->pdev;
rdev->dbg_root =
debugfs_create_dir(dev_name(&pdev->dev), bng_re_debugfs_root);
}
void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev)
{
debugfs_remove_recursive(rdev->dbg_root);
rdev->dbg_root = NULL;
}
void bng_re_register_debugfs(void)
{
bng_re_debugfs_root = debugfs_create_dir("bng_re", NULL);
}
void bng_re_unregister_debugfs(void)
{
debugfs_remove(bng_re_debugfs_root);
}

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (c) 2025 Broadcom.
#ifndef __BNG_RE_DEBUGFS__
#define __BNG_RE_DEBUGFS__
void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev);
void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev);
void bng_re_register_debugfs(void);
void bng_re_unregister_debugfs(void);
#endif

View File

@ -0,0 +1,534 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/auxiliary_bus.h>
#include <rdma/ib_verbs.h>
#include "bng_res.h"
#include "bng_sp.h"
#include "bng_fw.h"
#include "bnge.h"
#include "bnge_auxr.h"
#include "bng_re.h"
#include "bnge_hwrm.h"
#include "bng_debugfs.h"
MODULE_AUTHOR("Siva Reddy Kallam <siva.kallam@broadcom.com>");
MODULE_DESCRIPTION(BNG_RE_DESC);
MODULE_LICENSE("Dual BSD/GPL");
static struct bng_re_dev *bng_re_dev_add(struct auxiliary_device *adev,
struct bnge_auxr_dev *aux_dev)
{
struct bng_re_dev *rdev;
/* Allocate bng_re_dev instance */
rdev = ib_alloc_device(bng_re_dev, ibdev);
if (!rdev) {
pr_err("%s: bng_re_dev allocation failure!", KBUILD_MODNAME);
return NULL;
}
/* Assign auxiliary device specific data */
rdev->netdev = aux_dev->net;
rdev->aux_dev = aux_dev;
rdev->adev = adev;
rdev->fn_id = rdev->aux_dev->pdev->devfn;
return rdev;
}
static int bng_re_register_netdev(struct bng_re_dev *rdev)
{
struct bnge_auxr_dev *aux_dev;
aux_dev = rdev->aux_dev;
return bnge_register_dev(aux_dev, rdev->adev);
}
static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev)
{
struct bng_re_chip_ctx *chip_ctx;
if (!rdev->chip_ctx)
return;
kfree(rdev->dev_attr);
rdev->dev_attr = NULL;
chip_ctx = rdev->chip_ctx;
rdev->chip_ctx = NULL;
rdev->rcfw.res = NULL;
rdev->bng_res.cctx = NULL;
rdev->bng_res.pdev = NULL;
kfree(chip_ctx);
}
static int bng_re_setup_chip_ctx(struct bng_re_dev *rdev)
{
struct bng_re_chip_ctx *chip_ctx;
struct bnge_auxr_dev *aux_dev;
int rc = -ENOMEM;
aux_dev = rdev->aux_dev;
rdev->bng_res.pdev = aux_dev->pdev;
rdev->rcfw.res = &rdev->bng_res;
chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
if (!chip_ctx)
return -ENOMEM;
chip_ctx->chip_num = aux_dev->chip_num;
chip_ctx->hw_stats_size = aux_dev->hw_ring_stats_size;
rdev->chip_ctx = chip_ctx;
rdev->bng_res.cctx = rdev->chip_ctx;
rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
if (!rdev->dev_attr)
goto free_chip_ctx;
rdev->bng_res.dattr = rdev->dev_attr;
return 0;
free_chip_ctx:
kfree(rdev->chip_ctx);
rdev->chip_ctx = NULL;
return rc;
}
static void bng_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
{
hdr->req_type = cpu_to_le16(opcd);
hdr->cmpl_ring = cpu_to_le16(-1);
hdr->target_id = cpu_to_le16(-1);
}
static void bng_re_fill_fw_msg(struct bnge_fw_msg *fw_msg, void *msg,
int msg_len, void *resp, int resp_max_len,
int timeout)
{
fw_msg->msg = msg;
fw_msg->msg_len = msg_len;
fw_msg->resp = resp;
fw_msg->resp_max_len = resp_max_len;
fw_msg->timeout = timeout;
}
static int bng_re_net_ring_free(struct bng_re_dev *rdev,
u16 fw_ring_id, int type)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct hwrm_ring_free_input req = {};
struct hwrm_ring_free_output resp;
struct bnge_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!rdev)
return rc;
if (!aux_dev)
return rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
rc = bnge_send_msg(aux_dev, &fw_msg);
if (rc)
ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
req.ring_id, rc);
return rc;
}
static int bng_re_net_ring_alloc(struct bng_re_dev *rdev,
struct bng_re_ring_attr *ring_attr,
u16 *fw_ring_id)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct hwrm_ring_alloc_input req = {};
struct hwrm_ring_alloc_output resp;
struct bnge_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!aux_dev)
return rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
req.enables = 0;
req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
if (ring_attr->pages > 1) {
/* Page size is in log2 units */
req.page_size = BNGE_PAGE_SHIFT;
req.page_tbl_depth = 1;
}
req.fbo = 0;
/* Association of ring index with doorbell index and MSIX number */
req.logical_id = cpu_to_le16(ring_attr->lrid);
req.length = cpu_to_le32(ring_attr->depth + 1);
req.ring_type = ring_attr->type;
req.int_mode = ring_attr->mode;
bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
rc = bnge_send_msg(aux_dev, &fw_msg);
if (!rc)
*fw_ring_id = le16_to_cpu(resp.ring_id);
return rc;
}
static int bng_re_stats_ctx_free(struct bng_re_dev *rdev)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct hwrm_stat_ctx_free_input req = {};
struct hwrm_stat_ctx_free_output resp = {};
struct bnge_fw_msg fw_msg = {};
int rc = -EINVAL;
if (!aux_dev)
return rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id);
bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
rc = bnge_send_msg(aux_dev, &fw_msg);
if (rc)
ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
rc);
return rc;
}
static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct bng_re_stats *stats = &rdev->stats_ctx;
struct hwrm_stat_ctx_alloc_output resp = {};
struct hwrm_stat_ctx_alloc_input req = {};
struct bnge_fw_msg fw_msg = {};
int rc = -EINVAL;
stats->fw_id = BNGE_INVALID_STATS_CTX_ID;
if (!aux_dev)
return rc;
bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
req.update_period_ms = cpu_to_le32(1000);
req.stats_dma_addr = cpu_to_le64(stats->dma_map);
req.stats_dma_length = cpu_to_le16(rdev->chip_ctx->hw_stats_size);
req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
rc = bnge_send_msg(aux_dev, &fw_msg);
if (!rc)
stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
return rc;
}
static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
{
struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
struct hwrm_ver_get_output ver_get_resp = {};
struct hwrm_ver_get_input ver_get_req = {};
struct bng_re_chip_ctx *cctx;
struct bnge_fw_msg fw_msg = {};
int rc;
bng_re_init_hwrm_hdr((void *)&ver_get_req, HWRM_VER_GET);
ver_get_req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
ver_get_req.hwrm_intf_min = HWRM_VERSION_MINOR;
ver_get_req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
bng_re_fill_fw_msg(&fw_msg, (void *)&ver_get_req, sizeof(ver_get_req),
(void *)&ver_get_resp, sizeof(ver_get_resp),
BNGE_DFLT_HWRM_CMD_TIMEOUT);
rc = bnge_send_msg(aux_dev, &fw_msg);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
rc);
return;
}
cctx = rdev->chip_ctx;
cctx->hwrm_intf_ver =
(u64)le16_to_cpu(ver_get_resp.hwrm_intf_major) << 48 |
(u64)le16_to_cpu(ver_get_resp.hwrm_intf_minor) << 32 |
(u64)le16_to_cpu(ver_get_resp.hwrm_intf_build) << 16 |
le16_to_cpu(ver_get_resp.hwrm_intf_patch);
cctx->hwrm_cmd_max_timeout = le16_to_cpu(ver_get_resp.max_req_timeout);
if (!cctx->hwrm_cmd_max_timeout)
cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT;
}
static void bng_re_dev_uninit(struct bng_re_dev *rdev)
{
int rc;
bng_re_debugfs_rem_pdev(rdev);
if (test_and_clear_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bng_re_deinit_rcfw(&rdev->rcfw);
if (rc)
ibdev_warn(&rdev->ibdev,
"Failed to deinitialize RCFW: %#x", rc);
bng_re_stats_ctx_free(rdev);
bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
bng_re_disable_rcfw_channel(&rdev->rcfw);
bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id,
RING_ALLOC_REQ_RING_TYPE_NQ);
bng_re_free_rcfw_channel(&rdev->rcfw);
}
kfree(rdev->nqr);
rdev->nqr = NULL;
bng_re_destroy_chip_ctx(rdev);
if (test_and_clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
bnge_unregister_dev(rdev->aux_dev);
}
static int bng_re_dev_init(struct bng_re_dev *rdev)
{
struct bng_re_ring_attr rattr = {};
struct bng_re_creq_ctx *creq;
u32 db_offt;
int vid;
u8 type;
int rc;
/* Registered a new RoCE device instance to netdev */
rc = bng_re_register_netdev(rdev);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to register with netedev: %#x\n", rc);
return -EINVAL;
}
set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
if (rdev->aux_dev->auxr_info->msix_requested < BNG_RE_MIN_MSIX) {
ibdev_err(&rdev->ibdev,
"RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
rdev->aux_dev->auxr_info->msix_requested);
bnge_unregister_dev(rdev->aux_dev);
clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
return -EINVAL;
}
ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
rdev->aux_dev->auxr_info->msix_requested);
rc = bng_re_setup_chip_ctx(rdev);
if (rc) {
bnge_unregister_dev(rdev->aux_dev);
clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
return -EINVAL;
}
bng_re_query_hwrm_version(rdev);
rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
goto fail;
}
/* Allocate nq record memory */
rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
if (!rdev->nqr) {
bng_re_destroy_chip_ctx(rdev);
bnge_unregister_dev(rdev->aux_dev);
clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
return -ENOMEM;
}
rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested;
memcpy(rdev->nqr->msix_entries, rdev->aux_dev->msix_info,
sizeof(struct bnge_msix_info) * rdev->nqr->num_msix);
type = RING_ALLOC_REQ_RING_TYPE_NQ;
creq = &rdev->rcfw.creq;
rattr.dma_arr = creq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr;
rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
rattr.type = type;
rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
rattr.depth = BNG_FW_CREQE_MAX_CNT - 1;
rattr.lrid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].ring_idx;
rc = bng_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
db_offt = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].db_offset;
vid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].vector;
rc = bng_re_enable_fw_channel(&rdev->rcfw,
vid, db_offt);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
rc);
goto free_ring;
}
rc = bng_re_get_dev_attr(&rdev->rcfw);
if (rc)
goto disable_rcfw;
bng_re_debugfs_add_pdev(rdev);
rc = bng_re_alloc_stats_ctx_mem(rdev->bng_res.pdev, rdev->chip_ctx,
&rdev->stats_ctx);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate stats context: %#x\n", rc);
goto disable_rcfw;
}
rc = bng_re_stats_ctx_alloc(rdev);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate QPLIB context: %#x\n", rc);
goto free_stats_ctx;
}
rc = bng_re_init_rcfw(&rdev->rcfw, &rdev->stats_ctx);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to initialize RCFW: %#x\n", rc);
goto free_sctx;
}
set_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
return 0;
free_sctx:
bng_re_stats_ctx_free(rdev);
free_stats_ctx:
bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
disable_rcfw:
bng_re_disable_rcfw_channel(&rdev->rcfw);
free_ring:
bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
free_rcfw:
bng_re_free_rcfw_channel(&rdev->rcfw);
fail:
bng_re_dev_uninit(rdev);
return rc;
}
static int bng_re_add_device(struct auxiliary_device *adev)
{
struct bnge_auxr_priv *auxr_priv =
container_of(adev, struct bnge_auxr_priv, aux_dev);
struct bng_re_en_dev_info *dev_info;
struct bng_re_dev *rdev;
int rc;
dev_info = auxiliary_get_drvdata(adev);
rdev = bng_re_dev_add(adev, auxr_priv->auxr_dev);
if (!rdev) {
rc = -ENOMEM;
goto exit;
}
dev_info->rdev = rdev;
rc = bng_re_dev_init(rdev);
if (rc)
goto re_dev_dealloc;
return 0;
re_dev_dealloc:
ib_dealloc_device(&rdev->ibdev);
exit:
return rc;
}
static void bng_re_remove_device(struct bng_re_dev *rdev,
struct auxiliary_device *aux_dev)
{
bng_re_dev_uninit(rdev);
ib_dealloc_device(&rdev->ibdev);
}
static int bng_re_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct bnge_auxr_priv *aux_priv =
container_of(adev, struct bnge_auxr_priv, aux_dev);
struct bng_re_en_dev_info *en_info;
int rc;
en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
if (!en_info)
return -ENOMEM;
en_info->auxr_dev = aux_priv->auxr_dev;
auxiliary_set_drvdata(adev, en_info);
rc = bng_re_add_device(adev);
if (rc)
kfree(en_info);
return rc;
}
static void bng_re_remove(struct auxiliary_device *adev)
{
struct bng_re_en_dev_info *dev_info = auxiliary_get_drvdata(adev);
struct bng_re_dev *rdev;
rdev = dev_info->rdev;
if (rdev)
bng_re_remove_device(rdev, adev);
kfree(dev_info);
}
static const struct auxiliary_device_id bng_re_id_table[] = {
{ .name = BNG_RE_ADEV_NAME ".rdma", },
{},
};
MODULE_DEVICE_TABLE(auxiliary, bng_re_id_table);
static struct auxiliary_driver bng_re_driver = {
.name = "rdma",
.probe = bng_re_probe,
.remove = bng_re_remove,
.id_table = bng_re_id_table,
};
static int __init bng_re_mod_init(void)
{
int rc;
bng_re_register_debugfs();
rc = auxiliary_driver_register(&bng_re_driver);
if (rc) {
pr_err("%s: Failed to register auxiliary driver\n",
KBUILD_MODNAME);
goto unreg_debugfs;
}
return 0;
unreg_debugfs:
bng_re_unregister_debugfs();
return rc;
}
static void __exit bng_re_mod_exit(void)
{
auxiliary_driver_unregister(&bng_re_driver);
bng_re_unregister_debugfs();
}
module_init(bng_re_mod_init);
module_exit(bng_re_mod_exit);
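A note on the naming: bng_re_driver registers under the short name "rdma", while its id table asks for "bng_en.rdma". The auxiliary bus builds the device match string as "<producing module>.<device name>", so this driver binds to an auxiliary device that the bng_en (BNGE) Ethernet driver is expected to create. A hypothetical producer-side sketch (not part of this commit; every identifier below is illustrative):

#include <linux/auxiliary_bus.h>

static void bng_roce_adev_release(struct device *dev)
{
        /* nothing dynamically allocated in this illustration */
}

static struct auxiliary_device bng_roce_adev = {
        .name = "rdma",
        .dev.release = bng_roce_adev_release,
};

static int bng_en_register_roce_aux(struct device *parent)
{
        int rc;

        bng_roce_adev.dev.parent = parent;
        rc = auxiliary_device_init(&bng_roce_adev);
        if (rc)
                return rc;
        /* auxiliary_device_add() prefixes KBUILD_MODNAME, so when built as
         * "bng_en" the resulting match string is "bng_en.rdma". */
        rc = auxiliary_device_add(&bng_roce_adev);
        if (rc)
                auxiliary_device_uninit(&bng_roce_adev);
        return rc;
}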

View File

@ -0,0 +1,767 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/pci.h>
#include "roce_hsi.h"
#include "bng_res.h"
#include "bng_fw.h"
#include "bng_sp.h"
/**
* bng_re_map_rc - map return type based on opcode
* @opcode: roce slow path opcode
*
* case #1
* Firmware initiated error recovery is a safe state machine and
* driver can consider all the underlying rdma resources are free.
* In this state, it is safe to return success for opcodes related to
* destroying rdma resources (like destroy qp, destroy cq etc.).
*
* case #2
* If driver detect potential firmware stall, it is not safe state machine
* and the driver can not consider all the underlying rdma resources are
* freed.
* In this state, it is not safe to return success for opcodes related to
* destroying rdma resources (like destroy qp, destroy cq etc.).
*
* Scope of this helper function is only for case #1.
*
* Returns:
* 0 to communicate success to caller.
* Non zero error code to communicate failure to caller.
*/
static int bng_re_map_rc(u8 opcode)
{
switch (opcode) {
case CMDQ_BASE_OPCODE_DESTROY_QP:
case CMDQ_BASE_OPCODE_DESTROY_SRQ:
case CMDQ_BASE_OPCODE_DESTROY_CQ:
case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
case CMDQ_BASE_OPCODE_DEREGISTER_MR:
case CMDQ_BASE_OPCODE_DELETE_GID:
case CMDQ_BASE_OPCODE_DESTROY_QP1:
case CMDQ_BASE_OPCODE_DESTROY_AH:
case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
return 0;
default:
return -ETIMEDOUT;
}
}
void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw)
{
kfree(rcfw->crsqe_tbl);
bng_re_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
bng_re_free_hwq(rcfw->res, &rcfw->creq.hwq);
rcfw->pdev = NULL;
}
int bng_re_alloc_fw_channel(struct bng_re_res *res,
struct bng_re_rcfw *rcfw)
{
struct bng_re_hwq_attr hwq_attr = {};
struct bng_re_sg_info sginfo = {};
struct bng_re_cmdq_ctx *cmdq;
struct bng_re_creq_ctx *creq;
rcfw->pdev = res->pdev;
cmdq = &rcfw->cmdq;
creq = &rcfw->creq;
rcfw->res = res;
sginfo.pgsize = PAGE_SIZE;
sginfo.pgshft = PAGE_SHIFT;
hwq_attr.sginfo = &sginfo;
hwq_attr.res = rcfw->res;
hwq_attr.depth = BNG_FW_CREQE_MAX_CNT;
hwq_attr.stride = BNG_FW_CREQE_UNITS;
hwq_attr.type = BNG_HWQ_TYPE_QUEUE;
if (bng_re_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
dev_err(&rcfw->pdev->dev,
"HW channel CREQ allocation failed\n");
goto fail;
}
rcfw->cmdq_depth = BNG_FW_CMDQE_MAX_CNT;
sginfo.pgsize = bng_fw_cmdqe_page_size(rcfw->cmdq_depth);
hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
hwq_attr.stride = BNG_FW_CMDQE_UNITS;
hwq_attr.type = BNG_HWQ_TYPE_CTX;
if (bng_re_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
dev_err(&rcfw->pdev->dev,
"HW channel CMDQ allocation failed\n");
goto fail;
}
rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
if (!rcfw->crsqe_tbl)
goto fail;
spin_lock_init(&rcfw->tbl_lock);
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
return 0;
fail:
bng_re_free_rcfw_channel(rcfw);
return -ENOMEM;
}
static int bng_re_process_qp_event(struct bng_re_rcfw *rcfw,
struct creq_qp_event *qp_event,
u32 *num_wait)
{
struct bng_re_hwq *hwq = &rcfw->cmdq.hwq;
struct bng_re_crsqe *crsqe;
u32 req_size;
u16 cookie;
bool is_waiter_alive;
struct pci_dev *pdev;
u32 wait_cmds = 0;
int rc = 0;
pdev = rcfw->pdev;
switch (qp_event->event) {
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
dev_err(&pdev->dev, "Received QP error notification\n");
break;
default:
/*
* Command Response
* cmdq->lock needs to be acquired to synchronie
* the command send and completion reaping. This function
* is always called with creq->lock held. Using
* the nested variant of spin_lock.
*
*/
spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
cookie &= BNG_FW_MAX_COOKIE_VALUE;
crsqe = &rcfw->crsqe_tbl[cookie];
if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
&rcfw->cmdq.flags),
"Unreponsive rcfw channel detected.!!")) {
dev_info(&pdev->dev,
"rcfw timedout: cookie = %#x, free_slots = %d",
cookie, crsqe->free_slots);
spin_unlock(&hwq->lock);
return rc;
}
if (crsqe->is_waiter_alive) {
if (crsqe->resp) {
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
/* Insert write memory barrier to ensure that
* response data is copied before clearing the
* flags
*/
smp_wmb();
}
}
wait_cmds++;
req_size = crsqe->req_size;
is_waiter_alive = crsqe->is_waiter_alive;
crsqe->req_size = 0;
if (!is_waiter_alive)
crsqe->resp = NULL;
crsqe->is_in_used = false;
hwq->cons += req_size;
spin_unlock(&hwq->lock);
}
*num_wait += wait_cmds;
return rc;
}
/* function events */
static int bng_re_process_func_event(struct bng_re_rcfw *rcfw,
struct creq_func_event *func_event)
{
switch (func_event->event) {
case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
break;
default:
return -EINVAL;
}
return 0;
}
/* CREQ Completion handlers */
static void bng_re_service_creq(struct tasklet_struct *t)
{
struct bng_re_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
struct bng_re_creq_ctx *creq = &rcfw->creq;
u32 type, budget = BNG_FW_CREQ_ENTRY_POLL_BUDGET;
struct bng_re_hwq *hwq = &creq->hwq;
struct creq_base *creqe;
u32 num_wakeup = 0;
u32 hw_polled = 0;
/* Service the CREQ until budget is over */
spin_lock_bh(&hwq->lock);
while (budget > 0) {
creqe = bng_re_get_qe(hwq, hwq->cons, NULL);
if (!BNG_FW_CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
break;
/* The valid test of the entry must be done first before
* reading any further.
*/
dma_rmb();
type = creqe->type & CREQ_BASE_TYPE_MASK;
switch (type) {
case CREQ_BASE_TYPE_QP_EVENT:
bng_re_process_qp_event
(rcfw, (struct creq_qp_event *)creqe,
&num_wakeup);
creq->stats.creq_qp_event_processed++;
break;
case CREQ_BASE_TYPE_FUNC_EVENT:
if (!bng_re_process_func_event
(rcfw, (struct creq_func_event *)creqe))
creq->stats.creq_func_event_processed++;
else
dev_warn(&rcfw->pdev->dev,
"aeqe:%#x Not handled\n", type);
break;
default:
if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
dev_warn(&rcfw->pdev->dev,
"creqe with event 0x%x not handled\n",
type);
break;
}
budget--;
hw_polled++;
bng_re_hwq_incr_cons(hwq->max_elements, &hwq->cons,
1, &creq->creq_db.dbinfo.flags);
}
if (hw_polled)
bng_re_ring_nq_db(&creq->creq_db.dbinfo,
rcfw->res->cctx, true);
spin_unlock_bh(&hwq->lock);
if (num_wakeup)
wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
}
static int __send_message_basic_sanity(struct bng_re_rcfw *rcfw,
struct bng_re_cmdqmsg *msg,
u8 opcode)
{
struct bng_re_cmdq_ctx *cmdq;
cmdq = &rcfw->cmdq;
if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
return -ETIMEDOUT;
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
dev_err(&rcfw->pdev->dev, "RCFW already initialized!");
return -EINVAL;
}
if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
(opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev,
"RCFW not initialized, reject opcode 0x%x",
opcode);
return -EOPNOTSUPP;
}
return 0;
}
static int __send_message(struct bng_re_rcfw *rcfw,
struct bng_re_cmdqmsg *msg, u8 opcode)
{
u32 bsize, free_slots, required_slots;
struct bng_re_cmdq_ctx *cmdq;
struct bng_re_crsqe *crsqe;
struct bng_fw_cmdqe *cmdqe;
struct bng_re_hwq *hwq;
u32 sw_prod, cmdq_prod;
struct pci_dev *pdev;
u16 cookie;
u8 *preq;
cmdq = &rcfw->cmdq;
hwq = &cmdq->hwq;
pdev = rcfw->pdev;
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
spin_lock_bh(&hwq->lock);
required_slots = bng_re_get_cmd_slots(msg->req);
free_slots = HWQ_FREE_SLOTS(hwq);
cookie = cmdq->seq_num & BNG_FW_MAX_COOKIE_VALUE;
crsqe = &rcfw->crsqe_tbl[cookie];
if (required_slots >= free_slots) {
dev_info_ratelimited(&pdev->dev,
"CMDQ is full req/free %d/%d!",
required_slots, free_slots);
spin_unlock_bh(&hwq->lock);
return -EAGAIN;
}
__set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
bsize = bng_re_set_cmd_slots(msg->req);
crsqe->free_slots = free_slots;
crsqe->resp = (struct creq_qp_event *)msg->resp;
crsqe->is_waiter_alive = true;
crsqe->is_in_used = true;
crsqe->opcode = opcode;
crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
struct bng_re_rcfw_sbuf *sbuf = msg->sb;
__set_cmdq_base_resp_addr(msg->req, msg->req_sz,
cpu_to_le64(sbuf->dma_addr));
__set_cmdq_base_resp_size(msg->req, msg->req_sz,
ALIGN(sbuf->size,
BNG_FW_CMDQE_UNITS) /
BNG_FW_CMDQE_UNITS);
}
preq = (u8 *)msg->req;
do {
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(hwq->prod, hwq);
cmdqe = bng_re_get_qe(hwq, sw_prod, NULL);
/* Copy a segment of the req cmd to the cmdq */
memset(cmdqe, 0, sizeof(*cmdqe));
memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
preq += min_t(u32, bsize, sizeof(*cmdqe));
bsize -= min_t(u32, bsize, sizeof(*cmdqe));
hwq->prod++;
} while (bsize > 0);
cmdq->seq_num++;
cmdq_prod = hwq->prod & 0xFFFF;
if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
/* The very first doorbell write
* is required to set this flag
* which prompts the FW to reset
* its internal pointers
*/
cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
}
/* ring CMDQ DB */
wmb();
writel(cmdq_prod, cmdq->cmdq_mbox.prod);
writel(BNG_FW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
spin_unlock_bh(&hwq->lock);
/* Return the CREQ response pointer */
return 0;
}
/**
* __wait_for_resp - Don't hold the cpu context and wait for response
* @rcfw: rcfw channel instance of rdev
* @cookie: cookie to track the command
*
* Wait for command completion in sleepable context.
*
* Returns:
* 0 if command is completed by firmware.
* Non zero error code for rest of the case.
*/
static int __wait_for_resp(struct bng_re_rcfw *rcfw, u16 cookie)
{
struct bng_re_cmdq_ctx *cmdq;
struct bng_re_crsqe *crsqe;
cmdq = &rcfw->cmdq;
crsqe = &rcfw->crsqe_tbl[cookie];
do {
wait_event_timeout(cmdq->waitq,
!crsqe->is_in_used,
secs_to_jiffies(rcfw->max_timeout));
if (!crsqe->is_in_used)
return 0;
bng_re_service_creq(&rcfw->creq.creq_tasklet);
if (!crsqe->is_in_used)
return 0;
} while (true);
};
/**
* bng_re_rcfw_send_message - interface to send
* and complete rcfw command.
* @rcfw: rcfw channel instance of rdev
* @msg: message to send
*
* This function does not account shadow queue depth. It will send
* all the command unconditionally as long as send queue is not full.
*
* Returns:
* 0 if command completed by firmware.
* Non zero if the command is not completed by firmware.
*/
int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
struct bng_re_cmdqmsg *msg)
{
struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
struct bng_re_crsqe *crsqe;
u16 cookie;
int rc;
u8 opcode;
opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
rc = __send_message_basic_sanity(rcfw, msg, opcode);
if (rc)
return rc == -ENXIO ? bng_re_map_rc(opcode) : rc;
rc = __send_message(rcfw, msg, opcode);
if (rc)
return rc;
cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
& BNG_FW_MAX_COOKIE_VALUE;
rc = __wait_for_resp(rcfw, cookie);
if (rc) {
spin_lock_bh(&rcfw->cmdq.hwq.lock);
crsqe = &rcfw->crsqe_tbl[cookie];
crsqe->is_waiter_alive = false;
if (rc == -ENODEV)
set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
spin_unlock_bh(&rcfw->cmdq.hwq.lock);
return -ETIMEDOUT;
}
if (evnt->status) {
/* failed with status */
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status);
rc = -EIO;
}
return rc;
}
static int bng_re_map_cmdq_mbox(struct bng_re_rcfw *rcfw)
{
struct bng_re_cmdq_mbox *mbox;
resource_size_t bar_reg;
struct pci_dev *pdev;
pdev = rcfw->pdev;
mbox = &rcfw->cmdq.cmdq_mbox;
mbox->reg.bar_id = BNG_FW_COMM_PCI_BAR_REGION;
mbox->reg.len = BNG_FW_COMM_SIZE;
mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
if (!mbox->reg.bar_base) {
dev_err(&pdev->dev,
"CMDQ BAR region %d resc start is 0!\n",
mbox->reg.bar_id);
return -ENOMEM;
}
bar_reg = mbox->reg.bar_base + BNG_FW_COMM_BASE_OFFSET;
mbox->reg.len = BNG_FW_COMM_SIZE;
mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
if (!mbox->reg.bar_reg) {
dev_err(&pdev->dev,
"CMDQ BAR region %d mapping failed\n",
mbox->reg.bar_id);
return -ENOMEM;
}
mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
BNG_FW_PF_VF_COMM_PROD_OFFSET);
mbox->db = (void __iomem *)(mbox->reg.bar_reg + BNG_FW_COMM_TRIG_OFFSET);
return 0;
}
static irqreturn_t bng_re_creq_irq(int irq, void *dev_instance)
{
struct bng_re_rcfw *rcfw = dev_instance;
struct bng_re_creq_ctx *creq;
struct bng_re_hwq *hwq;
u32 sw_cons;
creq = &rcfw->creq;
hwq = &creq->hwq;
/* Prefetch the CREQ element */
sw_cons = HWQ_CMP(hwq->cons, hwq);
bng_re_get_qe(hwq, sw_cons, NULL);
tasklet_schedule(&creq->creq_tasklet);
return IRQ_HANDLED;
}
int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
bool need_init)
{
struct bng_re_creq_ctx *creq;
struct bng_re_res *res;
int rc;
creq = &rcfw->creq;
res = rcfw->res;
if (creq->irq_handler_avail)
return -EFAULT;
creq->msix_vec = msix_vector;
if (need_init)
tasklet_setup(&creq->creq_tasklet, bng_re_service_creq);
else
tasklet_enable(&creq->creq_tasklet);
creq->irq_name = kasprintf(GFP_KERNEL, "bng_re-creq@pci:%s",
pci_name(res->pdev));
if (!creq->irq_name)
return -ENOMEM;
rc = request_irq(creq->msix_vec, bng_re_creq_irq, 0,
creq->irq_name, rcfw);
if (rc) {
kfree(creq->irq_name);
creq->irq_name = NULL;
tasklet_disable(&creq->creq_tasklet);
return rc;
}
creq->irq_handler_avail = true;
bng_re_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
atomic_inc(&rcfw->rcfw_intr_enabled);
return 0;
}
static int bng_re_map_creq_db(struct bng_re_rcfw *rcfw, u32 reg_offt)
{
struct bng_re_creq_db *creq_db;
resource_size_t bar_reg;
struct pci_dev *pdev;
pdev = rcfw->pdev;
creq_db = &rcfw->creq.creq_db;
creq_db->dbinfo.flags = 0;
creq_db->reg.bar_id = BNG_FW_COMM_CONS_PCI_BAR_REGION;
creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
if (!creq_db->reg.bar_id)
dev_err(&pdev->dev,
"CREQ BAR region %d resc start is 0!",
creq_db->reg.bar_id);
bar_reg = creq_db->reg.bar_base + reg_offt;
creq_db->reg.len = BNG_FW_CREQ_DB_LEN;
creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
if (!creq_db->reg.bar_reg) {
dev_err(&pdev->dev,
"CREQ BAR region %d mapping failed",
creq_db->reg.bar_id);
return -ENOMEM;
}
creq_db->dbinfo.db = creq_db->reg.bar_reg;
creq_db->dbinfo.hwq = &rcfw->creq.hwq;
creq_db->dbinfo.xid = rcfw->creq.ring_id;
return 0;
}
void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill)
{
struct bng_re_creq_ctx *creq;
creq = &rcfw->creq;
if (!creq->irq_handler_avail)
return;
creq->irq_handler_avail = false;
/* Mask h/w interrupts */
bng_re_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
/* Sync with last running IRQ-handler */
synchronize_irq(creq->msix_vec);
free_irq(creq->msix_vec, rcfw);
kfree(creq->irq_name);
creq->irq_name = NULL;
atomic_set(&rcfw->rcfw_intr_enabled, 0);
if (kill)
tasklet_kill(&creq->creq_tasklet);
tasklet_disable(&creq->creq_tasklet);
}
void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw)
{
struct bng_re_creq_ctx *creq;
struct bng_re_cmdq_ctx *cmdq;
creq = &rcfw->creq;
cmdq = &rcfw->cmdq;
/* Make sure the HW channel is stopped! */
bng_re_rcfw_stop_irq(rcfw, true);
iounmap(cmdq->cmdq_mbox.reg.bar_reg);
iounmap(creq->creq_db.reg.bar_reg);
cmdq->cmdq_mbox.reg.bar_reg = NULL;
creq->creq_db.reg.bar_reg = NULL;
creq->msix_vec = 0;
}
static void bng_re_start_rcfw(struct bng_re_rcfw *rcfw)
{
struct bng_re_cmdq_ctx *cmdq;
struct bng_re_creq_ctx *creq;
struct bng_re_cmdq_mbox *mbox;
struct cmdq_init init = {0};
cmdq = &rcfw->cmdq;
creq = &rcfw->creq;
mbox = &cmdq->cmdq_mbox;
init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr[0]);
init.cmdq_size_cmdq_lvl =
cpu_to_le16(((rcfw->cmdq_depth <<
CMDQ_INIT_CMDQ_SIZE_SFT) &
CMDQ_INIT_CMDQ_SIZE_MASK) |
((cmdq->hwq.level <<
CMDQ_INIT_CMDQ_LVL_SFT) &
CMDQ_INIT_CMDQ_LVL_MASK));
init.creq_ring_id = cpu_to_le16(creq->ring_id);
/* Write to the mailbox register */
__iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
}
int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
int msix_vector,
int cp_bar_reg_off)
{
struct bng_re_cmdq_ctx *cmdq;
int rc;
cmdq = &rcfw->cmdq;
/* Assign defaults */
cmdq->seq_num = 0;
set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
init_waitqueue_head(&cmdq->waitq);
rc = bng_re_map_cmdq_mbox(rcfw);
if (rc)
return rc;
rc = bng_re_map_creq_db(rcfw, cp_bar_reg_off);
if (rc)
return rc;
rc = bng_re_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
dev_err(&rcfw->pdev->dev,
"Failed to request IRQ for CREQ rc = 0x%x\n", rc);
bng_re_disable_rcfw_channel(rcfw);
return rc;
}
bng_re_start_rcfw(rcfw);
return 0;
}
int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw)
{
struct creq_deinitialize_fw_resp resp = {};
struct cmdq_deinitialize_fw req = {};
struct bng_re_cmdqmsg msg = {};
int rc;
bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
sizeof(req));
bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL,
sizeof(req), sizeof(resp), 0);
rc = bng_re_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
return 0;
}
static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
{
return dev_cap_flags &
(CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
}
#define BNG_RE_HW_RETX(a) _is_hw_retx_supported((a))
static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
{
return dev_cap_ext_flags2 &
CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
}
int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
struct bng_re_stats *stats_ctx)
{
struct creq_initialize_fw_resp resp = {};
struct cmdq_initialize_fw req = {};
struct bng_re_cmdqmsg msg = {};
int rc;
u16 flags = 0;
bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_INITIALIZE_FW,
sizeof(req));
/* Supply (log-base-2-of-host-page-size - base-page-shift)
* to bono to adjust the doorbell page sizes.
*/
req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
BNG_FW_DBR_BASE_PAGE_SHIFT);
if (BNG_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(stats_ctx->fw_id);
bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bng_re_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
return 0;
}

View File

@ -0,0 +1,211 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (c) 2025 Broadcom.
#ifndef __BNG_FW_H__
#define __BNG_FW_H__
#include "bng_tlv.h"
/* FW DB related */
#define BNG_FW_CMDQ_TRIG_VAL 1
#define BNG_FW_COMM_PCI_BAR_REGION 0
#define BNG_FW_COMM_CONS_PCI_BAR_REGION 2
#define BNG_FW_DBR_BASE_PAGE_SHIFT 12
#define BNG_FW_COMM_SIZE 0x104
#define BNG_FW_COMM_BASE_OFFSET 0x600
#define BNG_FW_COMM_TRIG_OFFSET 0x100
#define BNG_FW_PF_VF_COMM_PROD_OFFSET 0xc
#define BNG_FW_CREQ_DB_LEN 8
/* CREQ */
#define BNG_FW_CREQE_MAX_CNT (64 * 1024)
#define BNG_FW_CREQE_UNITS 16
#define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100
#define BNG_FW_CREQ_CMP_VALID(hdr, pass) \
(!!((hdr)->v & CREQ_BASE_V) == \
!((pass) & BNG_RE_FLAG_EPOCH_CONS_MASK))
#define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100
/* CMDQ */
struct bng_fw_cmdqe {
u8 data[16];
};
#define BNG_FW_CMDQE_MAX_CNT 8192
#define BNG_FW_CMDQE_UNITS sizeof(struct bng_fw_cmdqe)
#define BNG_FW_CMDQE_BYTES(depth) ((depth) * BNG_FW_CMDQE_UNITS)
#define BNG_FW_MAX_COOKIE_VALUE (BNG_FW_CMDQE_MAX_CNT - 1)
#define BNG_FW_CMD_IS_BLOCKING 0x8000
/* Crsq buf is 1024-Byte */
struct bng_re_crsbe {
u8 data[1024];
};
static inline u32 bng_fw_cmdqe_npages(u32 depth)
{
u32 npages;
npages = BNG_FW_CMDQE_BYTES(depth) / PAGE_SIZE;
if (BNG_FW_CMDQE_BYTES(depth) % PAGE_SIZE)
npages++;
return npages;
}
static inline u32 bng_fw_cmdqe_page_size(u32 depth)
{
return (bng_fw_cmdqe_npages(depth) * PAGE_SIZE);
}
struct bng_re_cmdq_mbox {
struct bng_re_reg_desc reg;
void __iomem *prod;
void __iomem *db;
};
/* HWQ */
struct bng_re_cmdq_ctx {
struct bng_re_hwq hwq;
struct bng_re_cmdq_mbox cmdq_mbox;
unsigned long flags;
#define FIRMWARE_INITIALIZED_FLAG (0)
#define FIRMWARE_STALL_DETECTED (3)
#define FIRMWARE_FIRST_FLAG (31)
wait_queue_head_t waitq;
u32 seq_num;
};
struct bng_re_creq_db {
struct bng_re_reg_desc reg;
struct bng_re_db_info dbinfo;
};
struct bng_re_creq_stat {
u64 creq_qp_event_processed;
u64 creq_func_event_processed;
};
struct bng_re_creq_ctx {
struct bng_re_hwq hwq;
struct bng_re_creq_db creq_db;
struct bng_re_creq_stat stats;
struct tasklet_struct creq_tasklet;
u16 ring_id;
int msix_vec;
bool irq_handler_avail;
char *irq_name;
};
struct bng_re_crsqe {
struct creq_qp_event *resp;
u32 req_size;
/* Free slots at the time of submission */
u32 free_slots;
u8 opcode;
bool is_waiter_alive;
bool is_in_used;
};
struct bng_re_rcfw_sbuf {
void *sb;
dma_addr_t dma_addr;
u32 size;
};
/* RoCE FW Communication Channels */
struct bng_re_rcfw {
struct pci_dev *pdev;
struct bng_re_res *res;
struct bng_re_cmdq_ctx cmdq;
struct bng_re_creq_ctx creq;
struct bng_re_crsqe *crsqe_tbl;
/* To synchronize the qp-handle hash table */
spinlock_t tbl_lock;
u32 cmdq_depth;
/* cached from chip cctx for quick reference in slow path */
u16 max_timeout;
atomic_t rcfw_intr_enabled;
};
struct bng_re_cmdqmsg {
struct cmdq_base *req;
struct creq_base *resp;
void *sb;
u32 req_sz;
u32 res_sz;
u8 block;
};
static inline void bng_re_rcfw_cmd_prep(struct cmdq_base *req,
u8 opcode, u8 cmd_size)
{
req->opcode = opcode;
req->cmd_size = cmd_size;
}
static inline void bng_re_fill_cmdqmsg(struct bng_re_cmdqmsg *msg,
void *req, void *resp, void *sb,
u32 req_sz, u32 res_sz, u8 block)
{
msg->req = req;
msg->resp = resp;
msg->sb = sb;
msg->req_sz = req_sz;
msg->res_sz = res_sz;
msg->block = block;
}
/* Get the number of command units required for the req. The
* function returns correct value only if called before
* setting using bng_re_set_cmd_slots
*/
static inline u32 bng_re_get_cmd_slots(struct cmdq_base *req)
{
u32 cmd_units = 0;
if (HAS_TLV_HEADER(req)) {
struct roce_tlv *tlv_req = (struct roce_tlv *)req;
cmd_units = tlv_req->total_size;
} else {
cmd_units = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
BNG_FW_CMDQE_UNITS;
}
return cmd_units;
}
static inline u32 bng_re_set_cmd_slots(struct cmdq_base *req)
{
u32 cmd_byte = 0;
if (HAS_TLV_HEADER(req)) {
struct roce_tlv *tlv_req = (struct roce_tlv *)req;
cmd_byte = tlv_req->total_size * BNG_FW_CMDQE_UNITS;
} else {
cmd_byte = req->cmd_size;
req->cmd_size = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
BNG_FW_CMDQE_UNITS;
}
return cmd_byte;
}
void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw);
int bng_re_alloc_fw_channel(struct bng_re_res *res,
struct bng_re_rcfw *rcfw);
int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
int msix_vector,
int cp_bar_reg_off);
void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw);
int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
bool need_init);
void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill);
int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
struct bng_re_cmdqmsg *msg);
int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
struct bng_re_stats *stats_ctx);
int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw);
#endif
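Worked example for the slot helpers above (sizes follow from this header; the 24-byte command is hypothetical): BNG_FW_CMDQE_UNITS is sizeof(struct bng_fw_cmdqe) = 16 bytes, so a non-TLV command with cmd_size = 24 occupies (24 + 16 - 1) / 16 = 2 cmdqe slots. bng_re_get_cmd_slots() must run before bng_re_set_cmd_slots(), because the latter overwrites req->cmd_size in place with that unit count (2) while returning the original byte length (24), which __send_message() then uses as the copy length.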

View File

@ -0,0 +1,85 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (c) 2025 Broadcom.
#ifndef __BNG_RE_H__
#define __BNG_RE_H__
#include "bng_res.h"
#define BNG_RE_ADEV_NAME "bng_en"
#define BNG_RE_DESC "Broadcom 800G RoCE Driver"
#define rdev_to_dev(rdev) ((rdev) ? (&(rdev)->ibdev.dev) : NULL)
#define BNG_RE_MIN_MSIX 2
#define BNG_RE_MAX_MSIX BNGE_MAX_ROCE_MSIX
#define BNG_RE_CREQ_NQ_IDX 0
#define BNGE_INVALID_STATS_CTX_ID -1
/* NQ specific structures */
struct bng_re_nq_db {
struct bng_re_reg_desc reg;
struct bng_re_db_info dbinfo;
};
struct bng_re_nq {
struct pci_dev *pdev;
struct bng_re_res *res;
char *name;
struct bng_re_hwq hwq;
struct bng_re_nq_db nq_db;
u16 ring_id;
int msix_vec;
cpumask_t mask;
struct tasklet_struct nq_tasklet;
bool requested;
int budget;
u32 load;
struct workqueue_struct *cqn_wq;
};
struct bng_re_nq_record {
struct bnge_msix_info msix_entries[BNG_RE_MAX_MSIX];
struct bng_re_nq nq[BNG_RE_MAX_MSIX];
int num_msix;
/* serialize NQ access */
struct mutex load_lock;
};
struct bng_re_en_dev_info {
struct bng_re_dev *rdev;
struct bnge_auxr_dev *auxr_dev;
};
struct bng_re_ring_attr {
dma_addr_t *dma_arr;
int pages;
int type;
u32 depth;
u32 lrid; /* Logical ring id */
u8 mode;
};
struct bng_re_dev {
struct ib_device ibdev;
unsigned long flags;
#define BNG_RE_FLAG_NETDEV_REGISTERED 0
#define BNG_RE_FLAG_RCFW_CHANNEL_EN 1
struct net_device *netdev;
struct auxiliary_device *adev;
struct bnge_auxr_dev *aux_dev;
struct bng_re_chip_ctx *chip_ctx;
int fn_id;
struct bng_re_res bng_res;
struct bng_re_rcfw rcfw;
struct bng_re_nq_record *nqr;
/* Device Resources */
struct bng_re_dev_attr *dev_attr;
struct dentry *dbg_root;
struct bng_re_stats stats_ctx;
};
#endif

View File

@ -0,0 +1,279 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <linux/bnxt/hsi.h>
#include "bng_res.h"
#include "roce_hsi.h"
/* Stats */
void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
struct bng_re_stats *stats)
{
if (stats->dma) {
dma_free_coherent(&pdev->dev, stats->size,
stats->dma, stats->dma_map);
}
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
}
int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
struct bng_re_chip_ctx *cctx,
struct bng_re_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
stats->size = cctx->hw_stats_size;
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma)
return -ENOMEM;
return 0;
}
static void bng_free_pbl(struct bng_re_res *res, struct bng_re_pbl *pbl)
{
struct pci_dev *pdev = res->pdev;
int i;
for (i = 0; i < pbl->pg_count; i++) {
if (pbl->pg_arr[i])
dma_free_coherent(&pdev->dev, pbl->pg_size,
(void *)((unsigned long)
pbl->pg_arr[i] &
PAGE_MASK),
pbl->pg_map_arr[i]);
else
dev_warn(&pdev->dev,
"PBL free pg_arr[%d] empty?!\n", i);
pbl->pg_arr[i] = NULL;
}
vfree(pbl->pg_arr);
pbl->pg_arr = NULL;
vfree(pbl->pg_map_arr);
pbl->pg_map_arr = NULL;
pbl->pg_count = 0;
pbl->pg_size = 0;
}
static int bng_alloc_pbl(struct bng_re_res *res,
struct bng_re_pbl *pbl,
struct bng_re_sg_info *sginfo)
{
struct pci_dev *pdev = res->pdev;
u32 pages;
int i;
if (sginfo->nopte)
return 0;
pages = sginfo->npages;
/* page ptr arrays */
pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
if (!pbl->pg_arr)
return -ENOMEM;
pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
if (!pbl->pg_map_arr) {
vfree(pbl->pg_arr);
pbl->pg_arr = NULL;
return -ENOMEM;
}
pbl->pg_count = 0;
pbl->pg_size = sginfo->pgsize;
for (i = 0; i < pages; i++) {
pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
pbl->pg_size,
&pbl->pg_map_arr[i],
GFP_KERNEL);
if (!pbl->pg_arr[i])
goto fail;
pbl->pg_count++;
}
return 0;
fail:
bng_free_pbl(res, pbl);
return -ENOMEM;
}
void bng_re_free_hwq(struct bng_re_res *res,
struct bng_re_hwq *hwq)
{
int i;
if (!hwq->max_elements)
return;
if (hwq->level >= BNG_PBL_LVL_MAX)
return;
for (i = 0; i < hwq->level + 1; i++)
bng_free_pbl(res, &hwq->pbl[i]);
hwq->level = BNG_PBL_LVL_MAX;
hwq->max_elements = 0;
hwq->element_size = 0;
hwq->prod = 0;
hwq->cons = 0;
}
/* All HWQs are power of 2 in size */
int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
struct bng_re_hwq_attr *hwq_attr)
{
u32 npages, pg_size;
struct bng_re_sg_info sginfo = {};
u32 depth, stride, npbl, npde;
dma_addr_t *src_phys_ptr, **dst_virt_ptr;
struct bng_re_res *res;
struct pci_dev *pdev;
int i, rc, lvl;
res = hwq_attr->res;
pdev = res->pdev;
pg_size = hwq_attr->sginfo->pgsize;
hwq->level = BNG_PBL_LVL_MAX;
depth = roundup_pow_of_two(hwq_attr->depth);
stride = roundup_pow_of_two(hwq_attr->stride);
npages = (depth * stride) / pg_size;
if ((depth * stride) % pg_size)
npages++;
if (!npages)
return -EINVAL;
hwq_attr->sginfo->npages = npages;
if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
/* This request is Level 0, map PTE */
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], hwq_attr->sginfo);
if (rc)
goto fail;
hwq->level = BNG_PBL_LVL_0;
goto done;
}
if (npages >= MAX_PBL_LVL_0_PGS) {
if (npages > MAX_PBL_LVL_1_PGS) {
u32 flag = PTU_PTE_VALID;
/* 2 levels of indirection */
npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
npbl++;
npde = npbl >> MAX_PDL_LVL_SHIFT;
if (npbl % BIT(MAX_PDL_LVL_SHIFT))
npde++;
/* Alloc PDE pages */
sginfo.pgsize = npde * pg_size;
sginfo.npages = 1;
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
if (rc)
goto fail;
/* Alloc PBL pages */
sginfo.npages = npbl;
sginfo.pgsize = PAGE_SIZE;
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1], &sginfo);
if (rc)
goto fail;
/* Fill PDL with PBL page pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
dst_virt_ptr[0][i] = src_phys_ptr[i] | flag;
/* Alloc or init PTEs */
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_2],
hwq_attr->sginfo);
if (rc)
goto fail;
hwq->level = BNG_PBL_LVL_2;
if (hwq_attr->sginfo->nopte)
goto done;
/* Fill PBLs with PTE pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[BNG_PBL_LVL_1].pg_arr;
src_phys_ptr = hwq->pbl[BNG_PBL_LVL_2].pg_map_arr;
for (i = 0; i < hwq->pbl[BNG_PBL_LVL_2].pg_count; i++) {
dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
src_phys_ptr[i] | PTU_PTE_VALID;
}
if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
/* Find the last pg of the size */
i = hwq->pbl[BNG_PBL_LVL_2].pg_count;
dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
PTU_PTE_LAST;
if (i > 1)
dst_virt_ptr[PTR_PG(i - 2)]
[PTR_IDX(i - 2)] |=
PTU_PTE_NEXT_TO_LAST;
}
} else { /* pages < 512 npbl = 1, npde = 0 */
u32 flag = PTU_PTE_VALID;
/* 1 level of indirection */
npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
npbl++;
sginfo.npages = npbl;
sginfo.pgsize = PAGE_SIZE;
/* Alloc PBL page */
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
if (rc)
goto fail;
/* Alloc or init PTEs */
rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1],
hwq_attr->sginfo);
if (rc)
goto fail;
hwq->level = BNG_PBL_LVL_1;
if (hwq_attr->sginfo->nopte)
goto done;
/* Fill PBL with PTE pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
src_phys_ptr[i] | flag;
if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
/* Find the last pg of the size */
i = hwq->pbl[BNG_PBL_LVL_1].pg_count;
dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
PTU_PTE_LAST;
if (i > 1)
dst_virt_ptr[PTR_PG(i - 2)]
[PTR_IDX(i - 2)] |=
PTU_PTE_NEXT_TO_LAST;
}
}
}
done:
hwq->prod = 0;
hwq->cons = 0;
hwq->pdev = pdev;
hwq->depth = hwq_attr->depth;
hwq->max_elements = hwq->depth;
hwq->element_size = stride;
hwq->qe_ppg = pg_size / stride;
/* For direct access to the elements */
lvl = hwq->level;
if (hwq_attr->sginfo->nopte && hwq->level)
lvl = hwq->level - 1;
hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
spin_lock_init(&hwq->lock);
return 0;
fail:
bng_re_free_hwq(res, hwq);
return -ENOMEM;
}
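Worked example, using the CREQ set up in bng_re_alloc_fw_channel() (all numbers follow from the constants in these files, assuming 4 KiB pages): depth = BNG_FW_CREQE_MAX_CNT = 64K entries at stride = BNG_FW_CREQE_UNITS = 16 bytes is 1 MiB, i.e. npages = 256. That is more than MAX_PBL_LVL_0_PGS (1) but within MAX_PBL_LVL_1_PGS (512), so the single-indirection branch runs: one level-0 PBL page whose 256 valid entries point at the 256 queue pages (the last entry additionally tagged PTU_PTE_LAST since the CREQ is BNG_HWQ_TYPE_QUEUE), and hwq->level ends up as BNG_PBL_LVL_1.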

View File

@ -0,0 +1,215 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (c) 2025 Broadcom.
#ifndef __BNG_RES_H__
#define __BNG_RES_H__
#include "roce_hsi.h"
#define BNG_ROCE_FW_MAX_TIMEOUT 60
#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
((HWQ_CMP(hwq->prod, hwq)\
- HWQ_CMP(hwq->cons, hwq))\
& (hwq->max_elements - 1)))
#define MAX_PBL_LVL_0_PGS 1
#define MAX_PBL_LVL_1_PGS 512
#define MAX_PBL_LVL_1_PGS_SHIFT 9
#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
#define MAX_PBL_LVL_2_PGS (256 * 512)
#define MAX_PDL_LVL_SHIFT 9
#define BNG_RE_DBR_VALID (0x1UL << 26)
#define BNG_RE_DBR_EPOCH_SHIFT 24
#define BNG_RE_DBR_TOGGLE_SHIFT 25
#define BNG_MAX_TQM_ALLOC_REQ 48
struct bng_re_reg_desc {
u8 bar_id;
resource_size_t bar_base;
unsigned long offset;
void __iomem *bar_reg;
size_t len;
};
struct bng_re_db_info {
void __iomem *db;
void __iomem *priv_db;
struct bng_re_hwq *hwq;
u32 xid;
u32 max_slot;
u32 flags;
u8 toggle;
};
enum bng_re_db_info_flags_mask {
BNG_RE_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
BNG_RE_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
BNG_RE_FLAG_EPOCH_CONS_MASK = 0x1UL,
BNG_RE_FLAG_EPOCH_PROD_MASK = 0x2UL,
};
enum bng_re_db_epoch_flag_shift {
BNG_RE_DB_EPOCH_CONS_SHIFT = BNG_RE_DBR_EPOCH_SHIFT,
BNG_RE_DB_EPOCH_PROD_SHIFT = (BNG_RE_DBR_EPOCH_SHIFT - 1),
};
struct bng_re_chip_ctx {
u16 chip_num;
u16 hw_stats_size;
u64 hwrm_intf_ver;
u16 hwrm_cmd_max_timeout;
};
struct bng_re_pbl {
u32 pg_count;
u32 pg_size;
void **pg_arr;
dma_addr_t *pg_map_arr;
};
enum bng_re_pbl_lvl {
BNG_PBL_LVL_0,
BNG_PBL_LVL_1,
BNG_PBL_LVL_2,
BNG_PBL_LVL_MAX
};
enum bng_re_hwq_type {
BNG_HWQ_TYPE_CTX,
BNG_HWQ_TYPE_QUEUE
};
struct bng_re_sg_info {
u32 npages;
u32 pgshft;
u32 pgsize;
bool nopte;
};
struct bng_re_hwq_attr {
struct bng_re_res *res;
struct bng_re_sg_info *sginfo;
enum bng_re_hwq_type type;
u32 depth;
u32 stride;
u32 aux_stride;
u32 aux_depth;
};
struct bng_re_hwq {
struct pci_dev *pdev;
/* lock to protect hwq */
spinlock_t lock;
struct bng_re_pbl pbl[BNG_PBL_LVL_MAX + 1];
/* Valid values: 0, 1, 2 */
enum bng_re_pbl_lvl level;
/* PBL entries */
void **pbl_ptr;
/* PBL dma_addr */
dma_addr_t *pbl_dma_ptr;
u32 max_elements;
u32 depth;
u16 element_size;
u32 prod;
u32 cons;
/* queue entry per page */
u16 qe_ppg;
};
struct bng_re_stats {
dma_addr_t dma_map;
void *dma;
u32 size;
u32 fw_id;
};
struct bng_re_res {
struct pci_dev *pdev;
struct bng_re_chip_ctx *cctx;
struct bng_re_dev_attr *dattr;
};
static inline void *bng_re_get_qe(struct bng_re_hwq *hwq,
u32 indx, u64 *pg)
{
u32 pg_num, pg_idx;
pg_num = (indx / hwq->qe_ppg);
pg_idx = (indx % hwq->qe_ppg);
if (pg)
*pg = (u64)&hwq->pbl_ptr[pg_num];
return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
}
#define BNG_RE_INIT_DBHDR(xid, type, indx, toggle) \
(((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
(type) | BNG_RE_DBR_VALID) << 32) | (indx) | \
(((u32)(toggle)) << (BNG_RE_DBR_TOGGLE_SHIFT)))
static inline void bng_re_ring_db(struct bng_re_db_info *info,
u32 type)
{
u64 key = 0;
u32 indx;
u8 toggle = 0;
if (type == DBC_DBC_TYPE_CQ_ARMALL ||
type == DBC_DBC_TYPE_CQ_ARMSE)
toggle = info->toggle;
indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
((info->flags & BNG_RE_FLAG_EPOCH_CONS_MASK) <<
BNG_RE_DB_EPOCH_CONS_SHIFT);
key = BNG_RE_INIT_DBHDR(info->xid, type, indx, toggle);
writeq(key, info->db);
}
static inline void bng_re_ring_nq_db(struct bng_re_db_info *info,
struct bng_re_chip_ctx *cctx,
bool arm)
{
u32 type;
type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
bng_re_ring_db(info, type);
}
static inline void bng_re_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
u32 *dbinfo_flags)
{
/* move cons and update toggle/epoch if wrap around */
*cons += cnt;
if (*cons >= max_elements) {
*cons %= max_elements;
*dbinfo_flags ^= 1UL << BNG_RE_FLAG_EPOCH_CONS_SHIFT;
}
}
static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
{
return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
}
void bng_re_free_hwq(struct bng_re_res *res,
struct bng_re_hwq *hwq);
int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
struct bng_re_hwq_attr *hwq_attr);
void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
struct bng_re_stats *stats);
int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
struct bng_re_chip_ctx *cctx,
struct bng_re_stats *stats);
#endif
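
A minimal sketch of the consumer-index/epoch scheme implemented by bng_re_hwq_incr_cons() above; the queue depth and counts here are made up, but the arithmetic mirrors the helper: the epoch flag flips each time the consumer index wraps, which is what lets the hardware tell a stale doorbell value from a current one.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t max_elements = 8, cons = 6, flags = 0;
	uint32_t cnt = 3;		/* consume 3 entries: 6 + 3 = 9, wraps to 1 */

	cons += cnt;
	if (cons >= max_elements) {
		cons %= max_elements;
		flags ^= 1;		/* epoch bit toggles on every wrap */
	}
	printf("cons=%u epoch=%u\n", cons, flags & 1);	/* cons=1 epoch=1 */
	return 0;
}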


@ -0,0 +1,131 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/interrupt.h>
#include <linux/pci.h>
#include "bng_res.h"
#include "bng_fw.h"
#include "bng_sp.h"
#include "bng_tlv.h"
static bool bng_re_is_atomic_cap(struct bng_re_rcfw *rcfw)
{
u16 pcie_ctl2 = 0;
pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
}
static void bng_re_query_version(struct bng_re_rcfw *rcfw,
char *fw_ver)
{
struct creq_query_version_resp resp = {};
struct bng_re_cmdqmsg msg = {};
struct cmdq_query_version req = {};
int rc;
bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_VERSION,
sizeof(req));
bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bng_re_rcfw_send_message(rcfw, &msg);
if (rc)
return;
fw_ver[0] = resp.fw_maj;
fw_ver[1] = resp.fw_minor;
fw_ver[2] = resp.fw_bld;
fw_ver[3] = resp.fw_rsvd;
}
int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw)
{
struct bng_re_dev_attr *attr = rcfw->res->dattr;
struct creq_query_func_resp resp = {};
struct bng_re_cmdqmsg msg = {};
struct creq_query_func_resp_sb *sb;
struct bng_re_rcfw_sbuf sbuf;
struct cmdq_query_func req = {};
u8 *tqm_alloc;
int i, rc;
u32 temp;
bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_FUNC,
sizeof(req));
sbuf.size = ALIGN(sizeof(*sb), BNG_FW_CMDQE_UNITS);
sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
&sbuf.dma_addr, GFP_KERNEL);
if (!sbuf.sb)
return -ENOMEM;
sb = sbuf.sb;
req.resp_size = sbuf.size / BNG_FW_CMDQE_UNITS;
bng_re_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bng_re_rcfw_send_message(rcfw, &msg);
if (rc)
goto bail;
/* Extract the context from the side buffer */
attr->max_qp = le32_to_cpu(sb->max_qp);
/* max_qp value reported by FW doesn't include the QP1 */
attr->max_qp += 1;
attr->max_qp_rd_atom =
sb->max_qp_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
attr->max_qp_init_rd_atom =
sb->max_qp_init_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1;
/* Adjust max_qp_wqes for variable-size WQEs */
attr->max_qp_wqes = min_t(u32, attr->max_qp_wqes, BNG_VAR_MAX_WQE - 1);
attr->max_qp_sges = min_t(u32, sb->max_sge_var_wqe, BNG_VAR_MAX_SGE);
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
attr->max_cq_sges = attr->max_qp_sges;
attr->max_mr = le32_to_cpu(sb->max_mr);
attr->max_mw = le32_to_cpu(sb->max_mw);
attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
attr->max_pd = 64 * 1024;
attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
attr->max_ah = le32_to_cpu(sb->max_ah);
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
attr->max_pkey = 1;
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
/*
 * Read the max gid supported by HW.
 * For each GID entry in the HW table, the driver consumes 2
 * GID entries in the kernel GID table, so the max_gid reported
 * to the stack can be up to twice the value reported by the HW,
 * capped at 256 GIDs.
 */
attr->max_sgid = le32_to_cpu(sb->max_gid);
attr->max_sgid = min_t(u32, BNG_RE_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid);
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
attr->max_srq += le16_to_cpu(sb->max_srq_ext);
bng_re_query_version(rcfw, attr->fw_ver);
for (i = 0; i < BNG_MAX_TQM_ALLOC_REQ / 4; i++) {
temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
tqm_alloc = (u8 *)&temp;
attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
attr->max_dpi = le32_to_cpu(sb->max_dpi);
attr->is_atomic = bng_re_is_atomic_cap(rcfw);
bail:
dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
sbuf.sb, sbuf.dma_addr);
return rc;
}
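
A worked example of the max_sgid clamp above (the firmware-reported value of 96 is invented for illustration): each HW GID entry backs two kernel GID table entries, so the value exposed to the stack is doubled and then capped at BNG_RE_NUM_GIDS_SUPPORTED (256).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hw_max_gid = 96;		/* hypothetical value from firmware */
	uint32_t max_sgid = 2 * hw_max_gid;

	if (max_sgid > 256)			/* BNG_RE_NUM_GIDS_SUPPORTED */
		max_sgid = 256;
	printf("max_sgid reported to the stack: %u\n", max_sgid);	/* 192 */
	return 0;
}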


@ -0,0 +1,47 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (c) 2025 Broadcom.
#ifndef __BNG_SP_H__
#define __BNG_SP_H__
#include "bng_fw.h"
#define BNG_VAR_MAX_WQE 4352
#define BNG_VAR_MAX_SGE 13
struct bng_re_dev_attr {
#define FW_VER_ARR_LEN 4
u8 fw_ver[FW_VER_ARR_LEN];
#define BNG_RE_NUM_GIDS_SUPPORTED 256
u16 max_sgid;
u16 max_mrw;
u32 max_qp;
#define BNG_RE_MAX_OUT_RD_ATOM 126
u32 max_qp_rd_atom;
u32 max_qp_init_rd_atom;
u32 max_qp_wqes;
u32 max_qp_sges;
u32 max_cq;
u32 max_cq_wqes;
u32 max_cq_sges;
u32 max_mr;
u64 max_mr_size;
u32 max_pd;
u32 max_mw;
u32 max_raw_ethy_qp;
u32 max_ah;
u32 max_srq;
u32 max_srq_wqes;
u32 max_srq_sges;
u32 max_pkey;
u32 max_inline_data;
u32 l2_db_size;
u8 tqm_alloc_reqs[BNG_MAX_TQM_ALLOC_REQ];
bool is_atomic;
u16 dev_cap_flags;
u16 dev_cap_flags2;
u32 max_dpi;
};
int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw);
#endif


@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
#ifndef __BNG_TLV_H__
#define __BNG_TLV_H__
#include "roce_hsi.h"
struct roce_tlv {
struct tlv tlv;
u8 total_size; // in units of 16 byte chunks
u8 unused[7]; // for 16 byte alignment
};
/*
* TLV size in units of 16 byte chunks
*/
#define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16)
/*
* TLV length in bytes
*/
#define TLV_BYTES (TLV_SIZE * 16)
#define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP)
#define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES])
static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode;
else
return req->opcode;
}
static inline void __set_cmdq_base_opcode(struct cmdq_base *req,
u32 size, u8 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val;
else
req->opcode = val;
}
static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie;
else
return req->cookie;
}
static inline void __set_cmdq_base_cookie(struct cmdq_base *req,
u32 size, __le16 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val;
else
req->cookie = val;
}
static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr;
else
return req->resp_addr;
}
static inline void __set_cmdq_base_resp_addr(struct cmdq_base *req,
u32 size, __le64 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val;
else
req->resp_addr = val;
}
static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size;
else
return req->resp_size;
}
static inline void __set_cmdq_base_resp_size(struct cmdq_base *req,
u32 size, u8 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val;
else
req->resp_size = val;
}
static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct roce_tlv *)(req))->total_size;
else
return req->cmd_size;
}
static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req,
u32 size, u8 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val;
else
req->cmd_size = val;
}
static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
return ((struct cmdq_base *)GET_TLV_DATA(req))->flags;
else
return req->flags;
}
static inline void __set_cmdq_base_flags(struct cmdq_base *req,
u32 size, __le16 val)
{
if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
((struct cmdq_base *)GET_TLV_DATA(req))->flags = val;
else
req->flags = val;
}
#endif /* __BNG_TLV_H__ */
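
All of the accessors above follow one dispatch pattern: a command buffer either starts directly with the cmdq_base, or is wrapped in a 16-byte-aligned roce_tlv header, in which case the real cmdq_base begins TLV_BYTES into the buffer. A simplified stand-alone sketch of that dispatch (the struct layout and constant are stand-ins, not the real roce_hsi.h definitions):

#include <stdint.h>

#define FAKE_TLV_BYTES 16			/* stand-in for TLV_BYTES */

struct fake_cmdq_base { uint8_t opcode; /* ... */ };

/* Mirrors __get_cmdq_base_opcode(): follow the TLV header only when the
 * message is TLV-encapsulated and large enough to hold a payload. */
static uint8_t get_opcode(void *msg, uint32_t size, int has_tlv_header)
{
	if (has_tlv_header && size > FAKE_TLV_BYTES)
		return ((struct fake_cmdq_base *)((uint8_t *)msg + FAKE_TLV_BYTES))->opcode;
	return ((struct fake_cmdq_base *)msg)->opcode;
}

int main(void)
{
	struct fake_cmdq_base bare = { .opcode = 0x01 };

	return get_opcode(&bare, sizeof(bare), 0) == 0x01 ? 0 : 1;
}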


@ -224,6 +224,8 @@ struct bnxt_re_dev {
struct workqueue_struct *dcb_wq;
struct dentry *cc_config;
struct bnxt_re_dbg_cc_config_params *cc_config_params;
struct dentry *cq_coal_cfg;
struct bnxt_re_dbg_cq_coal_params *cq_coal_cfg_params;
#define BNXT_VPD_FLD_LEN 32
char board_partno[BNXT_VPD_FLD_LEN];
/* RoCE mirror */


@ -23,6 +23,14 @@
static struct dentry *bnxt_re_debugfs_root;
static const char * const bnxt_re_cq_coal_str[] = {
"buf_maxtime",
"normal_maxbuf",
"during_maxbuf",
"en_ring_idle_mode",
"enable",
};
static const char * const bnxt_re_cc_gen0_name[] = {
"enable_cc",
"run_avg_weight_g",
@ -349,6 +357,123 @@ static void bnxt_re_debugfs_add_info(struct bnxt_re_dev *rdev)
debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops);
}
static ssize_t cq_coal_cfg_write(struct file *file,
const char __user *buf,
size_t count, loff_t *pos)
{
struct seq_file *s = file->private_data;
struct bnxt_re_cq_coal_param *param = s->private;
struct bnxt_re_dev *rdev = param->rdev;
int offset = param->offset;
char lbuf[16] = { };
u32 val;
if (count > sizeof(lbuf))
return -EINVAL;
if (copy_from_user(lbuf, buf, count))
return -EFAULT;
lbuf[sizeof(lbuf) - 1] = '\0';
if (kstrtou32(lbuf, 0, &val))
return -EINVAL;
switch (offset) {
case BNXT_RE_COAL_CQ_BUF_MAXTIME:
if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME)
return -EINVAL;
rdev->cq_coalescing.buf_maxtime = val;
break;
case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF)
return -EINVAL;
rdev->cq_coalescing.normal_maxbuf = val;
break;
case BNXT_RE_COAL_CQ_DURING_MAXBUF:
if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF)
return -EINVAL;
rdev->cq_coalescing.during_maxbuf = val;
break;
case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
if (val > BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE)
return -EINVAL;
rdev->cq_coalescing.en_ring_idle_mode = val;
break;
case BNXT_RE_COAL_CQ_ENABLE:
if (val > 1)
return -EINVAL;
rdev->cq_coalescing.enable = val;
break;
default:
return -EINVAL;
}
return count;
}
static int cq_coal_cfg_show(struct seq_file *s, void *unused)
{
struct bnxt_re_cq_coal_param *param = s->private;
struct bnxt_re_dev *rdev = param->rdev;
int offset = param->offset;
u32 val = 0;
switch (offset) {
case BNXT_RE_COAL_CQ_BUF_MAXTIME:
val = rdev->cq_coalescing.buf_maxtime;
break;
case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
val = rdev->cq_coalescing.normal_maxbuf;
break;
case BNXT_RE_COAL_CQ_DURING_MAXBUF:
val = rdev->cq_coalescing.during_maxbuf;
break;
case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
val = rdev->cq_coalescing.en_ring_idle_mode;
break;
case BNXT_RE_COAL_CQ_ENABLE:
val = rdev->cq_coalescing.enable;
break;
default:
return -EINVAL;
}
seq_printf(s, "%u\n", val);
return 0;
}
DEFINE_SHOW_STORE_ATTRIBUTE(cq_coal_cfg);
static void bnxt_re_cleanup_cq_coal_debugfs(struct bnxt_re_dev *rdev)
{
debugfs_remove_recursive(rdev->cq_coal_cfg);
kfree(rdev->cq_coal_cfg_params);
}
static void bnxt_re_init_cq_coal_debugfs(struct bnxt_re_dev *rdev)
{
struct bnxt_re_dbg_cq_coal_params *dbg_cq_coal_params;
int i;
if (!_is_cq_coalescing_supported(rdev->dev_attr->dev_cap_flags2))
return;
dbg_cq_coal_params = kzalloc(sizeof(*dbg_cq_coal_params), GFP_KERNEL);
if (!dbg_cq_coal_params)
return;
rdev->cq_coal_cfg = debugfs_create_dir("cq_coal_cfg", rdev->dbg_root);
rdev->cq_coal_cfg_params = dbg_cq_coal_params;
for (i = 0; i < BNXT_RE_COAL_CQ_MAX; i++) {
dbg_cq_coal_params->params[i].offset = i;
dbg_cq_coal_params->params[i].rdev = rdev;
debugfs_create_file(bnxt_re_cq_coal_str[i],
0600, rdev->cq_coal_cfg,
&dbg_cq_coal_params->params[i],
&cq_coal_cfg_fops);
}
}
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
@ -374,10 +499,13 @@ void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
rdev->cc_config, tmp_params,
&bnxt_re_cc_config_ops);
}
bnxt_re_init_cq_coal_debugfs(rdev);
}
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
bnxt_re_cleanup_cq_coal_debugfs(rdev);
debugfs_remove_recursive(rdev->qp_debugfs);
debugfs_remove_recursive(rdev->cc_config);
kfree(rdev->cc_config_params);
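
For reference, the DEFINE_SHOW_STORE_ATTRIBUTE(cq_coal_cfg) invocation above is the stock seq_file helper; roughly (a sketch of the macro's expansion, see include/linux/seq_file.h for the exact form), it generates a single_open()-based open routine plus a file_operations that routes reads through cq_coal_cfg_show() and writes through cq_coal_cfg_write():

static int cq_coal_cfg_open(struct inode *inode, struct file *file)
{
	return single_open(file, cq_coal_cfg_show, inode->i_private);
}

static const struct file_operations cq_coal_cfg_fops = {
	.owner		= THIS_MODULE,
	.open		= cq_coal_cfg_open,
	.read		= seq_read,
	.write		= cq_coal_cfg_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

debugfs_create_file() stores the per-parameter pointer in i_private, which is how cq_coal_cfg_show() gets its bnxt_re_cq_coal_param back through s->private.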


@ -33,4 +33,23 @@ struct bnxt_re_cc_param {
struct bnxt_re_dbg_cc_config_params {
struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
};
struct bnxt_re_cq_coal_param {
struct bnxt_re_dev *rdev;
u32 offset;
};
enum bnxt_re_cq_coal_types {
BNXT_RE_COAL_CQ_BUF_MAXTIME,
BNXT_RE_COAL_CQ_NORMAL_MAXBUF,
BNXT_RE_COAL_CQ_DURING_MAXBUF,
BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE,
BNXT_RE_COAL_CQ_ENABLE,
BNXT_RE_COAL_CQ_MAX
};
struct bnxt_re_dbg_cq_coal_params {
struct bnxt_re_cq_coal_param params[BNXT_RE_COAL_CQ_MAX];
};
#endif


@ -601,7 +601,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->qplib_mr.va = (u64)(unsigned long)fence->va;
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE);
BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE,
_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
goto fail;
@ -4027,7 +4028,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->qplib_mr.hwq.level = PBL_LVL_MAX;
mr->qplib_mr.total_size = -1; /* Infinte length */
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0,
PAGE_SIZE);
PAGE_SIZE, false);
if (rc)
goto fail_mr;
@ -4257,7 +4258,8 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64
umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
umem_pgs, page_size);
umem_pgs, page_size,
_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc);
rc = -EIO;


@ -1453,6 +1453,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
rdev->cq_coalescing.enable = 1;
rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;


@ -2226,7 +2226,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->max_wqe);
if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2)) {
if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2) &&
cq->coalescing->enable) {
req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
coalescing |= ((cq->coalescing->buf_maxtime <<
CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &


@ -395,6 +395,7 @@ struct bnxt_qplib_cq_coal_param {
u8 normal_maxbuf;
u8 during_maxbuf;
u8 en_ring_idle_mode;
u8 enable;
};
#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1


@ -162,7 +162,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
attr->max_pkey = 1;
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->max_inline_data = attr->max_qp_sges * sizeof(struct sq_sge);
if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
attr->l2_db_size = (sb->l2_db_space_size + 1) *
(0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
@ -578,7 +578,7 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
}
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
struct ib_umem *umem, int num_pbls, u32 buf_pg_size)
struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_hwq_attr hwq_attr = {};
@ -640,7 +640,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.access = (mr->access_flags & BNXT_QPLIB_MR_ACCESS_MASK);
req.va = cpu_to_le64(mr->va);
req.key = cpu_to_le32(mr->lkey);
if (_is_alloc_mr_unified(res->dattr->dev_cap_flags))
if (unified_mr)
req.key = cpu_to_le32(mr->pd->id);
req.flags = cpu_to_le16(mr->flags);
req.mr_size = cpu_to_le64(mr->total_size);
@ -651,7 +651,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
if (rc)
goto fail;
if (_is_alloc_mr_unified(res->dattr->dev_cap_flags)) {
if (unified_mr) {
mr->lkey = le32_to_cpu(resp.xid);
mr->rkey = mr->lkey;
}


@ -341,7 +341,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block);
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
struct ib_umem *umem, int num_pbls, u32 buf_pg_size);
struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr);
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mr, int max);


@ -348,7 +348,7 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
{
int err;
pr_debug("*pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
pr_debug("*pbl_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
pbl_addr, rdev->lldi.vr->pbl.start,
pbl_size);


@ -745,8 +745,8 @@ static int create_workqueues(struct hfi1_devdata *dd)
ppd->hfi1_wq =
alloc_workqueue(
"hfi%d_%d",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
WQ_MEM_RECLAIM,
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
WQ_PERCPU,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)


@ -305,8 +305,8 @@ void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
int opfn_init(void)
{
opfn_wq = alloc_workqueue("hfi_opfn",
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
WQ_MEM_RECLAIM,
WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
WQ_PERCPU,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
if (!opfn_wq)
return -ENOMEM;
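
The two hunks above only add WQ_PERCPU to the existing flag sets, making the historically implicit per-CPU placement of these workqueues explicit. A minimal sketch of the same annotation in isolation (the workqueue name and wrapper function are made up):

static struct workqueue_struct *example_wq;

static int example_wq_init(void)
{
	/* Explicitly request per-CPU worker pools rather than relying on
	 * the historical default. */
	example_wq = alloc_workqueue("example_wq",
				     WQ_MEM_RECLAIM | WQ_PERCPU, 0);
	return example_wq ? 0 : -ENOMEM;
}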


@ -4,11 +4,13 @@
#
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf
ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common
ccflags-y += -I $(src)
hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
hns_roce_debugfs.o hns_roce_hw_v2.o
hns_roce_debugfs.o hns_roce_hw_v2.o hns_roce_bond.o
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o


@ -30,7 +30,6 @@
* SOFTWARE.
*/
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "hns_roce_device.h"

File diff suppressed because it is too large


@ -0,0 +1,95 @@
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright (c) 2025 Hisilicon Limited.
*/
#ifndef _HNS_ROCE_BOND_H
#define _HNS_ROCE_BOND_H
#include <linux/netdevice.h>
#include <net/bonding.h>
#define ROCE_BOND_FUNC_MAX 4
#define ROCE_BOND_NUM_MAX 2
#define BOND_ID(id) BIT(id)
#define BOND_ERR_LOG(fmt, ...) \
pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)
enum {
BOND_MODE_1,
BOND_MODE_2_4,
};
enum hns_roce_bond_hashtype {
BOND_HASH_L2,
BOND_HASH_L34,
BOND_HASH_L23,
};
enum bond_support_type {
BOND_NOT_SUPPORT,
/*
* bond_grp already exists, but in the current
* conditions it's no longer supported
*/
BOND_EXISTING_NOT_SUPPORT,
BOND_SUPPORT,
};
enum hns_roce_bond_state {
HNS_ROCE_BOND_NOT_ATTACHED,
HNS_ROCE_BOND_NOT_BONDED,
HNS_ROCE_BOND_IS_BONDED,
HNS_ROCE_BOND_SLAVE_CHANGE_NUM,
HNS_ROCE_BOND_SLAVE_CHANGESTATE,
};
enum hns_roce_bond_cmd_type {
HNS_ROCE_SET_BOND,
HNS_ROCE_CHANGE_BOND,
HNS_ROCE_CLEAR_BOND,
};
struct hns_roce_func_info {
struct net_device *net_dev;
struct hnae3_handle *handle;
};
struct hns_roce_bond_group {
struct net_device *upper_dev;
struct hns_roce_dev *main_hr_dev;
u8 active_slave_num;
u32 slave_map;
u32 active_slave_map;
u8 bond_id;
u8 bus_num;
struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
bool bond_ready;
enum hns_roce_bond_state bond_state;
enum netdev_lag_tx_type tx_type;
enum netdev_lag_hash hash_type;
struct mutex bond_mutex;
struct notifier_block bond_nb;
struct delayed_work bond_work;
};
struct hns_roce_die_info {
u8 bond_id_mask;
struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
struct mutex die_mutex;
u8 suspend_cnt;
};
struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
u8 bus_num);
int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
void hns_roce_dealloc_bond_grp(void);
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
void hns_roce_bond_suspend(struct hnae3_handle *handle);
void hns_roce_bond_resume(struct hnae3_handle *handle);
#endif


@ -33,6 +33,7 @@
#ifndef _HNS_ROCE_DEVICE_H
#define _HNS_ROCE_DEVICE_H
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include <rdma/hns-abi.h>
#include "hns_roce_debugfs.h"
@ -153,6 +154,7 @@ enum {
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17),
HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
HNS_ROCE_CAP_FLAG_BOND = BIT(21),
HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
};
@ -177,6 +179,7 @@ enum hns_roce_instance_state {
HNS_ROCE_STATE_INIT,
HNS_ROCE_STATE_INITED,
HNS_ROCE_STATE_UNINIT,
HNS_ROCE_STATE_BOND_UNINIT,
};
enum {
@ -1167,6 +1170,17 @@ static inline u8 get_tclass(const struct ib_global_route *grh)
grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
}
static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev,
u8 port)
{
return hr_dev->iboe.netdevs[port];
}
static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev)
{
return hr_dev->pci_dev->bus->number;
}
void hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
@ -1293,7 +1307,7 @@ void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup);
int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);


@ -43,11 +43,13 @@
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hclge_main.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
#include "hns_roce_bond.h"
#define CREATE_TRACE_POINTS
#include "hns_roce_trace.h"
@ -1434,6 +1436,79 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
return ret;
}
static enum hns_roce_opcode_type
get_bond_opcode(enum hns_roce_bond_cmd_type bond_type)
{
switch (bond_type) {
case HNS_ROCE_SET_BOND:
return HNS_ROCE_OPC_SET_BOND_INFO;
case HNS_ROCE_CHANGE_BOND:
return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT;
case HNS_ROCE_CLEAR_BOND:
return HNS_ROCE_OPC_CLEAR_BOND_INFO;
default:
WARN(true, "Invalid bond type %d!\n", bond_type);
return HNS_ROCE_OPC_SET_BOND_INFO;
}
}
static enum hns_roce_bond_hashtype
get_bond_hashtype(enum netdev_lag_hash netdev_hashtype)
{
switch (netdev_hashtype) {
case NETDEV_LAG_HASH_L2:
return BOND_HASH_L2;
case NETDEV_LAG_HASH_L34:
return BOND_HASH_L34;
case NETDEV_LAG_HASH_L23:
return BOND_HASH_L23;
default:
WARN(true, "Invalid hash type %d!\n", netdev_hashtype);
return BOND_HASH_L2;
}
}
int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
enum hns_roce_bond_cmd_type bond_type)
{
enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type);
struct hns_roce_bond_info *slave_info;
struct hns_roce_cmq_desc desc = {};
int ret;
slave_info = (struct hns_roce_bond_info *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, opcode, false);
slave_info->bond_id = cpu_to_le32(bond_grp->bond_id);
if (bond_type == HNS_ROCE_CLEAR_BOND)
goto out;
if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
slave_info->bond_mode = cpu_to_le32(BOND_MODE_1);
if (bond_grp->active_slave_num != 1)
ibdev_warn(&bond_grp->main_hr_dev->ib_dev,
"active slave cnt(%u) in Mode 1 is invalid.\n",
bond_grp->active_slave_num);
} else {
slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4);
slave_info->hash_policy =
cpu_to_le32(get_bond_hashtype(bond_grp->hash_type));
}
slave_info->active_slave_cnt = cpu_to_le32(bond_grp->active_slave_num);
slave_info->active_slave_mask = cpu_to_le32(bond_grp->active_slave_map);
slave_info->slave_mask = cpu_to_le32(bond_grp->slave_map);
out:
ret = hns_roce_cmq_send(bond_grp->main_hr_dev, &desc, 1);
if (ret)
ibdev_err(&bond_grp->main_hr_dev->ib_dev,
"cmq bond type(%d) failed, ret = %d.\n",
bond_type, ret);
return ret;
}
static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
dma_addr_t base_addr, u8 cmd, unsigned long tag)
{
@ -2275,6 +2350,9 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
HNS_ROCE_CAP_FLAGS_EX_SHIFT;
if (hr_dev->is_vf)
caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND;
caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
@ -7067,7 +7145,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
}
static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
bool reset)
bool reset, bool bond_cleanup)
{
struct hns_roce_dev *hr_dev = handle->priv;
@ -7079,7 +7157,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
hns_roce_handle_device_err(hr_dev);
hns_roce_exit(hr_dev);
hns_roce_exit(hr_dev, bond_cleanup);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
}
@ -7130,12 +7208,51 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
bool reset)
{
/* Suspend bond to avoid concurrency */
hns_roce_bond_suspend(handle);
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
return;
goto out;
handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, reset);
__hns_roce_hw_v2_uninit_instance(handle, reset, true);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
out:
hns_roce_bond_resume(handle);
}
struct hns_roce_dev
*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
int func_idx)
{
struct hnae3_handle *handle;
int ret;
handle = bond_grp->bond_func_info[func_idx].handle;
if (!handle || !handle->client)
return NULL;
ret = hns_roce_hw_v2_init_instance(handle);
if (ret)
return NULL;
return handle->priv;
}
void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
int func_idx)
{
struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
return;
handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
__hns_roce_hw_v2_uninit_instance(handle, false, false);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
@ -7144,6 +7261,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
/* Suspend bond to avoid concurrency */
hns_roce_bond_suspend(handle);
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
return 0;
@ -7174,6 +7294,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
&handle->rinfo.state)) {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
hns_roce_bond_resume(handle);
return 0;
}
@ -7193,6 +7314,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
dev_info(dev, "reset done, RoCE client reinit finished.\n");
}
hns_roce_bond_resume(handle);
return ret;
}
@ -7204,7 +7326,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
__hns_roce_hw_v2_uninit_instance(handle, false);
__hns_roce_hw_v2_uninit_instance(handle, false, false);
return 0;
}
@ -7240,6 +7362,14 @@ static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
if (linkup || !hr_dev)
return;
/* For a bond device, the link status depends on the upper netdev,
* and the upper device's link status depends on all of the slave
* netdevs, not just one of them. So a bond device cannot get a
* correct link status from this path.
*/
if (hns_roce_get_bond_grp(netdev, get_hr_bus_num(hr_dev)))
return;
ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
}
@ -7264,6 +7394,7 @@ static int __init hns_roce_hw_v2_init(void)
static void __exit hns_roce_hw_v2_exit(void)
{
hns_roce_dealloc_bond_grp();
hnae3_unregister_client(&hns_roce_hw_v2_client);
hns_roce_cleanup_debugfs();
}


@ -35,6 +35,7 @@
#include <linux/bitops.h>
#include "hnae3.h"
#include "hns_roce_bond.h"
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
@ -228,6 +229,9 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602,
HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603,
};
#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
@ -1465,7 +1469,23 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5];
};
struct hns_roce_bond_info {
__le32 bond_id;
__le32 bond_mode;
__le32 active_slave_cnt;
__le32 active_slave_mask;
__le32 slave_mask;
__le32 hash_policy;
};
struct hns_roce_dev
*hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
int func_idx);
void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
int func_idx);
int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
enum hns_roce_bond_cmd_type bond_type);
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)


@ -32,7 +32,6 @@
*/
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@ -41,6 +40,7 @@
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
#include "hns_roce_bond.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
const u8 *addr)
@ -89,30 +89,75 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
return ret;
}
static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port,
unsigned long event)
static int hns_roce_get_port_state(struct hns_roce_dev *hr_dev, u32 port_num,
enum ib_port_state *state)
{
struct hns_roce_bond_group *bond_grp;
u8 bus_num = get_hr_bus_num(hr_dev);
struct net_device *net_dev;
net_dev = ib_device_get_netdev(&hr_dev->ib_dev, port_num);
if (!net_dev)
return -ENODEV;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
if (bond_grp) {
*state = ib_get_curr_port_state(bond_grp->upper_dev);
goto out;
}
}
*state = ib_get_curr_port_state(net_dev);
out:
dev_put(net_dev);
return 0;
}
static int handle_en_event(struct net_device *netdev,
struct hns_roce_dev *hr_dev,
u32 port, unsigned long event)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
struct device *dev = hr_dev->dev;
struct net_device *netdev;
enum ib_port_state curr_state;
struct ib_event ibevent;
int ret = 0;
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
dev_err(dev, "can't find netdev on port(%u)!\n", port);
return -ENODEV;
}
switch (event) {
case NETDEV_UP:
case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
break;
case NETDEV_UP:
case NETDEV_CHANGE:
ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
if (ret)
return ret;
fallthrough;
case NETDEV_DOWN:
/*
* In v1 engine, only support all ports closed together.
*/
if (!netif_is_lag_master(netdev))
break;
curr_state = ib_get_curr_port_state(netdev);
write_lock_irq(&ibdev->cache_lock);
if (ibdev->port_data[port].cache.last_port_state == curr_state) {
write_unlock_irq(&ibdev->cache_lock);
return 0;
}
ibdev->port_data[port].cache.last_port_state = curr_state;
write_unlock_irq(&ibdev->cache_lock);
ibevent.event = (curr_state == IB_PORT_DOWN) ?
IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
ibevent.device = ibdev;
ibevent.element.port_num = port + 1;
ib_dispatch_event(&ibevent);
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
@ -126,17 +171,25 @@ static int hns_roce_netdev_event(struct notifier_block *self,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct hns_roce_bond_group *bond_grp;
struct hns_roce_ib_iboe *iboe = NULL;
struct hns_roce_dev *hr_dev = NULL;
struct net_device *upper = NULL;
int ret;
u32 port;
hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
iboe = &hr_dev->iboe;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0),
get_hr_bus_num(hr_dev));
upper = bond_grp ? bond_grp->upper_dev : NULL;
}
for (port = 0; port < hr_dev->caps.num_ports; port++) {
if (dev == iboe->netdevs[port]) {
ret = handle_en_event(hr_dev, port, event);
if ((!upper && dev == iboe->netdevs[port]) ||
(upper && dev == upper)) {
ret = handle_en_event(dev, hr_dev, port, event);
if (ret)
return NOTIFY_DONE;
break;
@ -148,12 +201,13 @@ static int hns_roce_netdev_event(struct notifier_block *self,
static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
struct net_device *net_dev;
int ret;
u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
ret = hns_roce_set_mac(hr_dev, i,
hr_dev->iboe.netdevs[i]->dev_addr);
net_dev = get_hr_netdev(hr_dev, i);
ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr);
if (ret)
return ret;
}
@ -221,9 +275,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
struct ib_port_attr *props)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
struct device *dev = hr_dev->dev;
struct net_device *net_dev;
unsigned long flags;
enum ib_mtu mtu;
u32 port;
int ret;
@ -244,26 +296,26 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
if (ret)
ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
net_dev = hr_dev->iboe.netdevs[port];
net_dev = ib_device_get_netdev(ib_dev, port_num);
if (!net_dev) {
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
dev_err(dev, "find netdev %u failed!\n", port);
ibdev_err(ib_dev, "find netdev %u failed!\n", port);
return -EINVAL;
}
mtu = iboe_get_mtu(net_dev->mtu);
props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
IB_PORT_ACTIVE :
IB_PORT_DOWN;
dev_put(net_dev);
ret = hns_roce_get_port_state(hr_dev, port_num, &props->state);
if (ret) {
ibdev_err(ib_dev, "failed to get port state.\n");
return ret;
}
props->phys_state = props->state == IB_PORT_ACTIVE ?
IB_PORT_PHYS_STATE_LINK_UP :
IB_PORT_PHYS_STATE_DISABLED;
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
return 0;
}
@ -617,9 +669,40 @@ static int hns_roce_get_hw_stats(struct ib_device *device,
return num_counters;
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
static void
hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev,
struct hns_roce_bond_group *bond_grp)
{
struct net_device *net_dev;
int i;
/* To avoid the loss of other slave devices when main_hr_dev
* is unregistered, re-initialize the remaining slaves before
* the bond resources cleanup.
*/
bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
net_dev = bond_grp->bond_func_info[i].net_dev;
if (net_dev && net_dev != get_hr_netdev(hr_dev, 0))
hns_roce_bond_init_client(bond_grp, i);
}
hns_roce_cleanup_bond(bond_grp);
}
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev,
bool bond_cleanup)
{
struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
struct hns_roce_bond_group *bond_grp;
u8 bus_num = get_hr_bus_num(hr_dev);
if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
if (bond_grp)
hns_roce_unregister_bond_cleanup(hr_dev, bond_grp);
}
hr_dev->active = false;
unregister_netdevice_notifier(&iboe->nb);
@ -708,11 +791,12 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = {
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
int ret;
struct hns_roce_ib_iboe *iboe = NULL;
struct ib_device *ib_dev = NULL;
struct device *dev = hr_dev->dev;
struct ib_device *ib_dev = NULL;
struct net_device *net_dev;
unsigned int i;
int ret;
iboe = &hr_dev->iboe;
spin_lock_init(&iboe->lock);
@ -747,17 +831,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
dma_set_max_seg_size(dev, SZ_2G);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
ret = hns_roce_alloc_bond_grp(hr_dev);
if (ret) {
dev_err(dev, "failed to alloc bond_grp for bus %u, ret = %d\n",
get_hr_bus_num(hr_dev), ret);
return ret;
}
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND &&
hns_roce_bond_is_active(hr_dev)) {
ret = hns_roce_bond_init(hr_dev);
if (ret) {
dev_err(dev, "failed to init bond!\n");
return ret;
}
ret = ib_register_device(ib_dev, "hns_bond_%d", dev);
} else {
for (i = 0; i < hr_dev->caps.num_ports; i++) {
if (!hr_dev->iboe.netdevs[i])
net_dev = get_hr_netdev(hr_dev, i);
if (!net_dev)
continue;
ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i],
i + 1);
ret = ib_device_set_netdev(ib_dev, net_dev, i + 1);
if (ret)
return ret;
}
dma_set_max_seg_size(dev, SZ_2G);
ret = ib_register_device(ib_dev, "hns_%d", dev);
}
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
@ -1157,10 +1262,10 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
return ret;
}
void hns_roce_exit(struct hns_roce_dev *hr_dev)
void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
{
hns_roce_unregister_debugfs(hr_dev);
hns_roce_unregister_device(hr_dev);
hns_roce_unregister_device(hr_dev, bond_cleanup);
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);


@ -30,7 +30,6 @@
* SOFTWARE.
*/
#include <linux/pci.h>
#include "hns_roce_device.h"
void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)


@ -31,7 +31,6 @@
* SOFTWARE.
*/
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
@ -1348,11 +1347,13 @@ static int check_mtu_validate(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_qp_attr *attr, int attr_mask)
{
struct net_device *net_dev;
enum ib_mtu active_mtu;
int p;
p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
net_dev = get_hr_netdev(hr_dev, p);
active_mtu = iboe_get_mtu(net_dev->mtu);
if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
attr->path_mtu > hr_dev->caps.max_mtu) ||


@ -3,7 +3,6 @@
* Copyright (c) 2018 Hisilicon Limited.
*/
#include <linux/pci.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"


@ -3710,7 +3710,7 @@ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwpd = iwqp->iwpd;
tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len,
IB_ACCESS_LOCAL_WRITE, &tagged_offset);
IB_ACCESS_LOCAL_WRITE, &tagged_offset, false);
if (IS_ERR(ibmr)) {
ret = -ENOMEM;
goto error;


@ -2943,8 +2943,6 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
__le64 *wqe;
struct irdma_sc_cqp *cqp;
u64 hdr;
struct irdma_sc_ceq *ceq;
int ret_code = 0;
cqp = cq->dev->cqp;
if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)
@ -2953,19 +2951,9 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs)
return -EINVAL;
ceq = cq->dev->ceq[cq->ceq_id];
if (ceq && ceq->reg_cq)
ret_code = irdma_sc_add_cq_ctx(ceq, cq);
if (ret_code)
return ret_code;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe) {
if (ceq && ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, cq);
if (!wqe)
return -ENOMEM;
}
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
@ -3018,17 +3006,12 @@ int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq)
struct irdma_sc_cqp *cqp;
__le64 *wqe;
u64 hdr;
struct irdma_sc_ceq *ceq;
cqp = cq->dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
return -ENOMEM;
ceq = cq->dev->ceq[cq->ceq_id];
if (ceq && ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, cq);
set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
set_64bit_val(wqe, 40, cq->shadow_area_pa);
@ -3601,71 +3584,6 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
return 0;
}
/**
* irdma_sc_find_reg_cq - find cq ctx index
* @ceq: ceq sc structure
* @cq: cq sc structure
*/
static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq,
struct irdma_sc_cq *cq)
{
u32 i;
for (i = 0; i < ceq->reg_cq_size; i++) {
if (cq == ceq->reg_cq[i])
return i;
}
return IRDMA_INVALID_CQ_IDX;
}
/**
* irdma_sc_add_cq_ctx - add cq ctx tracking for ceq
* @ceq: ceq sc structure
* @cq: cq sc structure
*/
int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq)
{
unsigned long flags;
spin_lock_irqsave(&ceq->req_cq_lock, flags);
if (ceq->reg_cq_size == ceq->elem_cnt) {
spin_unlock_irqrestore(&ceq->req_cq_lock, flags);
return -ENOMEM;
}
ceq->reg_cq[ceq->reg_cq_size++] = cq;
spin_unlock_irqrestore(&ceq->req_cq_lock, flags);
return 0;
}
/**
* irdma_sc_remove_cq_ctx - remove cq ctx tracking for ceq
* @ceq: ceq sc structure
* @cq: cq sc structure
*/
void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq)
{
unsigned long flags;
u32 cq_ctx_idx;
spin_lock_irqsave(&ceq->req_cq_lock, flags);
cq_ctx_idx = irdma_sc_find_reg_cq(ceq, cq);
if (cq_ctx_idx == IRDMA_INVALID_CQ_IDX)
goto exit;
ceq->reg_cq_size--;
if (cq_ctx_idx != ceq->reg_cq_size)
ceq->reg_cq[cq_ctx_idx] = ceq->reg_cq[ceq->reg_cq_size];
ceq->reg_cq[ceq->reg_cq_size] = NULL;
exit:
spin_unlock_irqrestore(&ceq->req_cq_lock, flags);
}
/**
* irdma_sc_cqp_init - Initialize buffers for a control Queue Pair
* @cqp: IWARP control queue pair pointer
@ -3950,11 +3868,13 @@ int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp)
*/
void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
{
unsigned long flags;
u64 temp_val;
u16 sw_cq_sel;
u8 arm_next_se;
u8 arm_seq_num;
spin_lock_irqsave(&ccq->dev->cqp_lock, flags);
get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
@ -3965,6 +3885,7 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, 1);
set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
spin_unlock_irqrestore(&ccq->dev->cqp_lock, flags);
dma_wmb(); /* make sure shadow area is updated before arming */
@ -4387,9 +4308,6 @@ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
ceq->ceq_elem_pa = info->ceqe_pa;
ceq->virtual_map = info->virtual_map;
ceq->itr_no_expire = info->itr_no_expire;
ceq->reg_cq = info->reg_cq;
ceq->reg_cq_size = 0;
spin_lock_init(&ceq->req_cq_lock);
ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
@ -4472,9 +4390,6 @@ int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
{
struct irdma_sc_cqp *cqp;
if (ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, ceq->dev->ccq);
cqp = ceq->dev->cqp;
cqp->process_cqp_sds = irdma_update_sds_noccq;
@ -4493,11 +4408,6 @@ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
struct irdma_sc_dev *dev = ceq->dev;
dev->ccq->vsi_idx = ceq->vsi_idx;
if (ceq->reg_cq) {
ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq);
if (ret_code)
return ret_code;
}
ret_code = irdma_sc_ceq_create(ceq, scratch, true);
if (!ret_code)
@ -4562,7 +4472,6 @@ void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
struct irdma_sc_cq *temp_cq;
u8 polarity;
u32 cq_idx;
unsigned long flags;
do {
cq_idx = 0;
@ -4583,11 +4492,6 @@ void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
}
cq = temp_cq;
if (ceq->reg_cq) {
spin_lock_irqsave(&ceq->req_cq_lock, flags);
cq_idx = irdma_sc_find_reg_cq(ceq, cq);
spin_unlock_irqrestore(&ceq->req_cq_lock, flags);
}
IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
@ -4731,7 +4635,8 @@ static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
u64 hdr;
dev = aeq->dev;
if (dev->privileged)
if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
cqp = dev->cqp;


@ -2365,7 +2365,6 @@ static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev,
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_apbvt_entry.info;
memset(info, 0, sizeof(*info));
info->add = add_port;
info->port = accel_local_port;
cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY;
@ -2474,7 +2473,6 @@ void irdma_manage_arp_cache(struct irdma_pci_f *rf,
if (action == IRDMA_ARP_ADD) {
cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY;
info = &cqp_info->in.u.add_arp_cache_entry.info;
memset(info, 0, sizeof(*info));
info->arp_index = (u16)arp_index;
info->permanent = true;
ether_addr_copy(info->mac_addr, mac_addr);
@ -2533,7 +2531,6 @@ int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_qhash_table_entry.info;
memset(info, 0, sizeof(*info));
info->vsi = &iwdev->vsi;
info->manage = mtype;
info->entry_type = etype;


@ -302,7 +302,8 @@ static int icrdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary
err_ctrl_init:
icrdma_deinit_interrupts(rf, cdev_info);
err_init_interrupts:
kfree(iwdev->rf);
mutex_destroy(&rf->ah_tbl_lock);
kfree(rf);
ib_dealloc_device(&iwdev->ibdev);
return err;
@ -319,6 +320,9 @@ static void icrdma_remove(struct auxiliary_device *aux_dev)
ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
irdma_ib_unregister_device(iwdev);
icrdma_deinit_interrupts(iwdev->rf, cdev_info);
mutex_destroy(&iwdev->rf->ah_tbl_lock);
kfree(iwdev->rf);
pr_debug("INIT: Gen[%d] func[%d] device remove success\n",
rdma_ver, PCI_FUNC(cdev_info->pdev->devfn));


@ -55,6 +55,7 @@ static int ig3rdma_vchnl_init(struct irdma_pci_f *rf,
ret = irdma_sc_vchnl_init(&rf->sc_dev, &virt_info);
if (ret) {
destroy_workqueue(rf->vchnl_wq);
mutex_destroy(&rf->sc_dev.vchnl_mutex);
return ret;
}
@ -124,7 +125,9 @@ static void ig3rdma_decfg_rf(struct irdma_pci_f *rf)
{
struct irdma_hw *hw = &rf->hw;
mutex_destroy(&rf->ah_tbl_lock);
destroy_workqueue(rf->vchnl_wq);
mutex_destroy(&rf->sc_dev.vchnl_mutex);
kfree(hw->io_regs);
iounmap(hw->rdma_reg.addr);
}
@ -149,6 +152,7 @@ static int ig3rdma_cfg_rf(struct irdma_pci_f *rf,
err = ig3rdma_cfg_regions(&rf->hw, cdev_info);
if (err) {
destroy_workqueue(rf->vchnl_wq);
mutex_destroy(&rf->sc_dev.vchnl_mutex);
return err;
}


@ -556,7 +556,7 @@ void irdma_copy_ip_htonl(__be32 *dst, u32 *src);
u16 irdma_get_vlan_ipv4(u32 *addr);
void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac);
struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
int acc, u64 *iova_start);
int acc, u64 *iova_start, bool dma_mr);
int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw);
void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
@ -564,7 +564,6 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
void *cb_param);
void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
bool irdma_cq_empty(struct irdma_cq *iwcq);
int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
void *ptr);
int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,


@ -506,12 +506,14 @@ int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
struct irdma_pble_alloc *palloc)
{
pble_rsrc->freedpbles += palloc->total_cnt;
if (palloc->level == PBLE_LEVEL_2)
free_lvl2(pble_rsrc, palloc);
else
irdma_prm_return_pbles(&pble_rsrc->pinfo,
&palloc->level1.chunkinfo);
mutex_lock(&pble_rsrc->pble_mutex_lock);
pble_rsrc->freedpbles += palloc->total_cnt;
pble_rsrc->stats_alloc_freed++;
mutex_unlock(&pble_rsrc->pble_mutex_lock);
}


@ -685,7 +685,6 @@ static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
ukqp->rq_size = rsrc->rq_size;
IRDMA_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
IRDMA_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
IRDMA_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
ukqp->wqe_alloc_db = qp->pd->dev->wqe_alloc_db;
@ -726,7 +725,6 @@ static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
struct irdma_sc_cqp *cqp;
u64 hdr;
struct irdma_ccq_cqe_info compl_info;
int status = 0;
cqp = dev->cqp;
wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
@ -756,16 +754,8 @@ static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
print_hex_dump_debug("PUDA: PUDA CREATE CQ", DUMP_PREFIX_OFFSET, 16,
8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
irdma_sc_cqp_post_sq(dev->cqp);
status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ,
return irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ,
&compl_info);
if (!status) {
struct irdma_sc_ceq *ceq = dev->ceq[0];
if (ceq && ceq->reg_cq)
status = irdma_sc_add_cq_ctx(ceq, cq);
}
return status;
}
/**
@ -897,23 +887,17 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
struct irdma_puda_buf *buf = NULL;
struct irdma_puda_buf *nextbuf = NULL;
struct irdma_virt_mem *vmem;
struct irdma_sc_ceq *ceq;
ceq = vsi->dev->ceq[0];
switch (type) {
case IRDMA_PUDA_RSRC_TYPE_ILQ:
rsrc = vsi->ilq;
vmem = &vsi->ilq_mem;
vsi->ilq = NULL;
if (ceq && ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, &rsrc->cq);
break;
case IRDMA_PUDA_RSRC_TYPE_IEQ:
rsrc = vsi->ieq;
vmem = &vsi->ieq_mem;
vsi->ieq = NULL;
if (ceq && ceq->reg_cq)
irdma_sc_remove_cq_ctx(ceq, &rsrc->cq);
break;
default:
ibdev_dbg(to_ibdev(dev), "PUDA: error resource type = 0x%x\n",


@ -492,9 +492,6 @@ struct irdma_sc_ceq {
u32 first_pm_pbl_idx;
u8 polarity;
u16 vsi_idx;
struct irdma_sc_cq **reg_cq;
u32 reg_cq_size;
spinlock_t req_cq_lock; /* protect access to reg_cq array */
bool virtual_map:1;
bool tph_en:1;
bool itr_no_expire:1;
@ -894,8 +891,6 @@ struct irdma_ceq_init_info {
u8 tph_val;
u16 vsi_idx;
u32 first_pm_pbl_idx;
struct irdma_sc_cq **reg_cq;
u32 reg_cq_idx;
};
struct irdma_aeq_init_info {


@ -114,33 +114,8 @@ void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
*/
void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
{
u64 temp;
u32 hw_sq_tail;
u32 sw_sq_head;
/* valid bit is written and loads completed before reading shadow */
mb();
/* read the doorbell shadow area */
get_64bit_val(qp->shadow_area, 0, &temp);
hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp);
sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
if (sw_sq_head != qp->initial_ring.head) {
if (sw_sq_head != hw_sq_tail) {
if (sw_sq_head > qp->initial_ring.head) {
if (hw_sq_tail >= qp->initial_ring.head &&
hw_sq_tail < sw_sq_head)
dma_wmb();
writel(qp->qp_id, qp->wqe_alloc_db);
} else {
if (hw_sq_tail >= qp->initial_ring.head ||
hw_sq_tail < sw_sq_head)
writel(qp->qp_id, qp->wqe_alloc_db);
}
}
}
qp->initial_ring.head = qp->sq_ring.head;
}
/**
@ -194,6 +169,7 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled;
return wqe;
}
@ -1136,6 +1112,27 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
writel(cq->cq_id, cq->cqe_alloc_db);
}
/**
* irdma_uk_cq_empty - Check if CQ is empty
* @cq: hw cq
*/
bool irdma_uk_cq_empty(struct irdma_cq_uk *cq)
{
__le64 *cqe;
u8 polarity;
u64 qword3;
if (cq->avoid_mem_cflct)
cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
else
cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
return polarity != cq->polarity;
}
/**
* irdma_uk_cq_poll_cmpl - get cq completion info
* @cq: hw cq
@ -1287,6 +1284,8 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) {
unsigned long flags;
srq = qp->srq_uk;
get_64bit_val(cqe, 8, &info->wr_id);
@ -1299,8 +1298,11 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
} else {
info->stag_invalid_set = false;
}
spin_lock_irqsave(srq->lock, flags);
IRDMA_RING_MOVE_TAIL(srq->srq_ring);
spin_unlock_irqrestore(srq->lock, flags);
pring = &srq->srq_ring;
} else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) {
u32 array_idx;
@ -1355,6 +1357,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
if (!info->comp_status)
info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
if (!qp->sq_wrtrk_array[wqe_idx].signaled) {
ret_code = -EFAULT;
goto exit;
}
info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
IRDMA_RING_SET_TAIL(qp->sq_ring,
wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
@ -1420,6 +1426,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
IRDMA_RING_MOVE_TAIL(cq->cq_ring);
if (!cq->avoid_mem_cflct && ext_valid)
IRDMA_RING_MOVE_TAIL(cq->cq_ring);
if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq))
set_64bit_val(cq->shadow_area, 0,
IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
} else {
@ -1574,7 +1581,6 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
qp->conn_wqes = move_cnt;
IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt);
IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt);
IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->initial_ring, move_cnt);
}
/**
@ -1719,7 +1725,6 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
sq_ring_size = qp->sq_size << info->sq_shift;
IRDMA_RING_INIT(qp->sq_ring, sq_ring_size);
IRDMA_RING_INIT(qp->initial_ring, sq_ring_size);
if (info->first_sq_wq) {
irdma_setup_connection_wqes(qp, info);
qp->swqe_polarity = 1;


@ -429,6 +429,7 @@ struct irdma_wqe_uk_ops {
struct irdma_bind_window *op_info);
};
bool irdma_uk_cq_empty(struct irdma_cq_uk *cq);
int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
struct irdma_cq_poll_info *info);
void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
@ -456,7 +457,6 @@ struct irdma_srq_uk {
struct irdma_uk_attrs *uk_attrs;
__le64 *shadow_area;
struct irdma_ring srq_ring;
struct irdma_ring initial_ring;
u32 srq_id;
u32 srq_size;
u32 max_srq_frag_cnt;
@ -465,6 +465,7 @@ struct irdma_srq_uk {
u8 wqe_size;
u8 wqe_size_multiplier;
u8 deferred_flag;
spinlock_t *lock;
};
struct irdma_srq_uk_init_info {
@ -482,7 +483,8 @@ struct irdma_sq_uk_wr_trk_info {
u64 wrid;
u32 wr_len;
u16 quanta;
u8 reserved[2];
u8 signaled;
u8 reserved[1];
};
struct irdma_qp_quanta {


@ -452,6 +452,7 @@ struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp,
cqp_request->waiting = wait;
refcount_set(&cqp_request->refcnt, 1);
memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info));
memset(&cqp_request->info, 0, sizeof(cqp_request->info));
return cqp_request;
}
@ -1068,7 +1069,6 @@ int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
cqp_info = &cqp_request->info;
qp_info = &cqp_request->info.in.u.qp_create.info;
memset(qp_info, 0, sizeof(*qp_info));
qp_info->cq_num_valid = true;
qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS;
cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
@ -1343,7 +1343,6 @@ int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY;
cqp_info->post_sq = 1;
cqp_info->in.u.qp_destroy.qp = qp;
@ -1749,7 +1748,6 @@ int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER;
cqp_info->post_sq = 1;
cqp_info->in.u.stats_gather.info = pestat->gather_info;
@ -1789,7 +1787,6 @@ int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
cqp_info->cqp_cmd = cmd;
cqp_info->post_sq = 1;
cqp_info->in.u.stats_manage.info = *stats_info;
@ -1890,7 +1887,6 @@ int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
return -ENOMEM;
cqp_info = &cqp_request->info;
memset(cqp_info, 0, sizeof(*cqp_info));
cqp_info->cqp_cmd = cmd;
cqp_info->post_sq = 1;
cqp_info->in.u.ws_node.info = *node_info;
@ -2357,24 +2353,6 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
}
bool irdma_cq_empty(struct irdma_cq *iwcq)
{
struct irdma_cq_uk *ukcq;
u64 qword3;
__le64 *cqe;
u8 polarity;
ukcq = &iwcq->sc_cq.cq_uk;
if (ukcq->avoid_mem_cflct)
cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq);
else
cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
return polarity != ukcq->polarity;
}
void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
{
struct irdma_cmpl_gen *cmpl_node;
@ -2436,6 +2414,8 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
struct irdma_ring *sq_ring = &qp->sq_ring;
struct irdma_ring *rq_ring = &qp->rq_ring;
struct irdma_cq *iwscq = iwqp->iwscq;
struct irdma_cq *iwrcq = iwqp->iwrcq;
struct irdma_cmpl_gen *cmpl;
__le64 *sw_wqe;
u64 wqe_qword;
@ -2443,8 +2423,8 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
bool compl_generated = false;
unsigned long flags1;
spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
if (irdma_cq_empty(iwqp->iwscq)) {
spin_lock_irqsave(&iwscq->lock, flags1);
if (irdma_uk_cq_empty(&iwscq->sc_cq.cq_uk)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
@ -2452,7 +2432,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
spin_unlock_irqrestore(&iwscq->lock, flags1);
return;
}
@ -2471,24 +2451,24 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
kfree(cmpl);
continue;
}
ibdev_dbg(iwqp->iwscq->ibcq.device,
ibdev_dbg(iwscq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id);
list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
list_add_tail(&cmpl->list, &iwscq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
spin_unlock_irqrestore(&iwscq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwscq);
irdma_comp_handler(iwscq);
} else {
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
spin_unlock_irqrestore(&iwscq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
if (irdma_cq_empty(iwqp->iwrcq)) {
spin_lock_irqsave(&iwrcq->lock, flags1);
if (irdma_uk_cq_empty(&iwrcq->sc_cq.cq_uk)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
@ -2496,7 +2476,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
spin_unlock_irqrestore(&iwrcq->lock, flags1);
return;
}
@ -2508,20 +2488,20 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
/* remove the RQ WR by moving RQ tail */
IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
ibdev_dbg(iwqp->iwrcq->ibcq.device,
ibdev_dbg(iwrcq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id,
wqe_idx);
list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
list_add_tail(&cmpl->list, &iwrcq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
spin_unlock_irqrestore(&iwrcq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
irdma_comp_handler(iwrcq);
} else {
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
spin_unlock_irqrestore(&iwrcq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}


@ -27,6 +27,7 @@ static int irdma_query_device(struct ib_device *ibdev,
irdma_fw_minor_ver(&rf->sc_dev);
props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
if (hw_attrs->uk_attrs.hw_rev < IRDMA_GEN_3)
props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
props->vendor_id = pcidev->vendor;
props->vendor_part_id = pcidev->device;
@ -771,7 +772,6 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
cqp_info = &cqp_request->info;
qp_info = &cqp_request->info.in.u.qp_create.info;
memset(qp_info, 0, sizeof(*qp_info));
qp_info->mac_valid = true;
qp_info->cq_num_valid = true;
qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
@ -2029,6 +2029,7 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
struct irdma_pci_f *rf;
struct irdma_cq_buf *cq_buf = NULL;
unsigned long flags;
u8 cqe_size;
int ret;
iwdev = to_iwdev(ibcq->device);
@ -2045,7 +2046,7 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
return -EINVAL;
if (!iwcq->user_mode) {
entries++;
entries += 2;
if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
@ -2053,6 +2054,10 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
if (entries & 1)
entries += 1; /* cq size must be an even number */
cqe_size = iwcq->sc_cq.cq_uk.avoid_mem_cflct ? 64 : 32;
if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
entries += 2;
}
info.cq_size = max(entries, 4);
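The extra bump above keeps the CQE array from landing exactly on a hardware page boundary. Assuming IRDMA_HW_PAGE_SIZE is 4 KiB, the two cases work out to:

	32-byte CQEs: 128 * 32 = 4096 bytes  ->  entries becomes 130
	64-byte CQEs:  64 * 64 = 4096 bytes  ->  entries becomes  66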
@ -2306,8 +2311,8 @@ static int irdma_setup_kmode_srq(struct irdma_device *iwdev,
ukinfo->srq_size = depth >> shift;
ukinfo->shadow_area = mem->va + ring_size;
info->shadow_area_pa = info->srq_pa + ring_size;
info->srq_pa = mem->pa;
info->shadow_area_pa = info->srq_pa + ring_size;
return 0;
}
@ -2384,6 +2389,7 @@ static int irdma_create_srq(struct ib_srq *ibsrq,
info.vsi = &iwdev->vsi;
info.pd = &iwpd->sc_pd;
iwsrq->sc_srq.srq_uk.lock = &iwsrq->lock;
err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info);
if (err_code)
goto free_dmem;
@ -2483,6 +2489,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
int err_code;
int entries = attr->cqe;
bool cqe_64byte_ena;
u8 cqe_size;
err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
if (err_code)
@ -2509,6 +2516,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
ukinfo->cq_id = cq_num;
cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
true : false;
cqe_size = cqe_64byte_ena ? 64 : 32;
ukinfo->avoid_mem_cflct = cqe_64byte_ena;
iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
if (attr->comp_vector < rf->ceqs_count)
@ -2581,13 +2589,16 @@ static int irdma_create_cq(struct ib_cq *ibcq,
goto cq_free_rsrc;
}
entries++;
entries += 2;
if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
entries *= 2;
if (entries & 1)
entries += 1; /* cq size must be an even number */
if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
entries += 2;
ukinfo->cq_size = entries;
if (cqe_64byte_ena)
@ -3103,12 +3114,10 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.alloc_stag.info;
memset(info, 0, sizeof(*info));
info->page_size = PAGE_SIZE;
info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
info->pd_id = iwpd->sc_pd.pd_id;
info->total_len = iwmr->len;
info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
info->remote_access = true;
cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
cqp_info->post_sq = 1;
@ -3119,7 +3128,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
if (status)
return status;
iwmr->is_hwreg = 1;
iwmr->is_hwreg = true;
return 0;
}
@ -3253,7 +3262,6 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
cqp_info = &cqp_request->info;
stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
memset(stag_info, 0, sizeof(*stag_info));
stag_info->va = iwpbl->user_base;
stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
stag_info->stag_key = (u8)iwmr->stag;
@ -3263,7 +3271,7 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
stag_info->pd_id = iwpd->sc_pd.pd_id;
stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
stag_info->all_memory = iwmr->dma_mr;
if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED;
else
@ -3290,7 +3298,7 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
if (!ret)
iwmr->is_hwreg = 1;
iwmr->is_hwreg = true;
return ret;
}
@ -3647,7 +3655,6 @@ static int irdma_hwdereg_mr(struct ib_mr *ib_mr)
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.dealloc_stag.info;
memset(info, 0, sizeof(*info));
info->pd_id = iwpd->sc_pd.pd_id;
info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
info->mr = true;
@ -3663,7 +3670,7 @@ static int irdma_hwdereg_mr(struct ib_mr *ib_mr)
if (status)
return status;
iwmr->is_hwreg = 0;
iwmr->is_hwreg = false;
return 0;
}
@ -3786,9 +3793,10 @@ static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags,
* @size: size of memory to register
* @access: Access rights
* @iova_start: start of virtual address for physical buffers
* @dma_mr: Flag indicating whether this region is a PD DMA MR
*/
struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access,
u64 *iova_start)
u64 *iova_start, bool dma_mr)
{
struct irdma_device *iwdev = to_iwdev(pd->device);
struct irdma_pbl *iwpbl;
@ -3805,6 +3813,7 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access
iwpbl = &iwmr->iwpbl;
iwpbl->iwmr = iwmr;
iwmr->type = IRDMA_MEMREG_TYPE_MEM;
iwmr->dma_mr = dma_mr;
iwpbl->user_base = *iova_start;
stag = irdma_create_stag(iwdev);
if (!stag) {
@ -3843,7 +3852,7 @@ static struct ib_mr *irdma_get_dma_mr(struct ib_pd *pd, int acc)
{
u64 kva = 0;
return irdma_reg_phys_mr(pd, 0, 0, acc, &kva);
return irdma_reg_phys_mr(pd, 0, 0, acc, &kva, true);
}
/**
@ -4078,7 +4087,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
break;
case IB_WR_LOCAL_INV:
info.op_type = IRDMA_OP_TYPE_INV_STAG;
info.local_fence = info.read_fence;
info.local_fence = true;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
break;
@ -4505,7 +4514,7 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
}
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
(!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
(!irdma_uk_cq_empty(ukcq) || !list_empty(&iwcq->cmpl_generated)))
ret = 1;
spin_unlock_irqrestore(&iwcq->lock, flags);
@ -5204,7 +5213,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
struct irdma_ah *parent_ah;
int err;
if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
if (udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
return -EINVAL;
err = irdma_setup_ah(ibah, attr);
@ -5500,7 +5509,9 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev)
irdma_rt_deinit_hw(iwdev);
if (!iwdev->is_vport) {
irdma_ctrl_deinit_hw(iwdev->rf);
if (iwdev->rf->vchnl_wq)
if (iwdev->rf->vchnl_wq) {
destroy_workqueue(iwdev->rf->vchnl_wq);
mutex_destroy(&iwdev->rf->sc_dev.vchnl_mutex);
}
}
}


@ -111,7 +111,8 @@ struct irdma_mr {
};
struct ib_umem *region;
int access;
u8 is_hwreg;
bool is_hwreg:1;
bool dma_mr:1;
u16 type;
u32 page_cnt;
u64 page_size;


@ -591,7 +591,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
int mlx4_ib_cm_init(void)
{
cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0);
cm_wq = alloc_workqueue("mlx4_ib_cm", WQ_PERCPU, 0);
if (!cm_wq)
return -ENOMEM;


@ -1225,6 +1225,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(create_flow_table_in, in, other_vport));
MLX5_SET(destroy_flow_table_in, din, vport_number,
MLX5_GET(create_flow_table_in, in, vport_number));
MLX5_SET(destroy_flow_table_in, din, other_eswitch,
MLX5_GET(create_flow_table_in, in, other_eswitch));
MLX5_SET(destroy_flow_table_in, din, eswitch_owner_vhca_id,
MLX5_GET(create_flow_table_in, in,
eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_table_in, din, table_type,
MLX5_GET(create_flow_table_in, in, table_type));
MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
@ -1237,6 +1242,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(create_flow_group_in, in, other_vport));
MLX5_SET(destroy_flow_group_in, din, vport_number,
MLX5_GET(create_flow_group_in, in, vport_number));
MLX5_SET(destroy_flow_group_in, din, other_eswitch,
MLX5_GET(create_flow_group_in, in, other_eswitch));
MLX5_SET(destroy_flow_group_in, din, eswitch_owner_vhca_id,
MLX5_GET(create_flow_group_in, in,
eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_group_in, din, table_type,
MLX5_GET(create_flow_group_in, in, table_type));
MLX5_SET(destroy_flow_group_in, din, table_id,
@ -1251,6 +1261,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_GET(set_fte_in, in, other_vport));
MLX5_SET(delete_fte_in, din, vport_number,
MLX5_GET(set_fte_in, in, vport_number));
MLX5_SET(delete_fte_in, din, other_eswitch,
MLX5_GET(set_fte_in, in, other_eswitch));
MLX5_SET(delete_fte_in, din, eswitch_owner_vhca_id,
MLX5_GET(set_fte_in, in, eswitch_owner_vhca_id));
MLX5_SET(delete_fte_in, din, table_type,
MLX5_GET(set_fte_in, in, table_type));
MLX5_SET(delete_fte_in, din, table_id,


@ -691,22 +691,13 @@ static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
}
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_namespace *ns,
static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
u32 flags, u16 vport)
struct mlx5_flow_table_attr *ft_attr)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
ft = mlx5_create_auto_grouped_flow_table(ns, ft_attr);
if (IS_ERR(ft))
return ERR_CAST(ft);
@ -720,6 +711,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
enum flow_table_type ft_type)
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns = NULL;
enum mlx5_flow_namespace_type fn_type;
struct mlx5_ib_flow_prio *prio;
@ -797,11 +789,14 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
max_table_size = min_t(int, num_entries, max_table_size);
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
num_groups, flags, 0);
if (ft)
return prio;
ft_attr.prio = priority;
ft_attr.max_fte = max_table_size;
ft_attr.flags = flags;
ft_attr.autogroup.max_num_groups = num_groups;
return _get_prio(ns, prio, &ft_attr);
}
enum {
@ -950,6 +945,7 @@ static int get_per_qp_prio(struct mlx5_ib_dev *dev,
enum mlx5_ib_optional_counter_type type)
{
enum mlx5_ib_optional_counter_type per_qp_type;
struct mlx5_flow_table_attr ft_attr = {};
enum mlx5_flow_namespace_type fn_type;
struct mlx5_flow_namespace *ns;
struct mlx5_ib_flow_prio *prio;
@ -1003,7 +999,10 @@ static int get_per_qp_prio(struct mlx5_ib_dev *dev,
if (prio->flow_table)
return 0;
prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
ft_attr.prio = priority;
ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE;
ft_attr.autogroup.max_num_groups = 1;
prio = _get_prio(ns, prio, &ft_attr);
if (IS_ERR(prio))
return PTR_ERR(prio);
@ -1223,6 +1222,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
struct mlx5_flow_table_attr ft_attr = {};
enum mlx5_flow_namespace_type fn_type;
int priority, i, err, spec_num;
struct mlx5_flow_act flow_act = {};
@ -1304,8 +1304,10 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
if (err)
goto free;
prio = _get_prio(dev, ns, prio, priority,
dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
ft_attr.prio = priority;
ft_attr.max_fte = dev->num_ports * MAX_OPFC_RULES;
ft_attr.autogroup.max_num_groups = 1;
prio = _get_prio(ns, prio, &ft_attr);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
goto put_prio;
@ -1872,7 +1874,7 @@ static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
u32 *flags, u16 *vport_idx,
u16 *vport,
struct mlx5_core_dev **ft_mdev,
u32 ib_port)
u32 ib_port, u16 *esw_owner_vhca_id)
{
struct mlx5_core_dev *esw_mdev;
@ -1886,8 +1888,13 @@ static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
return -EINVAL;
esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
if (esw_mdev != dev->mdev)
if (esw_mdev != dev->mdev) {
if (!MLX5_CAP_ADV_RDMA(dev->mdev,
rdma_transport_manager_other_eswitch))
return -EOPNOTSUPP;
*flags |= MLX5_FLOW_TABLE_OTHER_ESWITCH;
*esw_owner_vhca_id = MLX5_CAP_GEN(esw_mdev, vhca_id);
}
*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
*ft_mdev = esw_mdev;
@ -1903,8 +1910,10 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
bool mcast, u32 ib_port)
{
struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
u16 esw_owner_vhca_id = 0;
int max_table_size = 0;
u16 vport_idx = 0;
bool esw_encap;
@ -1966,7 +1975,8 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
return ERR_PTR(-EINVAL);
ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
&vport_idx, &vport,
&ft_mdev, ib_port);
&ft_mdev, ib_port,
&esw_owner_vhca_id);
if (ret)
return ERR_PTR(ret);
@ -2026,8 +2036,13 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
if (prio->flow_table)
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
MLX5_FS_MAX_TYPES, flags, vport);
ft_attr.prio = priority;
ft_attr.max_fte = max_table_size;
ft_attr.flags = flags;
ft_attr.vport = vport;
ft_attr.esw_owner_vhca_id = esw_owner_vhca_id;
ft_attr.autogroup.max_num_groups = MLX5_FS_MAX_TYPES;
return _get_prio(ns, prio, &ft_attr);
}
static struct mlx5_ib_flow_handler *


@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
}
}
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
struct mlx5_core_dev *new_owner)
{
int ret;
if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
return 0;
if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
return 0;
ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
FS_FT_RDMA_TRANSPORT_TX);
if (ret)
return ret;
ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
FS_FT_RDMA_TRANSPORT_RX);
if (ret) {
mlx5_fs_set_root_dev(cur_owner, cur_owner,
FS_FT_RDMA_TRANSPORT_TX);
return ret;
}
return 0;
}
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev;
int i, ret;
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
WARN_ON_ONCE(ret);
}
}
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev;
int ret;
int i;
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
ret = mlx5_ib_set_owner_transport(peer_dev, dev);
if (ret) {
mlx5_ib_release_transport(dev);
return ret;
}
}
return 0;
}
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
@ -88,10 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
else
return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
if (mlx5_lag_is_shared_fdb(dev)) {
ret = mlx5_ib_take_transport(lag_master);
if (ret)
return ret;
}
ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
mlx5_core_net(lag_master));
if (!ibdev)
return -ENOMEM;
if (!ibdev) {
ret = -ENOMEM;
goto release_transport;
}
ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
GFP_KERNEL);
@ -127,6 +192,10 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
kfree(ibdev->port);
fail_port:
ib_dealloc_device(&ibdev->ib_dev);
release_transport:
if (mlx5_lag_is_shared_fdb(lag_master))
mlx5_ib_release_transport(lag_master);
return ret;
}
@ -182,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
esw = peer_mdev->priv.eswitch;
mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}
mlx5_ib_release_transport(mdev);
}
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}


@ -511,6 +511,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_XDR;
break;
case MLX5E_PROT_MASK(MLX5E_1600TAUI_8_1600TBASE_CR8_KR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_XDR;
break;
default:
return -EINVAL;
}
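The new case is the first four-digit entry in this table, and the pair it reports follows directly from the lane arithmetic: 8 lanes x 200 Gb/s per XDR lane = 1600 Gb/s, hence IB_WIDTH_8X together with IB_SPEED_XDR.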


@ -97,33 +97,28 @@ struct mlx5_pagefault {
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
static u64 mlx5_imr_ksm_entries;
static u64 mlx5_imr_mtt_entries;
static u64 mlx5_imr_mtt_size;
static u8 mlx5_imr_mtt_shift;
static u8 mlx5_imr_ksm_page_shift;
static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
static void populate_ksm(struct mlx5_ksm *pksm, size_t idx, size_t nentries,
struct mlx5_ib_mr *imr, int flags)
{
struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev;
struct mlx5_klm *end = pklm + nentries;
int step = MLX5_CAP_ODP(dev, mem_page_fault) ? MLX5_IMR_MTT_SIZE : 0;
struct mlx5_ksm *end = pksm + nentries;
u64 step = MLX5_CAP_ODP(dev, mem_page_fault) ? mlx5_imr_mtt_size : 0;
__be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ?
cpu_to_be32(imr->null_mmkey.key) :
mr_to_mdev(imr)->mkeys.null_mkey;
u64 va =
MLX5_CAP_ODP(dev, mem_page_fault) ? idx * MLX5_IMR_MTT_SIZE : 0;
MLX5_CAP_ODP(dev, mem_page_fault) ? idx * mlx5_imr_mtt_size : 0;
if (flags & MLX5_IB_UPD_XLT_ZAP) {
for (; pklm != end; pklm++, idx++, va += step) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
pklm->key = key;
pklm->va = cpu_to_be64(va);
for (; pksm != end; pksm++, idx++, va += step) {
pksm->key = key;
pksm->va = cpu_to_be64(va);
}
return;
}
@ -147,16 +142,15 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
for (; pklm != end; pklm++, idx++, va += step) {
for (; pksm != end; pksm++, idx++, va += step) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
if (mtt) {
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
pksm->key = cpu_to_be32(mtt->ibmr.lkey);
pksm->va = cpu_to_be64(idx * mlx5_imr_mtt_size);
} else {
pklm->key = key;
pklm->va = cpu_to_be64(va);
pksm->key = key;
pksm->va = cpu_to_be64(va);
}
}
}
@ -201,7 +195,7 @@ int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
struct mlx5_ib_mr *mr, int flags)
{
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
populate_klm(xlt, idx, nentries, mr, flags);
populate_ksm(xlt, idx, nentries, mr, flags);
return 0;
} else {
return populate_mtt(xlt, idx, nentries, mr, flags);
@ -226,7 +220,7 @@ static void free_implicit_child_mr_work(struct work_struct *work)
mutex_lock(&odp_imr->umem_mutex);
mlx5r_umr_update_xlt(mr->parent,
ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
ib_umem_start(odp) >> mlx5_imr_mtt_shift, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
@ -237,7 +231,7 @@ static void free_implicit_child_mr_work(struct work_struct *work)
static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
unsigned long idx = ib_umem_start(odp) >> mlx5_imr_mtt_shift;
struct mlx5_ib_mr *imr = mr->parent;
/*
@ -265,7 +259,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
queue_work(system_unbound_wq, &mr->odp_destroy.work);
queue_work(system_dfl_wq, &mr->odp_destroy.work);
}
static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
@ -425,7 +419,10 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
!MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
!MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled) &&
mlx5_imr_ksm_entries != 0 &&
!(mlx5_imr_ksm_page_shift >
get_max_log_entity_size_cap(dev, MLX5_MKC_ACCESS_MODE_KSM)))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
}
@ -476,14 +473,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
int err;
odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
idx * MLX5_IMR_MTT_SIZE,
MLX5_IMR_MTT_SIZE, &mlx5_mn_ops);
idx * mlx5_imr_mtt_size,
mlx5_imr_mtt_size, &mlx5_mn_ops);
if (IS_ERR(odp))
return ERR_CAST(odp);
mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
MLX5_MKC_ACCESS_MODE_MTT,
MLX5_IMR_MTT_ENTRIES);
mlx5_imr_mtt_entries);
if (IS_ERR(mr)) {
ib_umem_odp_release(odp);
return mr;
@ -495,7 +492,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE;
mr->ibmr.iova = idx * mlx5_imr_mtt_size;
mr->parent = imr;
odp->private = mr;
@ -506,7 +503,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
refcount_set(&mr->mmkey.usecount, 2);
err = mlx5r_umr_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
mlx5_imr_mtt_entries,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
@ -611,7 +608,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
if (!mlx5r_umr_can_load_pas(dev, mlx5_imr_mtt_entries * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
@ -647,7 +644,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
err = mlx5r_umr_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
mlx5_imr_ksm_page_shift,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
@ -750,20 +747,20 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
struct ib_umem_odp *odp_imr, u64 user_va,
size_t bcnt, u32 *bytes_mapped, u32 flags)
{
unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
unsigned long end_idx = (user_va + bcnt - 1) >> mlx5_imr_mtt_shift;
unsigned long upd_start_idx = end_idx + 1;
unsigned long upd_len = 0;
unsigned long npages = 0;
int err;
int ret;
if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
if (unlikely(user_va >= mlx5_imr_ksm_entries * mlx5_imr_mtt_size ||
mlx5_imr_ksm_entries * mlx5_imr_mtt_size - user_va < bcnt))
return -EFAULT;
/* Fault each child mr that intersects with our interval. */
while (bcnt) {
unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
unsigned long idx = user_va >> mlx5_imr_mtt_shift;
struct ib_umem_odp *umem_odp;
struct mlx5_ib_mr *mtt;
u64 len;
@ -1924,9 +1921,25 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
int mlx5_ib_odp_init(void)
{
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
MLX5_IMR_MTT_BITS);
u32 log_va_pages = ilog2(TASK_SIZE) - PAGE_SHIFT;
u8 mlx5_imr_mtt_bits;
/* 48 is default ARM64 VA space and covers X86 4-level paging which is 47 */
if (log_va_pages <= 48 - PAGE_SHIFT)
mlx5_imr_mtt_shift = 30;
/* 56 is x86-64, 5-level paging */
else if (log_va_pages <= 56 - PAGE_SHIFT)
mlx5_imr_mtt_shift = 34;
else
return 0;
mlx5_imr_mtt_size = BIT_ULL(mlx5_imr_mtt_shift);
mlx5_imr_mtt_bits = mlx5_imr_mtt_shift - PAGE_SHIFT;
mlx5_imr_mtt_entries = BIT_ULL(mlx5_imr_mtt_bits);
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
mlx5_imr_mtt_bits);
mlx5_imr_ksm_page_shift = mlx5_imr_mtt_shift;
return 0;
}
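To make the new sizing concrete (assuming x86-64 with 4 KiB pages, so PAGE_SHIFT = 12):

	4-level paging: TASK_SIZE ~ 2^47, log_va_pages = 35 <= 36, so the
	MTT shift stays 30 (1 GiB child MRs) and the KSM table needs
	2^(35 - 18) = 2^17 entries.

	5-level paging: TASK_SIZE ~ 2^56, log_va_pages = 44, so the shift
	grows to 34 (16 GiB child MRs) and the KSM table needs
	2^(44 - 22) = 2^22 entries, instead of the 2^26 entries a fixed
	30-bit shift would have required.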
@ -2093,6 +2106,6 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
destroy_prefetch_work(work);
return rc;
}
queue_work(system_unbound_wq, &work->work);
queue_work(system_dfl_wq, &work->work);
return 0;
}


@ -3451,10 +3451,11 @@ int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
{
u32 stat_rate_support;
if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS)
if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS ||
rate == IB_RATE_1600_GBPS)
return 0;
if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_1600_GBPS)
return -EINVAL;
stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);


@ -518,7 +518,8 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*/
int rvt_driver_cq_init(void)
{
comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
comp_vector_wq = alloc_workqueue("%s",
WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_PERCPU,
0, "rdmavt_cq");
if (!comp_vector_wq)
return -ENOMEM;


@ -452,7 +452,6 @@ static int rxe_mr_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int leng
length -= bytes;
iova += bytes;
page_offset = 0;
}
return 0;


@ -20,6 +20,54 @@
static struct rxe_recv_sockets recv_sockets;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* lockdep can detect false positive circular dependencies
* when there are user-space socket API users or in kernel
* users switching between a tcp and rdma transport.
* Maybe also switching between siw and rxe may cause
* problems as per default sockets are only classified
* by family and not by ip protocol. And there might
* be different locks used between the application
* and the low level sockets.
*
* Problems were seen with ksmbd.ko and cifs.ko,
* switching transports, use git blame to find
* more details.
*/
static struct lock_class_key rxe_recv_sk_key[2];
static struct lock_class_key rxe_recv_slock_key[2];
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
static inline void rxe_reclassify_recv_socket(struct socket *sock)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct sock *sk = sock->sk;
if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
return;
switch (sk->sk_family) {
case AF_INET:
sock_lock_init_class_and_name(sk,
"slock-AF_INET-RDMA-RXE-RECV",
&rxe_recv_slock_key[0],
"sk_lock-AF_INET-RDMA-RXE-RECV",
&rxe_recv_sk_key[0]);
break;
case AF_INET6:
sock_lock_init_class_and_name(sk,
"slock-AF_INET6-RDMA-RXE-RECV",
&rxe_recv_slock_key[1],
"sk_lock-AF_INET6-RDMA-RXE-RECV",
&rxe_recv_sk_key[1]);
break;
default:
WARN_ON_ONCE(1);
}
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
}
static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
struct net_device *ndev,
struct in_addr *saddr,
@ -192,6 +240,7 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
err = udp_sock_create(net, &udp_cfg, &sock);
if (err < 0)
return ERR_PTR(err);
rxe_reclassify_recv_socket(sock);
tnl_cfg.encap_type = 1;
tnl_cfg.encap_rcv = rxe_udp_encap_recv;


@ -358,7 +358,6 @@ int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova,
length -= bytes;
iova += bytes;
page_offset = 0;
}
mutex_unlock(&umem_odp->umem_mutex);


@ -15,6 +15,54 @@
#include "rxe_queue.h"
#include "rxe_task.h"
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* lockdep can detect false positive circular dependencies
* when there are user-space socket API users or in kernel
* users switching between a tcp and rdma transport.
* Maybe also switching between siw and rxe may cause
* problems as per default sockets are only classified
* by family and not by ip protocol. And there might
* be different locks used between the application
* and the low level sockets.
*
* Problems were seen with ksmbd.ko and cifs.ko,
* switching transports, use git blame to find
* more details.
*/
static struct lock_class_key rxe_send_sk_key[2];
static struct lock_class_key rxe_send_slock_key[2];
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
static inline void rxe_reclassify_send_socket(struct socket *sock)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct sock *sk = sock->sk;
if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
return;
switch (sk->sk_family) {
case AF_INET:
sock_lock_init_class_and_name(sk,
"slock-AF_INET-RDMA-RXE-SEND",
&rxe_send_slock_key[0],
"sk_lock-AF_INET-RDMA-RXE-SEND",
&rxe_send_sk_key[0]);
break;
case AF_INET6:
sock_lock_init_class_and_name(sk,
"slock-AF_INET6-RDMA-RXE-SEND",
&rxe_send_slock_key[1],
"sk_lock-AF_INET6-RDMA-RXE-SEND",
&rxe_send_sk_key[1]);
break;
default:
WARN_ON_ONCE(1);
}
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
}
static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
@ -244,6 +292,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
return err;
rxe_reclassify_send_socket(qp->sk);
qp->sk->sk->sk_user_data = qp;
/* pick a source UDP port number for this QP based on


@ -171,7 +171,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
udata, mi, &srq->rq.producer_lock,
&srq->rq.consumer_lock);
if (err)
goto err_free;
return err;
srq->rq.max_wr = attr->max_wr;
}
@ -180,11 +180,6 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
srq->limit = attr->srq_limit;
return 0;
err_free:
rxe_queue_cleanup(q);
srq->rq.queue = NULL;
return err;
}
void rxe_srq_cleanup(struct rxe_pool_elem *elem)


@ -39,6 +39,55 @@ static void siw_cm_llp_error_report(struct sock *s);
static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
int status);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* lockdep can detect false positive circular dependencies
* when there are user-space socket API users or in kernel
* users switching between a tcp and rdma transport.
* Maybe also switching between siw and rxe may cause
* problems as per default sockets are only classified
* by family and not by ip protocol. And there might
* be different locks used between the application
* and the low level sockets.
*
* Problems were seen with ksmbd.ko and cifs.ko,
* switching transports, use git blame to find
* more details.
*/
static struct lock_class_key siw_sk_key[2];
static struct lock_class_key siw_slock_key[2];
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
static inline void siw_reclassify_socket(struct socket *sock)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct sock *sk = sock->sk;
if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
return;
switch (sk->sk_family) {
case AF_INET:
sock_lock_init_class_and_name(sk,
"slock-AF_INET-RDMA-SIW",
&siw_slock_key[0],
"sk_lock-AF_INET-RDMA-SIW",
&siw_sk_key[0]);
break;
case AF_INET6:
sock_lock_init_class_and_name(sk,
"slock-AF_INET6-RDMA-SIW",
&siw_slock_key[1],
"sk_lock-AF_INET6-RDMA-SIW",
&siw_sk_key[1]);
break;
default:
WARN_ON_ONCE(1);
}
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
}
static void siw_sk_assign_cm_upcalls(struct sock *sk)
{
struct siw_cep *cep = sk_to_cep(sk);
@ -1394,6 +1443,7 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s);
if (rv < 0)
goto error;
siw_reclassify_socket(s);
/*
* NOTE: For simplification, connect() is called in blocking
@ -1770,6 +1820,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
if (rv < 0)
return rv;
siw_reclassify_socket(s);
/*
* Allow binding local port when still in TIME_WAIT from last close.


@ -1029,7 +1029,7 @@ static int __init iser_init(void)
mutex_init(&ig.connlist_mutex);
INIT_LIST_HEAD(&ig.connlist);
release_wq = alloc_workqueue("release workqueue", 0, 0);
release_wq = alloc_workqueue("release workqueue", WQ_PERCPU, 0);
if (!release_wq) {
iser_err("failed to allocate release workqueue\n");
err = -ENOMEM;


@ -2613,7 +2613,7 @@ static struct iscsit_transport iser_target_transport = {
static int __init isert_init(void)
{
isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0);
isert_login_wq = alloc_workqueue("isert_login_wq", WQ_PERCPU, 0);
if (!isert_login_wq) {
isert_err("Unable to allocate isert_login_wq\n");
return -ENOMEM;


@ -1450,7 +1450,7 @@ static struct rtrs_srv_sess *get_or_create_srv(struct rtrs_srv_ctx *ctx,
kfree(srv->chunks);
err_free_srv:
kfree(srv);
put_device(&srv->dev);
return ERR_PTR(-ENOMEM);
}


@ -9,4 +9,5 @@ bng_en-y := bnge_core.o \
bnge_rmem.o \
bnge_resc.o \
bnge_netdev.o \
bnge_ethtool.o
bnge_ethtool.o \
bnge_auxr.o


@ -11,6 +11,7 @@
#include <linux/bnxt/hsi.h>
#include "bnge_rmem.h"
#include "bnge_resc.h"
#include "bnge_auxr.h"
#define DRV_VER_MAJ 1
#define DRV_VER_MIN 15
@ -22,6 +23,12 @@ enum board_idx {
BCM57708,
};
struct bnge_auxr_priv {
struct auxiliary_device aux_dev;
struct bnge_auxr_dev *auxr_dev;
int id;
};
struct bnge_pf_info {
u16 fw_fid;
u16 port_id;
@ -197,6 +204,9 @@ struct bnge_dev {
struct bnge_irq *irq_tbl;
u16 irqs_acquired;
struct bnge_auxr_priv *aux_priv;
struct bnge_auxr_dev *auxr_dev;
};
static inline bool bnge_is_roce_en(struct bnge_dev *bd)


@ -0,0 +1,258 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2025 Broadcom.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/bitops.h>
#include <linux/irq.h>
#include <asm/byteorder.h>
#include <linux/bitmap.h>
#include <linux/auxiliary_bus.h>
#include <linux/bnxt/hsi.h>
#include "bnge.h"
#include "bnge_hwrm.h"
#include "bnge_auxr.h"
static DEFINE_IDA(bnge_aux_dev_ids);
static void bnge_fill_msix_vecs(struct bnge_dev *bd,
struct bnge_msix_info *info)
{
struct bnge_auxr_dev *auxr_dev = bd->auxr_dev;
int num_msix, i;
if (!auxr_dev->auxr_info->msix_requested) {
dev_warn(bd->dev, "Requested MSI-X vectors not allocated\n");
return;
}
num_msix = auxr_dev->auxr_info->msix_requested;
for (i = 0; i < num_msix; i++) {
info[i].vector = bd->irq_tbl[i].vector;
info[i].db_offset = bd->db_offset;
info[i].ring_idx = i;
}
}
int bnge_register_dev(struct bnge_auxr_dev *auxr_dev,
void *handle)
{
struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev);
struct bnge_auxr_info *auxr_info;
int rc = 0;
netdev_lock(bd->netdev);
mutex_lock(&auxr_dev->auxr_dev_lock);
if (!bd->irq_tbl) {
rc = -ENODEV;
goto exit;
}
if (!bnge_aux_has_enough_resources(bd)) {
rc = -ENOMEM;
goto exit;
}
auxr_info = auxr_dev->auxr_info;
auxr_info->handle = handle;
auxr_info->msix_requested = bd->aux_num_msix;
bnge_fill_msix_vecs(bd, bd->auxr_dev->msix_info);
auxr_dev->flags |= BNGE_ARDEV_MSIX_ALLOC;
exit:
mutex_unlock(&auxr_dev->auxr_dev_lock);
netdev_unlock(bd->netdev);
return rc;
}
EXPORT_SYMBOL(bnge_register_dev);
void bnge_unregister_dev(struct bnge_auxr_dev *auxr_dev)
{
struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev);
struct bnge_auxr_info *auxr_info;
auxr_info = auxr_dev->auxr_info;
netdev_lock(bd->netdev);
mutex_lock(&auxr_dev->auxr_dev_lock);
if (auxr_info->msix_requested)
auxr_dev->flags &= ~BNGE_ARDEV_MSIX_ALLOC;
auxr_info->msix_requested = 0;
mutex_unlock(&auxr_dev->auxr_dev_lock);
netdev_unlock(bd->netdev);
}
EXPORT_SYMBOL(bnge_unregister_dev);
int bnge_send_msg(struct bnge_auxr_dev *auxr_dev, struct bnge_fw_msg *fw_msg)
{
struct bnge_dev *bd = pci_get_drvdata(auxr_dev->pdev);
struct output *resp;
struct input *req;
u32 resp_len;
int rc;
rc = bnge_hwrm_req_init(bd, req, 0 /* don't care */);
if (rc)
return rc;
rc = bnge_hwrm_req_replace(bd, req, fw_msg->msg, fw_msg->msg_len);
if (rc)
goto drop_req;
bnge_hwrm_req_timeout(bd, req, fw_msg->timeout);
resp = bnge_hwrm_req_hold(bd, req);
rc = bnge_hwrm_req_send(bd, req);
resp_len = le16_to_cpu(resp->resp_len);
if (resp_len) {
if (fw_msg->resp_max_len < resp_len)
resp_len = fw_msg->resp_max_len;
memcpy(fw_msg->resp, resp, resp_len);
}
drop_req:
bnge_hwrm_req_drop(bd, req);
return rc;
}
EXPORT_SYMBOL(bnge_send_msg);
void bnge_rdma_aux_device_uninit(struct bnge_dev *bd)
{
struct bnge_auxr_priv *aux_priv;
struct auxiliary_device *adev;
/* Skip if no auxiliary device init was done. */
if (!bd->aux_priv)
return;
aux_priv = bd->aux_priv;
adev = &aux_priv->aux_dev;
auxiliary_device_uninit(adev);
}
static void bnge_aux_dev_release(struct device *dev)
{
struct bnge_auxr_priv *aux_priv =
container_of(dev, struct bnge_auxr_priv, aux_dev.dev);
struct bnge_dev *bd = pci_get_drvdata(aux_priv->auxr_dev->pdev);
ida_free(&bnge_aux_dev_ids, aux_priv->id);
kfree(aux_priv->auxr_dev->auxr_info);
bd->auxr_dev = NULL;
kfree(aux_priv->auxr_dev);
kfree(aux_priv);
bd->aux_priv = NULL;
}
void bnge_rdma_aux_device_del(struct bnge_dev *bd)
{
if (!bd->auxr_dev)
return;
auxiliary_device_delete(&bd->aux_priv->aux_dev);
}
static void bnge_set_auxr_dev_info(struct bnge_auxr_dev *auxr_dev,
struct bnge_dev *bd)
{
auxr_dev->pdev = bd->pdev;
auxr_dev->l2_db_size = bd->db_size;
auxr_dev->l2_db_size_nc = bd->db_size;
auxr_dev->l2_db_offset = bd->db_offset;
mutex_init(&auxr_dev->auxr_dev_lock);
if (bd->flags & BNGE_EN_ROCE_V1)
auxr_dev->flags |= BNGE_ARDEV_ROCEV1_SUPP;
if (bd->flags & BNGE_EN_ROCE_V2)
auxr_dev->flags |= BNGE_ARDEV_ROCEV2_SUPP;
auxr_dev->chip_num = bd->chip_num;
auxr_dev->hw_ring_stats_size = bd->hw_ring_stats_size;
auxr_dev->pf_port_id = bd->pf.port_id;
auxr_dev->en_state = bd->state;
auxr_dev->bar0 = bd->bar0;
}
void bnge_rdma_aux_device_add(struct bnge_dev *bd)
{
struct auxiliary_device *aux_dev;
int rc;
if (!bd->auxr_dev)
return;
aux_dev = &bd->aux_priv->aux_dev;
rc = auxiliary_device_add(aux_dev);
if (rc) {
dev_warn(bd->dev, "Failed to add auxiliary device for ROCE\n");
auxiliary_device_uninit(aux_dev);
bd->flags &= ~BNGE_EN_ROCE;
}
bd->auxr_dev->net = bd->netdev;
}
void bnge_rdma_aux_device_init(struct bnge_dev *bd)
{
struct auxiliary_device *aux_dev;
struct bnge_auxr_info *auxr_info;
struct bnge_auxr_priv *aux_priv;
struct bnge_auxr_dev *auxr_dev;
int rc;
if (!bnge_is_roce_en(bd))
return;
aux_priv = kzalloc(sizeof(*aux_priv), GFP_KERNEL);
if (!aux_priv)
goto exit;
aux_priv->id = ida_alloc(&bnge_aux_dev_ids, GFP_KERNEL);
if (aux_priv->id < 0) {
dev_warn(bd->dev, "ida alloc failed for aux device\n");
kfree(aux_priv);
goto exit;
}
aux_dev = &aux_priv->aux_dev;
aux_dev->id = aux_priv->id;
aux_dev->name = "rdma";
aux_dev->dev.parent = &bd->pdev->dev;
aux_dev->dev.release = bnge_aux_dev_release;
rc = auxiliary_device_init(aux_dev);
if (rc) {
ida_free(&bnge_aux_dev_ids, aux_priv->id);
kfree(aux_priv);
goto exit;
}
bd->aux_priv = aux_priv;
auxr_dev = kzalloc(sizeof(*auxr_dev), GFP_KERNEL);
if (!auxr_dev)
goto aux_dev_uninit;
aux_priv->auxr_dev = auxr_dev;
auxr_info = kzalloc(sizeof(*auxr_info), GFP_KERNEL);
if (!auxr_info)
goto aux_dev_uninit;
auxr_dev->auxr_info = auxr_info;
bd->auxr_dev = auxr_dev;
bnge_set_auxr_dev_info(auxr_dev, bd);
return;
aux_dev_uninit:
auxiliary_device_uninit(aux_dev);
exit:
bd->flags &= ~BNGE_EN_ROCE;
}


@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2025 Broadcom */
#ifndef _BNGE_AUXR_H_
#define _BNGE_AUXR_H_
#include <linux/auxiliary_bus.h>
#define BNGE_MIN_ROCE_CP_RINGS 2
#define BNGE_MIN_ROCE_STAT_CTXS 1
#define BNGE_MAX_ROCE_MSIX 64
struct hwrm_async_event_cmpl;
struct bnge;
struct bnge_msix_info {
u32 vector;
u32 ring_idx;
u32 db_offset;
};
struct bnge_fw_msg {
void *msg;
int msg_len;
void *resp;
int resp_max_len;
int timeout;
};
struct bnge_auxr_info {
void *handle;
u16 msix_requested;
};
enum {
BNGE_ARDEV_ROCEV1_SUPP = BIT(0),
BNGE_ARDEV_ROCEV2_SUPP = BIT(1),
BNGE_ARDEV_MSIX_ALLOC = BIT(2),
};
#define BNGE_ARDEV_ROCE_SUPP (BNGE_ARDEV_ROCEV1_SUPP | \
BNGE_ARDEV_ROCEV2_SUPP)
struct bnge_auxr_dev {
struct net_device *net;
struct pci_dev *pdev;
void __iomem *bar0;
struct bnge_msix_info msix_info[BNGE_MAX_ROCE_MSIX];
u32 flags;
struct bnge_auxr_info *auxr_info;
/* Doorbell BAR size in bytes mapped by L2 driver. */
int l2_db_size;
/* Doorbell BAR size in bytes mapped as non-cacheable. */
int l2_db_size_nc;
/* Doorbell offset in bytes within l2_db_size_nc. */
int l2_db_offset;
u16 chip_num;
u16 hw_ring_stats_size;
u16 pf_port_id;
unsigned long en_state;
u16 auxr_num_msix_vec;
u16 auxr_num_ctxs;
/* serialize auxr operations */
struct mutex auxr_dev_lock;
};
void bnge_rdma_aux_device_uninit(struct bnge_dev *bdev);
void bnge_rdma_aux_device_del(struct bnge_dev *bdev);
void bnge_rdma_aux_device_add(struct bnge_dev *bdev);
void bnge_rdma_aux_device_init(struct bnge_dev *bdev);
int bnge_register_dev(struct bnge_auxr_dev *adev,
void *handle);
void bnge_unregister_dev(struct bnge_auxr_dev *adev);
int bnge_send_msg(struct bnge_auxr_dev *adev, struct bnge_fw_msg *fw_msg);
#endif /* _BNGE_AUXR_H_ */
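No RDMA-side consumer of this header lands in this pull, so the intended call flow is easiest to see with a hypothetical user. Everything named bng_re_* below is illustrative only, the HWRM request type is an arbitrary example, and <linux/bnxt/hsi.h> plus this header are assumed to be included; the timeout fallback behaviour is also an assumption:

	/* Hypothetical consumer sketch, not part of this series. */
	static int bng_re_attach(struct bnge_auxr_dev *auxr_dev, void *rdev)
	{
		struct hwrm_ver_get_input req = {};
		struct hwrm_ver_get_output resp = {};
		struct bnge_fw_msg fw_msg = {};
		int rc;

		rc = bnge_register_dev(auxr_dev, rdev);
		if (rc)
			return rc;

		/* msix_info[] now carries vector, ring index and doorbell
		 * offset for each NQ handed to the RDMA driver.
		 */

		req.req_type = cpu_to_le16(HWRM_VER_GET);
		fw_msg.msg = &req;
		fw_msg.msg_len = sizeof(req);
		fw_msg.resp = &resp;
		fw_msg.resp_max_len = sizeof(resp);
		fw_msg.timeout = 0;	/* assumed: 0 picks the HWRM default */

		rc = bnge_send_msg(auxr_dev, &fw_msg);
		if (rc)
			bnge_unregister_dev(auxr_dev);
		return rc;
	}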


@ -41,6 +41,11 @@ static void bnge_print_device_info(struct pci_dev *pdev, enum board_idx idx)
bool bnge_aux_registered(struct bnge_dev *bd)
{
struct bnge_auxr_dev *ba_dev = bd->auxr_dev;
if (ba_dev && ba_dev->auxr_info->msix_requested)
return true;
return false;
}
@ -312,16 +317,20 @@ static int bnge_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&bd->db_lock);
#endif
bnge_rdma_aux_device_init(bd);
rc = bnge_alloc_irqs(bd);
if (rc) {
dev_err(&pdev->dev, "Error IRQ allocation rc = %d\n", rc);
goto err_config_uninit;
goto err_uninit_auxr;
}
rc = bnge_netdev_alloc(bd, max_irqs);
if (rc)
goto err_free_irq;
bnge_rdma_aux_device_add(bd);
pci_save_state(pdev);
return 0;
@ -329,6 +338,9 @@ static int bnge_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent)
err_free_irq:
bnge_free_irqs(bd);
err_uninit_auxr:
bnge_rdma_aux_device_uninit(bd);
err_config_uninit:
bnge_net_uninit_dflt_config(bd);
@ -354,10 +366,14 @@ static void bnge_remove_one(struct pci_dev *pdev)
{
struct bnge_dev *bd = pci_get_drvdata(pdev);
bnge_rdma_aux_device_del(bd);
bnge_netdev_free(bd);
bnge_free_irqs(bd);
bnge_rdma_aux_device_uninit(bd);
bnge_net_uninit_dflt_config(bd);
bnge_devlink_unregister(bd);


@ -98,6 +98,46 @@ void bnge_hwrm_req_alloc_flags(struct bnge_dev *bd, void *req, gfp_t gfp)
ctx->gfp = gfp;
}
int bnge_hwrm_req_replace(struct bnge_dev *bd, void *req, void *new_req,
u32 len)
{
struct bnge_hwrm_ctx *ctx = __hwrm_ctx_get(bd, req);
struct input *internal_req = req;
u16 req_type;
if (!ctx)
return -EINVAL;
if (len > BNGE_HWRM_CTX_OFFSET)
return -E2BIG;
/* free any existing slices */
ctx->allocated = BNGE_HWRM_DMA_SIZE - BNGE_HWRM_CTX_OFFSET;
if (ctx->slice_addr) {
dma_free_coherent(bd->dev, ctx->slice_size,
ctx->slice_addr, ctx->slice_handle);
ctx->slice_addr = NULL;
}
ctx->gfp = GFP_KERNEL;
if ((bd->fw_cap & BNGE_FW_CAP_SHORT_CMD) || len > BNGE_HWRM_MAX_REQ_LEN) {
memcpy(internal_req, new_req, len);
} else {
internal_req->req_type = ((struct input *)new_req)->req_type;
ctx->req = new_req;
}
ctx->req_len = len;
ctx->req->resp_addr = cpu_to_le64(ctx->dma_handle +
BNGE_HWRM_RESP_OFFSET);
/* update sentinel for potentially new request type */
req_type = le16_to_cpu(internal_req->req_type);
ctx->sentinel = bnge_cal_sentinel(ctx, req_type);
return 0;
}
void bnge_hwrm_req_flags(struct bnge_dev *bd, void *req,
enum bnge_hwrm_ctx_flags flags)
{


@ -107,4 +107,6 @@ int bnge_hwrm_req_send_silent(struct bnge_dev *bd, void *req);
void bnge_hwrm_req_alloc_flags(struct bnge_dev *bd, void *req, gfp_t flags);
void *bnge_hwrm_req_dma_slice(struct bnge_dev *bd, void *req, u32 size,
dma_addr_t *dma);
int bnge_hwrm_req_replace(struct bnge_dev *bd, void *req, void *new_req,
u32 len);
#endif /* _BNGE_HWRM_H_ */


@ -34,6 +34,18 @@ static unsigned int bnge_get_max_func_stat_ctxs(struct bnge_dev *bd)
return bd->hw_resc.max_stat_ctxs;
}
bool bnge_aux_has_enough_resources(struct bnge_dev *bd)
{
unsigned int max_stat_ctxs;
max_stat_ctxs = bnge_get_max_func_stat_ctxs(bd);
if (max_stat_ctxs <= BNGE_MIN_ROCE_STAT_CTXS ||
bd->nq_nr_rings == max_stat_ctxs)
return false;
return true;
}
static unsigned int bnge_get_max_func_cp_rings(struct bnge_dev *bd)
{
return bd->hw_resc.max_cp_rings;


@ -74,6 +74,7 @@ void bnge_net_uninit_dflt_config(struct bnge_dev *bd);
void bnge_aux_init_dflt_config(struct bnge_dev *bd);
u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd);
int bnge_cal_nr_rss_ctxs(u16 rx_rings);
bool bnge_aux_has_enough_resources(struct bnge_dev *bd);
static inline u32
bnge_adjust_pow_two(u32 total_ent, u16 ent_per_blk)


@ -271,7 +271,7 @@ struct ib_cm_event {
#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
/**
* ib_cm_handler - User-defined callback to process communication events.
* typedef ib_cm_handler - User-defined callback to process communication events.
* @cm_id: Communication identifier associated with the reported event.
* @event: Information about the communication event.
*
@ -482,7 +482,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
/**
* ib_prepare_cm_mra - Prepares to send a message receipt acknowledgment to a
connection message in case duplicates are received.
* connection message in case duplicates are received.
* @cm_id: Connection identifier associated with the connection message.
*/
int ib_prepare_cm_mra(struct ib_cm_id *cm_id);


@ -586,10 +586,10 @@ enum ib_stat_flag {
};
/**
* struct rdma_stat_desc
* @name - The name of the counter
* @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
* @priv - Driver private information; Core code should not use
* struct rdma_stat_desc - description of one rdma stat/counter
* @name: The name of the counter
* @flags: Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
* @priv: Driver private information; Core code should not use
*/
struct rdma_stat_desc {
const char *name;
@ -598,24 +598,24 @@ struct rdma_stat_desc {
};
/**
* struct rdma_hw_stats
* @lock - Mutex to protect parallel write access to lifespan and values
* struct rdma_hw_stats - collection of hardware stats and their management
* @lock: Mutex to protect parallel write access to lifespan and values
* of counters, which are 64bits and not guaranteed to be written
* atomicaly on 32bits systems.
* @timestamp - Used by the core code to track when the last update was
* @lifespan - Used by the core code to determine how old the counters
* @timestamp: Used by the core code to track when the last update was
* @lifespan: Used by the core code to determine how old the counters
* should be before being updated again. Stored in jiffies, defaults
* to 10 milliseconds, drivers can override the default be specifying
* their own value during their allocation routine.
* @descs - Array of pointers to static descriptors used for the counters
* @descs: Array of pointers to static descriptors used for the counters
* in directory.
* @is_disabled - A bitmap to indicate each counter is currently disabled
* @is_disabled: A bitmap to indicate each counter is currently disabled
* or not.
* @num_counters - How many hardware counters there are. If name is
* @num_counters: How many hardware counters there are. If name is
* shorter than this number, a kernel oops will result. Driver authors
* are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
* in their code to prevent this.
* @value - Array of u64 counters that are accessed by the sysfs code and
* @value: Array of u64 counters that are accessed by the sysfs code and
* filled in by the drivers get_stats routine
*/
struct rdma_hw_stats {
@ -859,6 +859,7 @@ enum ib_rate {
IB_RATE_400_GBPS = 21,
IB_RATE_600_GBPS = 22,
IB_RATE_800_GBPS = 23,
IB_RATE_1600_GBPS = 25,
};
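ib_rate values map onto multiples of the 2.5 Gb/s base rate, so the new entry works out to 1600 / 2.5 = 640; the rate-to-multiplier helpers (ib_rate_to_mult() and friends) are presumably taught the same value elsewhere in this series.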
/**
@ -2405,7 +2406,7 @@ struct ib_device_ops {
int (*modify_port)(struct ib_device *device, u32 port_num,
int port_modify_mask,
struct ib_port_modify *port_modify);
/**
/*
* The following mandatory functions are used only at device
* registration. Keep functions such as these at the end of this
* structure to avoid cache line misses when accessing struct ib_device
@ -2415,7 +2416,7 @@ struct ib_device_ops {
struct ib_port_immutable *immutable);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u32 port_num);
/**
/*
* When calling get_netdev, the HW vendor's driver should return the
* net device of device @device at port @port_num or NULL if such
* a net device doesn't exist. The vendor driver should call dev_hold
@ -2425,7 +2426,7 @@ struct ib_device_ops {
*/
struct net_device *(*get_netdev)(struct ib_device *device,
u32 port_num);
/**
/*
* rdma netdev operation
*
* Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
@ -2439,14 +2440,14 @@ struct ib_device_ops {
int (*rdma_netdev_get_params)(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params);
/**
/*
 * query_gid should return the GID value for @device when the @port_num
 * link layer is either IB or iWarp. It is a no-op if the @port_num port
 * uses the RoCE link layer.
*/
int (*query_gid)(struct ib_device *device, u32 port_num, int index,
union ib_gid *gid);
/**
/*
* When calling add_gid, the HW vendor's driver should add the gid
* of device of port at gid index available at @attr. Meta-info of
* that gid (for example, the network device related to this gid) is
@ -2460,7 +2461,7 @@ struct ib_device_ops {
* roce_gid_table is used.
*/
int (*add_gid)(const struct ib_gid_attr *attr, void **context);
/**
/*
* When calling del_gid, the HW vendor's driver should delete the
* gid of device @device at gid index gid_index of port port_num
* available in @attr.
@ -2475,7 +2476,7 @@ struct ib_device_ops {
struct ib_udata *udata);
void (*dealloc_ucontext)(struct ib_ucontext *context);
int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
/**
/*
* This will be called once refcount of an entry in mmap_xa reaches
* zero. The type of the memory that was mapped may differ between
* entries and is opaque to the rdma_user_mmap interface.
@ -2516,12 +2517,12 @@ struct ib_device_ops {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
/**
/*
* pre_destroy_cq - Prevent a cq from generating any new work
* completions, but not free any kernel resources
*/
int (*pre_destroy_cq)(struct ib_cq *cq);
/**
/*
* post_destroy_cq - Free all kernel resources
*/
void (*post_destroy_cq)(struct ib_cq *cq);
@ -2615,7 +2616,7 @@ struct ib_device_ops {
struct scatterlist *meta_sg, int meta_sg_nents,
unsigned int *meta_sg_offset);
/**
/*
* alloc_hw_[device,port]_stats - Allocate a struct rdma_hw_stats and
* fill in the driver initialized data. The struct is kfree()'ed by
* the sysfs core when the device is removed. A lifespan of -1 in the
@ -2624,7 +2625,7 @@ struct ib_device_ops {
struct rdma_hw_stats *(*alloc_hw_device_stats)(struct ib_device *device);
struct rdma_hw_stats *(*alloc_hw_port_stats)(struct ib_device *device,
u32 port_num);
/**
/*
* get_hw_stats - Fill in the counter value(s) in the stats struct.
* @index - The index in the value array we wish to have updated, or
* num_counters if we want all stats updated
@ -2639,14 +2640,14 @@ struct ib_device_ops {
int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u32 port, int index);
/**
/*
* modify_hw_stat - Modify the counter configuration
* @enable: true/false when enable/disable a counter
* Return codes - 0 on success or error code otherwise.
*/
int (*modify_hw_stat)(struct ib_device *device, u32 port,
unsigned int counter_index, bool enable);
/**
/*
* Allows rdma drivers to add their own restrack attributes.
*/
int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
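A hedged sketch of what a driver-side get_hw_stats() usually looks like under the contract described above; example_read_counter() is a made-up helper, and the return-the-number-of-updated-counters convention is assumed rather than quoted from this diff:

static int example_get_hw_stats(struct ib_device *device,
                                struct rdma_hw_stats *stats,
                                u32 port, int index)
{
        int i;

        /* Refresh every counter advertised by the descriptor array. */
        for (i = 0; i < stats->num_counters; i++)
                stats->value[i] = example_read_counter(device, port, i);

        return stats->num_counters; /* assumed: count of updated counters */
}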
@ -2682,39 +2683,39 @@ struct ib_device_ops {
u8 pdata_len);
int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
/**
/*
* counter_bind_qp - Bind a QP to a counter.
* @counter - The counter to be bound. If counter->id is zero then
* the driver needs to allocate a new counter and set counter->id
*/
int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp,
u32 port);
/**
/*
* counter_unbind_qp - Unbind the qp from the dynamically-allocated
* counter and bind it onto the default one
*/
int (*counter_unbind_qp)(struct ib_qp *qp, u32 port);
/**
/*
 * counter_dealloc - De-allocate the hw counter
*/
int (*counter_dealloc)(struct rdma_counter *counter);
/**
/*
* counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
* the driver initialized data.
*/
struct rdma_hw_stats *(*counter_alloc_stats)(
struct rdma_counter *counter);
/**
/*
* counter_update_stats - Query the stats value of this counter
*/
int (*counter_update_stats)(struct rdma_counter *counter);
/**
/*
* counter_init - Initialize the driver specific rdma counter struct.
*/
void (*counter_init)(struct rdma_counter *counter);
/**
/*
* Allows rdma drivers to add their own restrack attributes
* dumped via 'rdma stat' iproute2 command.
*/
@ -2730,25 +2731,25 @@ struct ib_device_ops {
*/
int (*get_numa_node)(struct ib_device *dev);
/**
/*
* add_sub_dev - Add a sub IB device
*/
struct ib_device *(*add_sub_dev)(struct ib_device *parent,
enum rdma_nl_dev_type type,
const char *name);
/**
/*
* del_sub_dev - Delete a sub IB device
*/
void (*del_sub_dev)(struct ib_device *sub_dev);
/**
/*
 * ufile_cleanup - Attempt to clean up uobjects' HW resources inside
* the ufile.
*/
void (*ufile_hw_cleanup)(struct ib_uverbs_file *ufile);
/**
/*
* report_port_event - Drivers need to implement this if they have
* some private stuff to handle when link status changes.
*/
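For orientation, a hedged sketch of how a provider typically installs these callbacks during probe; the example_* implementations are hypothetical, while ib_set_device_ops() and the op names come from the structure above:

static const struct ib_device_ops example_dev_ops = {
        .owner = THIS_MODULE,
        .get_port_immutable = example_get_port_immutable,
        .query_gid = example_query_gid,
        .get_hw_stats = example_get_hw_stats,
};

/* Called from the driver's probe path, before ib_register_device(). */
static void example_init_ops(struct ib_device *ibdev)
{
        ib_set_device_ops(ibdev, &example_dev_ops);
}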
@ -3157,8 +3158,8 @@ static inline u32 rdma_start_port(const struct ib_device *device)
/**
* rdma_for_each_port - Iterate over all valid port numbers of the IB device
* @device - The struct ib_device * to iterate over
* @iter - The unsigned int to store the port number
* @device: The struct ib_device * to iterate over
* @iter: The unsigned int to store the port number
*/
#define rdma_for_each_port(device, iter) \
for (iter = rdma_start_port(device + \
@ -3524,7 +3525,7 @@ static inline bool rdma_core_cap_opa_port(struct ib_device *device,
/**
* rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
* @device: Device
* @port_num: Port number
* @port: Port number
* @mtu: enum value of MTU
*
* Return the MTU size supported by the port as an integer value. Will return
@ -3542,7 +3543,7 @@ static inline int rdma_mtu_enum_to_int(struct ib_device *device, u32 port,
/**
* rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
* @device: Device
* @port_num: Port number
* @port: Port number
* @attr: port attribute
*
* Return the MTU size supported by the port as an integer value.
@ -3919,7 +3920,7 @@ static inline int ib_destroy_qp(struct ib_qp *qp)
/**
* ib_open_qp - Obtain a reference to an existing sharable QP.
* @xrcd - XRC domain
* @xrcd: XRC domain
* @qp_open_attr: Attributes identifying the QP to open.
*
* Returns a reference to a sharable QP.
@ -4273,9 +4274,9 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
/**
* ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses
* @dev: The device for which the DMA addresses are to be created
* @sg: The sg_table object describing the buffer
* @sgt: The sg_table object describing the buffer
* @direction: The direction of the DMA
* @attrs: Optional DMA attributes for the map operation
* @dma_attrs: Optional DMA attributes for the map operation
*/
static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev,
struct sg_table *sgt,
@ -4419,8 +4420,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
/**
* ib_update_fast_reg_key - updates the key portion of the fast_reg MR
* R_Key and L_Key.
* @mr - struct ib_mr pointer to be updated.
* @newkey - new key to be used.
* @mr: struct ib_mr pointer to be updated.
* @newkey: new key to be used.
*/
static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
{
@ -4431,7 +4432,7 @@ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
/**
* ib_inc_rkey - increments the key portion of the given rkey. Can be used
* for calculating a new rkey for type 2 memory windows.
* @rkey - the rkey to increment.
* @rkey: the rkey to increment.
*/
static inline u32 ib_inc_rkey(u32 rkey)
{
@ -4525,7 +4526,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
/**
* ib_device_try_get: Hold a registration lock
* device: The device to lock
* @dev: The device to lock
*
* A device under an active registration lock cannot become unregistered. It
* is only possible to obtain a registration lock on a device that is fully
@ -4832,7 +4833,7 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
* @device: the rdma device
* @ibdev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port);
@ -4885,7 +4886,7 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
/**
* ibdev_to_node - return the NUMA node for a given ib_device
* @dev: device to get the NUMA node for.
* @ibdev: device to get the NUMA node for.
*/
static inline int ibdev_to_node(struct ib_device *ibdev)
{
@ -4923,6 +4924,7 @@ static inline struct net *rdma_dev_net(struct ib_device *device)
/**
* rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
* on the flow_label
* @fl: flow_label value
*
* This function will convert the 20 bit flow_label input to a valid RoCE v2
* UDP src port 14 bit value. All RoCE V2 drivers should use this same


@ -144,7 +144,7 @@
#define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1)
/**
* rvt_ud_wr - IB UD work plus AH cache
* struct rvt_ud_wr - IB UD work plus AH cache
* @wr: valid IB work request
* @attr: pointer to an allocated AH attribute
*
@ -186,8 +186,8 @@ struct rvt_swqe {
* @head: index of next entry to fill
* @c_lock: lock to protect consumer of the kernel buffer
* @tail: index of next entry to pull
* @count: count is aproximate of total receive enteries posted
* @rvt_rwqe: struct of receive work request queue entry
* @count: count is approximate of total receive entries posted
* @curr_wq: struct of receive work request queue entry
*
* This structure is used to contain the head pointer,
* tail pointer and receive work queue entries for kernel
@ -309,10 +309,10 @@ struct rvt_ack_entry {
#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
/**
* rvt_operation_params - op table entry
* @length - the length to copy into the swqe entry
* @qpt_support - a bit mask indicating QP type support
* @flags - RVT_OPERATION flags (see above)
* struct rvt_operation_params - op table entry
* @length: the length to copy into the swqe entry
* @qpt_support: a bit mask indicating QP type support
* @flags: RVT_OPERATION flags (see above)
*
* This supports table driven post send so that
 * the driver can have differing and potentially
@ -552,7 +552,7 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
/**
* rvt_is_user_qp - return if this is user mode QP
* @qp - the target QP
* @qp: the target QP
*/
static inline bool rvt_is_user_qp(struct rvt_qp *qp)
{
@ -561,7 +561,7 @@ static inline bool rvt_is_user_qp(struct rvt_qp *qp)
/**
* rvt_get_qp - get a QP reference
* @qp - the QP to hold
* @qp: the QP to hold
*/
static inline void rvt_get_qp(struct rvt_qp *qp)
{
@ -570,7 +570,7 @@ static inline void rvt_get_qp(struct rvt_qp *qp)
/**
* rvt_put_qp - release a QP reference
* @qp - the QP to release
* @qp: the QP to release
*/
static inline void rvt_put_qp(struct rvt_qp *qp)
{
@ -580,7 +580,7 @@ static inline void rvt_put_qp(struct rvt_qp *qp)
/**
* rvt_put_swqe - drop mr refs held by swqe
* @wqe - the send wqe
* @wqe: the send wqe
*
* This drops any mr references held by the swqe
*/
@ -597,8 +597,8 @@ static inline void rvt_put_swqe(struct rvt_swqe *wqe)
/**
* rvt_qp_wqe_reserve - reserve operation
* @qp - the rvt qp
* @wqe - the send wqe
* @qp: the rvt qp
* @wqe: the send wqe
*
 * This routine is used in post send to record
* a wqe relative reserved operation use.
@ -612,8 +612,8 @@ static inline void rvt_qp_wqe_reserve(
/**
* rvt_qp_wqe_unreserve - clean reserved operation
* @qp - the rvt qp
* @flags - send wqe flags
* @qp: the rvt qp
* @flags: send wqe flags
*
* This decrements the reserve use count.
*
@ -653,8 +653,8 @@ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
/**
* rvt_div_round_up_mtu - round up divide
* @qp - the qp pair
* @len - the length
* @qp: the qp pair
* @len: the length
*
* Perform a shift based mtu round up divide
*/
@ -664,8 +664,9 @@ static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
}
/**
* @qp - the qp pair
* @len - the length
* rvt_div_mtu - shift-based divide
* @qp: the qp pair
* @len: the length
*
* Perform a shift based mtu divide
*/
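To make the "shift based" wording concrete, a minimal sketch assuming the QP stores its path MTU as a power-of-two exponent (called log_pmtu here); it mirrors the idea behind rvt_div_round_up_mtu()/rvt_div_mtu() without claiming to be their implementation:

static inline u32 example_div_round_up_mtu(u32 log_pmtu, u32 len)
{
        /* Round len up to the next MTU multiple, then divide by the MTU. */
        return (len + (1U << log_pmtu) - 1) >> log_pmtu;
}

static inline u32 example_div_mtu(u32 log_pmtu, u32 len)
{
        return len >> log_pmtu; /* truncating divide by 2^log_pmtu */
}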
@ -676,7 +677,7 @@ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
/**
* rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies
* @timeout - timeout input(0 - 31).
* @timeout: timeout input(0 - 31).
*
* Return a timeout value in jiffies.
*/
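For reference, the 5-bit timeout value being converted follows the usual IB convention of 4.096 us * 2^timeout; the helper below only illustrates that arithmetic and is not the rdmavt implementation:

static inline u64 example_timeout_to_usecs(u8 timeout)
{
        /* 4.096 us * 2^timeout, truncated to whole microseconds. */
        return (4096ULL << timeout) / 1000ULL;
}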
@ -690,7 +691,8 @@ static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
/**
* rvt_lookup_qpn - return the QP with the given QPN
* @ibp: the ibport
* @rdi: rvt device info structure
* @rvp: the ibport
* @qpn: the QP number to look up
*
* The caller must hold the rcu_read_lock(), and keep the lock until
@ -716,9 +718,9 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
}
/**
* rvt_mod_retry_timer - mod a retry timer
* @qp - the QP
* @shift - timeout shift to wait for multiple packets
* rvt_mod_retry_timer_ext - mod a retry timer
* @qp: the QP
* @shift: timeout shift to wait for multiple packets
* Modify a potentially already running retry timer
*/
static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
@ -753,7 +755,7 @@ static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
}
/**
* rvt_qp_sqwe_incr - increment ring index
* rvt_qp_swqe_incr - increment ring index
* @qp: the qp
* @val: the starting value
*
@ -811,10 +813,10 @@ static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
/**
* rvt_qp_complete_swqe - insert send completion
* @qp - the qp
* @wqe - the send wqe
* @opcode - wc operation (driver dependent)
* @status - completion status
* @qp: the qp
* @wqe: the send wqe
* @opcode: wc operation (driver dependent)
* @status: completion status
*
* Update the s_last information, and then insert a send
* completion into the completion
@ -891,7 +893,7 @@ void rvt_ruc_loopback(struct rvt_qp *qp);
/**
* struct rvt_qp_iter - the iterator for QPs
* @qp - the current QP
* @qp: the current QP
*
* This structure defines the current iterator
* state for sequenced access to all QPs relative
@ -913,7 +915,7 @@ struct rvt_qp_iter {
/**
* ib_cq_tail - Return tail index of cq buffer
* @send_cq - The cq for send
* @send_cq: The cq for send
*
* This is called in qp_iter_print to get tail
* of cq buffer.
@ -929,7 +931,7 @@ static inline u32 ib_cq_tail(struct ib_cq *send_cq)
/**
* ib_cq_head - Return head index of cq buffer
* @send_cq - The cq for send
* @send_cq: The cq for send
*
* This is called in qp_iter_print to get head
* of cq buffer.
@ -945,7 +947,7 @@ static inline u32 ib_cq_head(struct ib_cq *send_cq)
/**
* rvt_free_rq - free memory allocated for rvt_rq struct
* @rvt_rq: request queue data structure
* @rq: request queue data structure
*
* This function should only be called if the rvt_mmap_info()
* has not succeeded.