Merge tag 'for-linus-fwctl' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull fwctl subsystem from Jason Gunthorpe:
 "fwctl is a new subsystem intended to bring some common rules and order
  to the growing pattern of exposing a secure FW interface directly to
  userspace.

  Unlike existing places like RDMA/DRM/VFIO/uacce that are exposing a
  device for datapath operations, fwctl is focused on debugging,
  configuration and provisioning of the device. It will not have the
  necessary features, like interrupt delivery, to support a datapath.

  This concept is similar to the long-standing practice in the "HW" RAID
  space of having a device specific misc device to manage the RAID
  controller FW. fwctl generalizes this notion of a companion debug and
  management interface that goes along with a dataplane implemented in
  an appropriate subsystem.

  There have been three LWN articles written discussing various aspects
  of this:

	https://lwn.net/Articles/955001/
	https://lwn.net/Articles/969383/
	https://lwn.net/Articles/990802/

  This includes three drivers to launch the subsystem:

   - CXL provides a vendor scheme for executing commands and a way to
     learn the 'command effects' (i.e. the security properties) of such
     commands. The fwctl driver allows access to these mechanisms within
     the fwctl security model.

   - mlx5 is a family of networking products; the driver supports all
     current Mellanox HW still receiving FW feature updates. This
     includes RDMA multiprotocol NICs like ConnectX and the BlueField
     family of Smart NICs.

   - AMD/Pensando Distributed Services Card is a multi-protocol Smart
     NIC with a multi-function PCI design. fwctl works on the management
     PCI function following a 'command effects' model similar to CXL"

* tag 'for-linus-fwctl' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (30 commits)
  pds_fwctl: add Documentation entries
  pds_fwctl: add rpc and query support
  pds_fwctl: initial driver framework
  pds_core: add new fwctl auxiliary_device
  pds_core: specify auxiliary_device to be created
  pds_core: make pdsc_auxbus_dev_del() void
  cxl: Fixup kdoc issues for include/cxl/features.h
  fwctl/cxl: Add documentation to FWCTL CXL
  cxl/test: Add Set Feature support to cxl_test
  cxl/test: Add Get Feature support to cxl_test
  cxl: Add support to handle user feature commands for set feature
  cxl: Add support to handle user feature commands for get feature
  cxl: Add support for fwctl RPC command to enable CXL feature commands
  cxl: Move cxl feature command structs to user header
  cxl: Add FWCTL support to CXL
  mlx5: Create an auxiliary device for fwctl_mlx5
  fwctl/mlx5: Support for communicating with mlx5 fw
  fwctl: Add documentation
  fwctl: FWCTL_RPC to execute a Remote Procedure Call to device firmware
  taint: Add TAINT_FWCTL
  ...
Linus Torvalds 2025-03-29 10:45:20 -07:00
commit 0ccff074d6
46 changed files with 4054 additions and 132 deletions

@@ -101,6 +101,7 @@ Bit Log Number Reason that got the kernel tainted
  16  _/X   65536  auxiliary taint, defined for and used by distros
  17  _/T  131072  kernel was built with the struct randomization plugin
  18  _/N  262144  an in-kernel test has been run
+ 19  _/J  524288  userspace used a mutating debug operation in fwctl
 === === ====== ========================================================

 Note: The character ``_`` is representing a blank in this table to make reading

@@ -184,3 +185,7 @@ More detailed explanation for tainting
      build time.

  18) ``N`` if an in-kernel test, such as a KUnit test, has been run.
+
+ 19) ``J`` if userspace opened /dev/fwctl/* and performed a FWCTL_RPC_DEBUG_WRITE
+     to use the device's debugging features. Device debugging features could
+     cause the device to malfunction in undefined ways.
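
The new bit can be checked from user space by reading the kernel's taint mask;
a minimal sketch, assuming only the standard ``/proc/sys/kernel/tainted``
interface and the bit value 524288 (bit 19, TAINT_FWCTL per the shortlog above):

.. code-block:: c

   /* Report whether TAINT_FWCTL (bit 19) is set in the taint mask. */
   #include <stdio.h>

   int main(void)
   {
           unsigned long taint = 0;
           FILE *f = fopen("/proc/sys/kernel/tainted", "r");

           if (!f || fscanf(f, "%lu", &taint) != 1)
                   return 1;
           fclose(f);
           printf("TAINT_FWCTL: %s\n",
                  (taint & (1UL << 19)) ? "set" : "clear");
           return 0;
   }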


@@ -0,0 +1,142 @@
.. SPDX-License-Identifier: GPL-2.0

================
fwctl cxl driver
================

:Author: Dave Jiang

Overview
========

The CXL spec defines a set of commands that can be issued to the mailbox of a
CXL device or switch. It also leaves room for vendor-specific commands to be
issued to the mailbox as well. fwctl provides a path to issue a set of allowed
mailbox commands from user space to the device, moderated by the kernel driver.

The following 3 commands will be used to support CXL Features:

   CXL spec r3.1 8.2.9.6.1 Get Supported Features (Opcode 0500h)
   CXL spec r3.1 8.2.9.6.2 Get Feature (Opcode 0501h)
   CXL spec r3.1 8.2.9.6.3 Set Feature (Opcode 0502h)

The "Get Supported Features" return data may be filtered by the kernel driver to
drop any features that are forbidden by the kernel or are being used exclusively
by the kernel. The driver will set the "Set Feature Size" of the "Get Supported
Features Supported Feature Entry" to 0 to indicate that the Feature cannot be
modified. The "Get Supported Features" and "Get Feature" commands fall under
the fwctl policy of FWCTL_RPC_CONFIGURATION.

For the "Set Feature" command, the access policy is currently broken down into
two categories depending on the Set Feature effects reported by the device. If
the Set Feature will cause an immediate change to the device, the fwctl access
policy must be FWCTL_RPC_DEBUG_WRITE_FULL. The effects for this level are
"immediate config change", "immediate data change", "immediate policy change",
or "immediate log change" in the set effects mask. If the effects are "config
change with cold reset" or "config change with conventional reset", then the
fwctl access policy must be FWCTL_RPC_DEBUG_WRITE or higher.
fwctl cxl User API
==================

.. kernel-doc:: include/uapi/fwctl/cxl.h

1. Driver info query
--------------------

The first step for the app is to issue ioctl(FWCTL_CMD_INFO). A ``struct
fwctl_info`` needs to be filled out with ``fwctl_info.out_device_type`` set to
``FWCTL_DEVICE_TYPE_CXL``. The return data is a ``struct fwctl_info_cxl``,
which contains a reserved 32-bit field that should be all zeros. Successful
invocation of the ioctl implies that the Features capability is operational.
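
A minimal sketch of this first step. The struct and field names follow
include/uapi/fwctl/fwctl.h and include/uapi/fwctl/cxl.h as merged in this
series; the ``FWCTL_INFO`` ioctl define and the installed header paths are
assumptions, and error reporting is trimmed:

.. code-block:: c

   #include <fcntl.h>
   #include <stdint.h>
   #include <unistd.h>
   #include <sys/ioctl.h>
   #include <fwctl/fwctl.h>
   #include <fwctl/cxl.h>

   /* Open a fwctl char device and confirm it is a CXL fwctl instance. */
   static int cxl_fwctl_open(const char *path)
   {
           struct fwctl_info_cxl dev_info = {};
           struct fwctl_info info = {
                   .size = sizeof(info),
                   .device_data_len = sizeof(dev_info),
                   .out_device_data = (uintptr_t)&dev_info,
           };
           int fd = open(path, O_RDWR);

           if (fd < 0)
                   return -1;
           if (ioctl(fd, FWCTL_INFO, &info) ||
               info.out_device_type != FWCTL_DEVICE_TYPE_CXL) {
                   close(fd);
                   return -1;
           }
           return fd;      /* ready for ioctl(FWCTL_RPC) */
   }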
2. Send hardware commands
-------------------------

The next step is to send the "Get Supported Features" command to the driver
from user space via ioctl(FWCTL_RPC). A ``struct fwctl_rpc_cxl`` is pointed to
by ``fwctl_rpc.in``, and ``struct fwctl_rpc_cxl.in_payload`` points to the
hardware input structure defined by the CXL spec. ``fwctl_rpc.out`` points to
the buffer that contains a ``struct fwctl_rpc_cxl_out``, which includes the
hardware output data inlined as ``fwctl_rpc_cxl_out.payload``. This command is
called twice: the first time to retrieve the number of features supported, and
a second time to retrieve the specific feature details as the output data.

After getting the specific feature details, a Get/Set Feature command can be
appropriately programmed and sent. For a "Set Feature" command, the retrieved
feature info contains an effects field that details the effects the
"Set Feature" command will trigger. That informs the user whether the system
is configured to allow the "Set Feature" command or not. A sketch of the first
call follows, then a full Get Feature example.
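
This sketch follows the conventions of the Get Feature example below;
``send_command()`` is the same helper used there, and the
``CXL_MBOX_OPCODE_GET_SUPPORTED_FEATURES`` define is assumed to mirror
``CXL_MBOX_OPCODE_GET_FEATURE``:

.. code-block:: c

   static int cxl_count_features(int fd, uint16_t *num_feats)
   {
           struct cxl_mbox_get_sup_feats_in *feat_in;
           struct fwctl_rpc_cxl_out *out;
           struct fwctl_rpc rpc = {0};
           struct fwctl_rpc_cxl *in;
           size_t out_size, in_size;
           int rc;

           /* Call 1: ask for zero entries; only the total count comes back. */
           in_size = sizeof(*in) + sizeof(*feat_in);
           rc = posix_memalign((void **)&in, 16, in_size);
           if (rc)
                   return -ENOMEM;
           memset(in, 0, in_size);
           feat_in = &in->get_sup_feats_in;
           feat_in->count = 0;

           out_size = sizeof(*out) + sizeof(struct cxl_mbox_get_sup_feats_out);
           rc = posix_memalign((void **)&out, 16, out_size);
           if (rc) {
                   free(in);
                   return -ENOMEM;
           }
           memset(out, 0, out_size);

           in->opcode = CXL_MBOX_OPCODE_GET_SUPPORTED_FEATURES;
           in->op_size = sizeof(*feat_in);
           rpc.size = sizeof(rpc);
           rpc.scope = FWCTL_RPC_CONFIGURATION;
           rpc.in_len = in_size;
           rpc.out_len = out_size;
           rpc.in = (uint64_t)(uint64_t *)in;
           rpc.out = (uint64_t)(uint64_t *)out;

           rc = send_command(fd, &rpc, out);
           if (!rc)
                   *num_feats = le16toh(out->get_sup_feats_out.supported_feats);
           /*
            * Call 2 (not shown): resize the out buffer for *num_feats
            * entries, set feat_in->count to that byte count, and repeat to
            * retrieve the feature details.
            */
           free(out);
           free(in);
           return rc;
   }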
Code example of a Get Feature
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: c

   static int cxl_fwctl_rpc_get_test_feature(int fd, struct test_feature *feat_ctx,
                                             const uint32_t expected_data)
   {
           struct cxl_mbox_get_feat_in *feat_in;
           struct fwctl_rpc_cxl_out *out;
           struct fwctl_rpc rpc = {0};
           struct fwctl_rpc_cxl *in;
           size_t out_size, in_size;
           uint32_t val;
           void *data;
           int rc;

           in_size = sizeof(*in) + sizeof(*feat_in);
           rc = posix_memalign((void **)&in, 16, in_size);
           if (rc)
                   return -ENOMEM;
           memset(in, 0, in_size);

           feat_in = &in->get_feat_in;
           uuid_copy(feat_in->uuid, feat_ctx->uuid);
           feat_in->count = feat_ctx->get_size;

           out_size = sizeof(*out) + feat_ctx->get_size;
           rc = posix_memalign((void **)&out, 16, out_size);
           if (rc)
                   goto free_in;
           memset(out, 0, out_size);

           in->opcode = CXL_MBOX_OPCODE_GET_FEATURE;
           in->op_size = sizeof(*feat_in);
           rpc.size = sizeof(rpc);
           rpc.scope = FWCTL_RPC_CONFIGURATION;
           rpc.in_len = in_size;
           rpc.out_len = out_size;
           rpc.in = (uint64_t)(uint64_t *)in;
           rpc.out = (uint64_t)(uint64_t *)out;

           rc = send_command(fd, &rpc, out);
           if (rc)
                   goto free_all;

           data = out->payload;
           val = le32toh(*(__le32 *)data);
           if (memcmp(&val, &expected_data, sizeof(val)) != 0) {
                   rc = -ENXIO;
                   goto free_all;
           }

   free_all:
           free(out);
   free_in:
           free(in);
           return rc;
   }
See the CXL CLI test code at
<https://github.com/pmem/ndctl/tree/main/test/fwctl.c> for detailed user code
examples of how to exercise this path.

fwctl cxl Kernel API
====================

.. kernel-doc:: drivers/cxl/core/features.c
   :export:

.. kernel-doc:: include/cxl/features.h


@@ -0,0 +1,286 @@
.. SPDX-License-Identifier: GPL-2.0

===============
fwctl subsystem
===============

:Author: Jason Gunthorpe

Overview
========

Modern devices contain extensive amounts of FW, and in many cases, are largely
software-defined pieces of hardware. The evolution of this approach is largely
a reaction to Moore's Law, where a chip tape-out is now highly expensive, and
the chip design is extremely large. Replacing fixed HW logic with a flexible
and tightly coupled FW/HW combination is an effective risk mitigation against
chip respin. Problems in the HW design can be counteracted in device FW. This
is especially true for devices which present a stable and backwards compatible
interface to the operating system driver (such as NVMe).

The FW layer in devices has grown to incredible size, and devices frequently
integrate clusters of fast processors to run it. For example, mlx5 devices have
over 30MB of FW code, and big configurations operate with over 1GB of
FW-managed runtime state.

The availability of such a flexible layer has created quite a variety in the
industry where single pieces of silicon are now configurable software-defined
devices and can operate in substantially different ways depending on the need.
Further, we often see cases where specific sites wish to operate devices in
ways that are highly specialized and require applications that have been
tailored to their unique configuration.

Further, devices have become multi-functional and integrated to the point they
no longer fit neatly into the kernel's division of subsystems. Modern
multi-functional devices have drivers, such as bnxt/ice/mlx5/pds, that span
many subsystems while sharing the underlying hardware using the auxiliary
device system.

All together this creates a challenge for the operating system, where devices
have an expansive FW environment that needs robust device-specific debugging
support, and FW-driven functionality that is not well suited to "generic"
interfaces. fwctl seeks to allow access to the full device functionality from
user space in the areas of debuggability, management, and first-boot/nth-boot
provisioning.

fwctl is aimed at the common device design pattern where the OS and FW
communicate via an RPC message layer constructed with a queue or mailbox
scheme. In this case the driver will typically have some layer to deliver RPC
messages and collect RPC responses from device FW. The in-kernel subsystem
drivers that operate the device for its primary purposes will use these RPCs to
build their drivers, but devices also usually have a set of ancillary RPCs that
don't really fit into any specific subsystem. For example, a HW RAID controller
is primarily operated by the block layer but also comes with a set of RPCs to
administer the construction of drives within the HW RAID.

In the past when devices were more single-function, individual subsystems would
grow different approaches to solving some of these common problems. For
instance monitoring device health, manipulating its FLASH, debugging the FW,
and provisioning all have various unique interfaces across the kernel.

fwctl's purpose is to define a common set of limited rules, described below,
that allow user space to securely construct and execute RPCs inside device FW.
The rules serve as an agreement between the operating system and FW on how to
correctly design the RPC interface. As a uAPI the subsystem provides a thin
layer of discovery and a generic uAPI to deliver the RPCs and collect the
response. It supports a system of user space libraries and tools which will
use this interface to control the device using the device native protocols.
Scope of Action
---------------

fwctl drivers are strictly restricted to being a way to operate the device FW.
It is not an avenue to access random kernel internals, or other operating
system SW states.

fwctl instances must operate on a well-defined device function, and the device
should have a well-defined security model for what scope within the physical
device the function is permitted to access. For instance, the most complex PCIe
device today may broadly have several function-level scopes:

1. A privileged function with full access to the on-device global state and
   configuration

2. Multiple hypervisor functions with control over itself and child functions
   used with VMs

3. Multiple VM functions tightly scoped within the VM

The device may create a logical parent/child relationship between these scopes.
For instance a child VM's FW may be within the scope of the hypervisor FW. It
is quite common in the VFIO world that the hypervisor environment has a complex
provisioning/profiling/configuration responsibility for the function VFIO
assigns to the VM.

Further, within the function, devices often have RPC commands that fall within
some general scopes of action (see enum fwctl_rpc_scope):

1. Access to function & child configuration, FLASH, etc. that becomes live at a
   function reset. Access to function & child runtime configuration that is
   transparent or non-disruptive to any driver or VM.

2. Read-only access to function debug information that may report on FW objects
   in the function & child, including FW objects owned by other kernel
   subsystems.

3. Write access to function & child debug information strictly compatible with
   the principles of kernel lockdown and kernel integrity protection. Triggers
   a kernel Taint.

4. Full debug device access. Triggers a kernel Taint, requires CAP_SYS_RAWIO.

User space will provide a scope label on each RPC and the kernel must enforce
the above CAPs and taints based on that scope. A combination of kernel and FW
can enforce that RPCs are placed in the correct scope by user space.
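
As a sketch, the four scopes above map onto increasing values of ``enum
fwctl_rpc_scope`` that user space places in ``fwctl_rpc.scope``. The
configuration and debug-write names appear elsewhere in this series; the
read-only debug name and the exact values are assumptions:

.. code-block:: c

   enum fwctl_rpc_scope {
           FWCTL_RPC_CONFIGURATION = 0,            /* scope 1 */
           FWCTL_RPC_DEBUG_READ_ONLY = 1,          /* scope 2 */
           FWCTL_RPC_DEBUG_WRITE = 2,              /* scope 3: taints */
           FWCTL_RPC_DEBUG_WRITE_FULL = 3,         /* scope 4: taints,
                                                      needs CAP_SYS_RAWIO */
   };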
Denied behavior
---------------

There are many things this interface must not allow user space to do (without a
Taint or CAP), broadly derived from the principles of kernel lockdown. Some
examples:

1. DMA to/from arbitrary memory, hang the system, compromise FW integrity with
   untrusted code, or otherwise compromise device or system security and
   integrity.

2. Provide an abnormal "back door" to kernel drivers. No manipulation of kernel
   objects owned by kernel drivers.

3. Directly configure or otherwise control kernel drivers. A subsystem kernel
   driver can react to the device configuration at function reset/driver load
   time, but otherwise must not be coupled to fwctl.

4. Operate the HW in a way that overlaps with the core purpose of another
   primary kernel subsystem, such as read/write to LBAs, send/receive of
   network packets, or operating an accelerator's data plane.

fwctl is not a replacement for device direct access subsystems like uacce or
VFIO.

Operations exposed through fwctl's non-tainting interfaces should be fully
sharable with other users of the device. For instance, exposing an RPC through
fwctl should never prevent a kernel subsystem from also concurrently using that
same RPC or hardware unit down the road. In such cases fwctl will be less
important than the proper kernel subsystems that eventually emerge. Mistakes in
this area resulting in clashes will be resolved in favour of a kernel
implementation.
fwctl User API
==============

.. kernel-doc:: include/uapi/fwctl/fwctl.h
.. kernel-doc:: include/uapi/fwctl/mlx5.h
.. kernel-doc:: include/uapi/fwctl/pds.h

sysfs Class
-----------

fwctl has a sysfs class (/sys/class/fwctl/fwctlNN/) and character devices
(/dev/fwctl/fwctlNN) with a simple numbered scheme. The character device
operates the ioctl uAPI described above.

fwctl devices can be related to driver components in other subsystems through
sysfs::

   $ ls /sys/class/fwctl/fwctl0/device/infiniband/
   ibp0s10f0

   $ ls /sys/class/infiniband/ibp0s10f0/device/fwctl/
   fwctl0/

   $ ls /sys/devices/pci0000:00/0000:00:0a.0/fwctl/fwctl0
   dev  device  power  subsystem  uevent
User space Community
--------------------

Drawing inspiration from nvme-cli, participation on the kernel side must come
with a user space implementation in a common TBD git tree, at a minimum to
usefully operate the kernel driver. Providing such an implementation is a
pre-condition to merging a kernel driver.

The goal is to build a user space community around some of the shared problems
we all have, and ideally develop some common user space programs with some
starting themes of:

- Device in-field debugging

- HW provisioning

- VFIO child device profiling before VM boot

- Confidential Compute topics (attestation, secure provisioning)

that stretch across all subsystems in the kernel. fwupd is a great example of
how an excellent user space experience can emerge out of kernel-side diversity.
fwctl Kernel API
================

.. kernel-doc:: drivers/fwctl/main.c
   :export:
.. kernel-doc:: include/linux/fwctl.h

fwctl Driver design
-------------------

In many cases a fwctl driver is going to be part of a larger cross-subsystem
device, possibly using the auxiliary_device mechanism. In that case several
subsystems are going to be sharing the same device and FW interface layer, so
the device design must already provide for isolation and cooperation between
kernel subsystems. fwctl should fit into that same model.

Part of the driver should include a description of how its scope restrictions
and security model work. The driver and FW together must ensure that RPCs
provided by user space are mapped to the appropriate scope. If the validation
is done in the driver, then the validation can read a 'command effects' report
from the device, or hardwire the enforcement. If the validation is done in the
FW, then the driver should pass the fwctl_rpc_scope to the FW along with the
command.

The driver and FW must cooperate to ensure that either fwctl cannot allocate
any FW resources, or that any resources it does allocate are freed on FD
closure. A driver primarily constructed around FW RPCs may find that its core
PCI function and RPC layer belong under fwctl, with auxiliary devices
connecting to other subsystems. A sketch of the registration pattern follows.
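
The pattern used by the CXL driver later in this series reduces to the
following sketch (all ``example_*`` names are illustrative; the ops table and
registration calls are the ones this series introduces):

.. code-block:: c

   static int example_open_uctx(struct fwctl_uctx *uctx)
   {
           return 0;
   }

   static void example_close_uctx(struct fwctl_uctx *uctx)
   {
   }

   /* Validate the RPC against its declared scope, then execute it in FW. */
   static void *example_fw_rpc(struct fwctl_uctx *uctx,
                               enum fwctl_rpc_scope scope,
                               void *in, size_t in_len, size_t *out_len);

   static const struct fwctl_ops example_ops = {
           .device_type = FWCTL_DEVICE_TYPE_CXL,   /* per-driver type */
           .uctx_size = sizeof(struct fwctl_uctx),
           .open_uctx = example_open_uctx,
           .close_uctx = example_close_uctx,
           .fw_rpc = example_fw_rpc,
   };

   static int example_probe(struct device *parent)
   {
           struct fwctl_device *fwctl_dev =
                   _fwctl_alloc_device(parent, &example_ops,
                                       sizeof(*fwctl_dev));

           if (!fwctl_dev)
                   return -ENOMEM;

           return fwctl_register(fwctl_dev);  /* exposes /dev/fwctl/fwctlNN */
   }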
Each device type must be mindful of Linux's philosophy for stable ABI. The FW
RPC interface does not have to meet a strictly stable ABI, but it does need to
meet an expectation that userspace tools that are deployed and in significant
use don't needlessly break. FW upgrade and kernel upgrade should keep widely
deployed tooling working.

Development and debugging focused RPCs under more permissive scopes can have
less stability if the tools using them are only run under exceptional
circumstances and not for everyday use of the device. Debugging tools may even
require exact version matching, as they may need something similar to DWARF
debug information from the FW binary.
Security Response
=================

The kernel remains the gatekeeper for this interface. If violations of the
scopes, security or isolation principles are found, we have options to let
devices fix them with a FW update, push a kernel patch to parse and block RPC
commands, or push a kernel patch to block entire firmware versions/devices.

While the kernel can always directly parse and restrict RPCs, it is expected
that the existing kernel pattern of allowing drivers to delegate validation to
FW will be a useful design.
Existing Similar Examples
=========================

The approach described in this document is not a new idea. Direct, or near
direct, device access has been offered by the kernel in different areas for
decades. With more devices wanting to follow this design pattern it is becoming
clear that it is not entirely well understood and, more importantly, the
security considerations are not well defined or agreed upon.

Some examples:

- HW RAID controllers. This includes RPCs to do things like compose drives into
  a RAID volume, configure RAID parameters, monitor the HW, and more.

- Baseboard managers. RPCs for configuring settings in the device and more.

- NVMe vendor command capsules. nvme-cli provides access to some monitoring
  functions that different products have defined, but more exist.

- CXL also has an NVMe-like vendor command system.

- DRM allows user space drivers to send commands to the device via kernel
  mediation.

- RDMA allows user space drivers to directly push commands to the device
  without kernel involvement.

- Various "raw" APIs, raw HID (SDL2), raw USB, NVMe Generic Interface, etc.

The first 4 are examples of areas that fwctl intends to cover. The latter three
are examples of denied behavior as they fully overlap with the primary purpose
of a kernel subsystem.

Some key lessons learned from these past efforts are the importance of having a
common user space project to use as a pre-condition for obtaining a kernel
driver. Developing a good community around useful software in user space is key
to getting companies to fund participation to enable their products.


@@ -0,0 +1,14 @@
.. SPDX-License-Identifier: GPL-2.0

Firmware Control (FWCTL) Userspace API
======================================

A framework that defines a common set of limited rules that allow user space
to securely construct and execute RPCs inside device firmware.

.. toctree::
   :maxdepth: 1

   fwctl
   fwctl-cxl
   pds_fwctl


@@ -0,0 +1,46 @@
.. SPDX-License-Identifier: GPL-2.0

================
fwctl pds driver
================

:Author: Shannon Nelson

Overview
========

The PDS Core device makes a fwctl service available through an
auxiliary_device named pds_core.fwctl.N. The pds_fwctl driver binds to
this device and registers itself with the fwctl subsystem. The resulting
userspace interface is used by an application that is a part of the
AMD Pensando software package for the Distributed Services Card (DSC).

The pds_fwctl driver has little knowledge of the firmware's internals.
It only knows how to send commands through pds_core's message queue to the
firmware for fwctl requests. The set of fwctl operations available
depends on the firmware in the DSC, and the userspace application
version must match the firmware so that they can talk to each other.

When a connection is created, the pds_fwctl driver requests from the
firmware a list of firmware object endpoints, and for each endpoint the
driver requests a list of operations for that endpoint.

Each operation description includes a firmware-defined command attribute
that maps to the FWCTL scope levels. The driver translates those firmware
values into the FWCTL scope values, which can then be used for filtering
the scoped user requests.
pds_fwctl User API
==================

Each RPC request includes the target endpoint, the operation id, and the in
and out buffer lengths and pointers. The driver verifies the existence of the
requested endpoint and operation, then checks the request scope against the
required scope of the operation. The request is then put together with the
request data and sent through pds_core's message queue to the firmware, and
the results are returned to the caller, as sketched below.
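
A minimal sketch of such a request from user space, using only the generic
``struct fwctl_rpc`` header shown in the CXL driver documentation; the
pds-specific payload layout from include/uapi/fwctl/pds.h is elided:

.. code-block:: c

   /*
    * in_buf/out_buf carry the firmware-defined pds payload (endpoint id,
    * operation id, request data); only the generic header is shown here.
    */
   struct fwctl_rpc rpc = {
           .size = sizeof(rpc),
           .scope = FWCTL_RPC_CONFIGURATION,  /* must satisfy the op's scope */
           .in = (uint64_t)(uint64_t *)in_buf,
           .in_len = in_len,
           .out = (uint64_t)(uint64_t *)out_buf,
           .out_len = out_len,
   };

   if (ioctl(fd, FWCTL_RPC, &rpc))
           err(1, "FWCTL_RPC");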
The RPC endpoints, operations, and buffer contents are defined by the
particular firmware package in the device, which varies across the
available product configurations. The details are available in the
specific product SDK documentation.


@@ -46,6 +46,7 @@ Devices and I/O
    accelerators/ocxl
    dma-buf-heaps
    dma-buf-alloc-exchange
+   fwctl/index
    gpio/index
    iommufd
    media/index


@@ -333,6 +333,7 @@ Code  Seq#    Include File              Comments
 0x97  00-7F  fs/ceph/ioctl.h            Ceph file system
 0x99  00-0F                             537-Addinboard driver
                                         <mailto:buk@buks.ipn.de>
+0x9A  00-0F  include/uapi/fwctl/fwctl.h
 0xA0  all    linux/sdp/sdp.h            Industrial Device Project
                                         <mailto:kenji@bitgate.com>
 0xA1  0      linux/vtpm_proxy.h         TPM Emulator Proxy Driver


@@ -5886,6 +5886,7 @@ M: Dan Williams <dan.j.williams@intel.com>
 L: linux-cxl@vger.kernel.org
 S: Maintained
 F: Documentation/driver-api/cxl
+F: Documentation/userspace-api/fwctl/fwctl-cxl.rst
 F: drivers/cxl/
 F: include/cxl/
 F: include/uapi/linux/cxl_mem.h

@@ -9686,6 +9687,31 @@ F: kernel/futex/*
 F: tools/perf/bench/futex*
 F: tools/testing/selftests/futex/

+FWCTL SUBSYSTEM
+M: Dave Jiang <dave.jiang@intel.com>
+M: Jason Gunthorpe <jgg@nvidia.com>
+M: Saeed Mahameed <saeedm@nvidia.com>
+R: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+S: Maintained
+F: Documentation/userspace-api/fwctl/
+F: drivers/fwctl/
+F: include/linux/fwctl.h
+F: include/uapi/fwctl/
+
+FWCTL MLX5 DRIVER
+M: Saeed Mahameed <saeedm@nvidia.com>
+R: Itay Avraham <itayavr@nvidia.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/fwctl/mlx5/
+
+FWCTL PDS DRIVER
+M: Brett Creeley <brett.creeley@amd.com>
+R: Shannon Nelson <shannon.nelson@amd.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: drivers/fwctl/pds/
+
 GALAXYCORE GC0308 CAMERA SENSOR DRIVER
 M: Sebastian Reichel <sre@kernel.org>
 L: linux-media@vger.kernel.org


@@ -21,6 +21,8 @@ source "drivers/connector/Kconfig"
 source "drivers/firmware/Kconfig"

+source "drivers/fwctl/Kconfig"
+
 source "drivers/gnss/Kconfig"

 source "drivers/mtd/Kconfig"


@@ -135,6 +135,7 @@ obj-y += ufs/
 obj-$(CONFIG_MEMSTICK)    += memstick/
 obj-$(CONFIG_INFINIBAND)  += infiniband/
 obj-y                     += firmware/
+obj-$(CONFIG_FWCTL)       += fwctl/
 obj-$(CONFIG_CRYPTO)      += crypto/
 obj-$(CONFIG_SUPERH)      += sh/
 obj-y                     += clocksource/


@@ -7,6 +7,7 @@ menuconfig CXL_BUS
 	select PCI_DOE
 	select FIRMWARE_TABLE
 	select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS
+	select FWCTL if CXL_FEATURES
 	help
 	  CXL is a bus that is electrically compatible with PCI Express, but
 	  layers three protocols on that signalling (CXL.io, CXL.cache, and

@@ -102,6 +103,17 @@ config CXL_MEM
 	  If unsure say 'm'.

+config CXL_FEATURES
+	bool "CXL: Features"
+	depends on CXL_PCI
+	help
+	  Enable support for CXL Features. A CXL device that includes a mailbox
+	  supports commands that allow listing, getting, and setting of
+	  optionally defined features such as memory sparing or post package
+	  sparing. Vendors may define custom features for the device.
+
+	  If unsure say 'n'
+
 config CXL_PORT
 	default CXL_BUS
 	tristate


@@ -16,3 +16,4 @@ cxl_core-y += pmu.o
 cxl_core-y += cdat.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += features.o


@@ -4,6 +4,8 @@
 #ifndef __CXL_CORE_H__
 #define __CXL_CORE_H__

+#include <cxl/mailbox.h>
+
 extern const struct device_type cxl_nvdimm_bridge_type;
 extern const struct device_type cxl_nvdimm_type;
 extern const struct device_type cxl_pmu_type;

@@ -65,9 +67,9 @@ static inline void cxl_region_exit(void)
 struct cxl_send_command;
 struct cxl_mem_query_commands;
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
 		  struct cxl_mem_query_commands __user *q);
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s);
 void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
 				   resource_size_t length);

@@ -115,4 +117,15 @@ bool cxl_need_node_perf_attrs_update(int nid);
 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 					struct access_coordinate *c);
+
+#ifdef CONFIG_CXL_FEATURES
+size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+		       enum cxl_get_feat_selection selection,
+		       void *feat_out, size_t feat_out_size, u16 offset,
+		       u16 *return_code);
+int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
+		    u8 feat_version, const void *feat_data,
+		    size_t feat_data_size, u32 feat_flag, u16 offset,
+		    u16 *return_code);
+#endif
+
 #endif /* __CXL_CORE_H__ */

drivers/cxl/core/features.c (new file, 708 lines)

@@ -0,0 +1,708 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */
#include <linux/fwctl.h>
#include <linux/device.h>
#include <cxl/mailbox.h>
#include <cxl/features.h>
#include <uapi/fwctl/cxl.h>

#include "cxl.h"
#include "core.h"
#include "cxlmem.h"

/* All the features below are exclusive to the kernel */
static const uuid_t cxl_exclusive_feats[] = {
        CXL_FEAT_PATROL_SCRUB_UUID,
        CXL_FEAT_ECS_UUID,
        CXL_FEAT_SPPR_UUID,
        CXL_FEAT_HPPR_UUID,
        CXL_FEAT_CACHELINE_SPARING_UUID,
        CXL_FEAT_ROW_SPARING_UUID,
        CXL_FEAT_BANK_SPARING_UUID,
        CXL_FEAT_RANK_SPARING_UUID,
};

static bool is_cxl_feature_exclusive_by_uuid(const uuid_t *uuid)
{
        for (int i = 0; i < ARRAY_SIZE(cxl_exclusive_feats); i++) {
                if (uuid_equal(uuid, &cxl_exclusive_feats[i]))
                        return true;
        }

        return false;
}

static bool is_cxl_feature_exclusive(struct cxl_feat_entry *entry)
{
        return is_cxl_feature_exclusive_by_uuid(&entry->uuid);
}

inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
{
        return cxlds->cxlfs;
}
EXPORT_SYMBOL_NS_GPL(to_cxlfs, "CXL");

static int cxl_get_supported_features_count(struct cxl_mailbox *cxl_mbox)
{
        struct cxl_mbox_get_sup_feats_out mbox_out;
        struct cxl_mbox_get_sup_feats_in mbox_in;
        struct cxl_mbox_cmd mbox_cmd;
        int rc;

        memset(&mbox_in, 0, sizeof(mbox_in));
        mbox_in.count = cpu_to_le32(sizeof(mbox_out));
        memset(&mbox_out, 0, sizeof(mbox_out));
        mbox_cmd = (struct cxl_mbox_cmd) {
                .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
                .size_in = sizeof(mbox_in),
                .payload_in = &mbox_in,
                .size_out = sizeof(mbox_out),
                .payload_out = &mbox_out,
                .min_out = sizeof(mbox_out),
        };
        rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
        if (rc < 0)
                return rc;

        return le16_to_cpu(mbox_out.supported_feats);
}
static struct cxl_feat_entries *
get_supported_features(struct cxl_features_state *cxlfs)
{
        int remain_feats, max_size, max_feats, start, rc, hdr_size;
        struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
        int feat_size = sizeof(struct cxl_feat_entry);
        struct cxl_mbox_get_sup_feats_in mbox_in;
        struct cxl_feat_entry *entry;
        struct cxl_mbox_cmd mbox_cmd;
        int user_feats = 0;
        int count;

        count = cxl_get_supported_features_count(cxl_mbox);
        if (count <= 0)
                return NULL;

        struct cxl_feat_entries *entries __free(kvfree) =
                kvmalloc(struct_size(entries, ent, count), GFP_KERNEL);
        if (!entries)
                return NULL;

        struct cxl_mbox_get_sup_feats_out *mbox_out __free(kvfree) =
                kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
        if (!mbox_out)
                return NULL;

        hdr_size = struct_size(mbox_out, ents, 0);
        max_size = cxl_mbox->payload_size - hdr_size;
        /* max feat entries that can fit in mailbox max payload size */
        max_feats = max_size / feat_size;
        entry = entries->ent;

        start = 0;
        remain_feats = count;
        do {
                int retrieved, alloc_size, copy_feats;
                int num_entries;

                if (remain_feats > max_feats) {
                        alloc_size = struct_size(mbox_out, ents, max_feats);
                        remain_feats = remain_feats - max_feats;
                        copy_feats = max_feats;
                } else {
                        alloc_size = struct_size(mbox_out, ents, remain_feats);
                        copy_feats = remain_feats;
                        remain_feats = 0;
                }

                memset(&mbox_in, 0, sizeof(mbox_in));
                mbox_in.count = cpu_to_le32(alloc_size);
                mbox_in.start_idx = cpu_to_le16(start);
                memset(mbox_out, 0, alloc_size);
                mbox_cmd = (struct cxl_mbox_cmd) {
                        .opcode = CXL_MBOX_OP_GET_SUPPORTED_FEATURES,
                        .size_in = sizeof(mbox_in),
                        .payload_in = &mbox_in,
                        .size_out = alloc_size,
                        .payload_out = mbox_out,
                        .min_out = hdr_size,
                };
                rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
                if (rc < 0)
                        return NULL;

                if (mbox_cmd.size_out <= hdr_size)
                        return NULL;

                /*
                 * Make sure retrieved out buffer is multiple of feature
                 * entries.
                 */
                retrieved = mbox_cmd.size_out - hdr_size;
                if (retrieved % feat_size)
                        return NULL;

                num_entries = le16_to_cpu(mbox_out->num_entries);
                /*
                 * If the reported output entries * defined entry size !=
                 * retrieved output bytes, then the output package is incorrect.
                 */
                if (num_entries * feat_size != retrieved)
                        return NULL;

                memcpy(entry, mbox_out->ents, retrieved);
                for (int i = 0; i < num_entries; i++) {
                        if (!is_cxl_feature_exclusive(entry + i))
                                user_feats++;
                }
                entry += num_entries;
                /*
                 * If the number of output entries is less than expected, add the
                 * remaining entries to the next batch.
                 */
                remain_feats += copy_feats - num_entries;
                start += num_entries;
        } while (remain_feats);

        entries->num_features = count;
        entries->num_user_features = user_feats;

        return no_free_ptr(entries);
}
static void free_cxlfs(void *_cxlfs)
{
        struct cxl_features_state *cxlfs = _cxlfs;
        struct cxl_dev_state *cxlds = cxlfs->cxlds;

        cxlds->cxlfs = NULL;
        kvfree(cxlfs->entries);
        kfree(cxlfs);
}

/**
 * devm_cxl_setup_features() - Allocate and initialize features context
 * @cxlds: CXL device context
 *
 * Return 0 on success or -errno on failure.
 */
int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
{
        struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;

        if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
                return -ENODEV;

        struct cxl_features_state *cxlfs __free(kfree) =
                kzalloc(sizeof(*cxlfs), GFP_KERNEL);
        if (!cxlfs)
                return -ENOMEM;

        cxlfs->cxlds = cxlds;

        cxlfs->entries = get_supported_features(cxlfs);
        if (!cxlfs->entries)
                return -ENOMEM;

        cxlds->cxlfs = cxlfs;

        return devm_add_action_or_reset(cxlds->dev, free_cxlfs, no_free_ptr(cxlfs));
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_features, "CXL");

size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
                       enum cxl_get_feat_selection selection,
                       void *feat_out, size_t feat_out_size, u16 offset,
                       u16 *return_code)
{
        size_t data_to_rd_size, size_out;
        struct cxl_mbox_get_feat_in pi;
        struct cxl_mbox_cmd mbox_cmd;
        size_t data_rcvd_size = 0;
        int rc;

        if (return_code)
                *return_code = CXL_MBOX_CMD_RC_INPUT;

        if (!feat_out || !feat_out_size)
                return 0;

        size_out = min(feat_out_size, cxl_mbox->payload_size);
        uuid_copy(&pi.uuid, feat_uuid);
        pi.selection = selection;
        do {
                data_to_rd_size = min(feat_out_size - data_rcvd_size,
                                      cxl_mbox->payload_size);
                pi.offset = cpu_to_le16(offset + data_rcvd_size);
                pi.count = cpu_to_le16(data_to_rd_size);

                mbox_cmd = (struct cxl_mbox_cmd) {
                        .opcode = CXL_MBOX_OP_GET_FEATURE,
                        .size_in = sizeof(pi),
                        .payload_in = &pi,
                        .size_out = size_out,
                        .payload_out = feat_out + data_rcvd_size,
                        .min_out = data_to_rd_size,
                };
                rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
                if (rc < 0 || !mbox_cmd.size_out) {
                        if (return_code)
                                *return_code = mbox_cmd.return_code;
                        return 0;
                }
                data_rcvd_size += mbox_cmd.size_out;
        } while (data_rcvd_size < feat_out_size);

        if (return_code)
                *return_code = CXL_MBOX_CMD_RC_SUCCESS;

        return data_rcvd_size;
}

/*
 * FEAT_DATA_MIN_PAYLOAD_SIZE - min extra number of bytes should be
 * available in the mailbox for storing the actual feature data so that
 * the feature data transfer would work as expected.
 */
#define FEAT_DATA_MIN_PAYLOAD_SIZE 10
int cxl_set_feature(struct cxl_mailbox *cxl_mbox,
                    const uuid_t *feat_uuid, u8 feat_version,
                    const void *feat_data, size_t feat_data_size,
                    u32 feat_flag, u16 offset, u16 *return_code)
{
        size_t data_in_size, data_sent_size = 0;
        struct cxl_mbox_cmd mbox_cmd;
        size_t hdr_size;

        if (return_code)
                *return_code = CXL_MBOX_CMD_RC_INPUT;

        struct cxl_mbox_set_feat_in *pi __free(kfree) =
                kzalloc(cxl_mbox->payload_size, GFP_KERNEL);
        if (!pi)
                return -ENOMEM;

        uuid_copy(&pi->uuid, feat_uuid);
        pi->version = feat_version;
        feat_flag &= ~CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK;
        feat_flag |= CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET;
        hdr_size = sizeof(pi->hdr);
        /*
         * Check minimum mbox payload size is available for
         * the feature data transfer.
         */
        if (hdr_size + FEAT_DATA_MIN_PAYLOAD_SIZE > cxl_mbox->payload_size)
                return -ENOMEM;

        if (hdr_size + feat_data_size <= cxl_mbox->payload_size) {
                pi->flags = cpu_to_le32(feat_flag |
                                        CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER);
                data_in_size = feat_data_size;
        } else {
                pi->flags = cpu_to_le32(feat_flag |
                                        CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER);
                data_in_size = cxl_mbox->payload_size - hdr_size;
        }

        do {
                int rc;

                pi->offset = cpu_to_le16(offset + data_sent_size);
                memcpy(pi->feat_data, feat_data + data_sent_size, data_in_size);
                mbox_cmd = (struct cxl_mbox_cmd) {
                        .opcode = CXL_MBOX_OP_SET_FEATURE,
                        .size_in = hdr_size + data_in_size,
                        .payload_in = pi,
                };
                rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
                if (rc < 0) {
                        if (return_code)
                                *return_code = mbox_cmd.return_code;
                        return rc;
                }

                data_sent_size += data_in_size;
                if (data_sent_size >= feat_data_size) {
                        if (return_code)
                                *return_code = CXL_MBOX_CMD_RC_SUCCESS;
                        return 0;
                }

                if ((feat_data_size - data_sent_size) <= (cxl_mbox->payload_size - hdr_size)) {
                        data_in_size = feat_data_size - data_sent_size;
                        pi->flags = cpu_to_le32(feat_flag |
                                                CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER);
                } else {
                        pi->flags = cpu_to_le32(feat_flag |
                                                CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER);
                }
        } while (true);
}
/* FWCTL support */

static inline struct cxl_memdev *fwctl_to_memdev(struct fwctl_device *fwctl_dev)
{
        return to_cxl_memdev(fwctl_dev->dev.parent);
}

static int cxlctl_open_uctx(struct fwctl_uctx *uctx)
{
        return 0;
}

static void cxlctl_close_uctx(struct fwctl_uctx *uctx)
{
}

static struct cxl_feat_entry *
get_support_feature_info(struct cxl_features_state *cxlfs,
                         const struct fwctl_rpc_cxl *rpc_in)
{
        struct cxl_feat_entry *feat;
        const uuid_t *uuid;

        if (rpc_in->op_size < sizeof(uuid))
                return ERR_PTR(-EINVAL);

        uuid = &rpc_in->set_feat_in.uuid;

        for (int i = 0; i < cxlfs->entries->num_features; i++) {
                feat = &cxlfs->entries->ent[i];
                if (uuid_equal(uuid, &feat->uuid))
                        return feat;
        }

        return ERR_PTR(-EINVAL);
}

static void *cxlctl_get_supported_features(struct cxl_features_state *cxlfs,
                                           const struct fwctl_rpc_cxl *rpc_in,
                                           size_t *out_len)
{
        const struct cxl_mbox_get_sup_feats_in *feat_in;
        struct cxl_mbox_get_sup_feats_out *feat_out;
        struct cxl_feat_entry *pos;
        size_t out_size;
        int requested;
        u32 count;
        u16 start;
        int i;

        if (rpc_in->op_size != sizeof(*feat_in))
                return ERR_PTR(-EINVAL);

        feat_in = &rpc_in->get_sup_feats_in;
        count = le32_to_cpu(feat_in->count);
        start = le16_to_cpu(feat_in->start_idx);
        requested = count / sizeof(*pos);

        /*
         * Make sure that the total requested number of entries is not greater
         * than the total number of supported features allowed for userspace.
         */
        if (start >= cxlfs->entries->num_features)
                return ERR_PTR(-EINVAL);

        requested = min_t(int, requested, cxlfs->entries->num_features - start);

        out_size = sizeof(struct fwctl_rpc_cxl_out) +
                   struct_size(feat_out, ents, requested);

        struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
                kvzalloc(out_size, GFP_KERNEL);
        if (!rpc_out)
                return ERR_PTR(-ENOMEM);

        rpc_out->size = struct_size(feat_out, ents, requested);
        feat_out = &rpc_out->get_sup_feats_out;
        if (requested == 0) {
                feat_out->num_entries = cpu_to_le16(requested);
                feat_out->supported_feats =
                        cpu_to_le16(cxlfs->entries->num_features);
                rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
                *out_len = out_size;
                return no_free_ptr(rpc_out);
        }

        for (i = start, pos = &feat_out->ents[0];
             i < cxlfs->entries->num_features; i++, pos++) {
                if (i - start == requested)
                        break;

                memcpy(pos, &cxlfs->entries->ent[i], sizeof(*pos));
                /*
                 * If the feature is exclusive, set the set_feat_size to 0 to
                 * indicate that the feature is not changeable.
                 */
                if (is_cxl_feature_exclusive(pos)) {
                        u32 flags;

                        pos->set_feat_size = 0;
                        flags = le32_to_cpu(pos->flags);
                        flags &= ~CXL_FEATURE_F_CHANGEABLE;
                        pos->flags = cpu_to_le32(flags);
                }
        }

        feat_out->num_entries = cpu_to_le16(requested);
        feat_out->supported_feats = cpu_to_le16(cxlfs->entries->num_features);
        rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
        *out_len = out_size;

        return no_free_ptr(rpc_out);
}
static void *cxlctl_get_feature(struct cxl_features_state *cxlfs,
                                const struct fwctl_rpc_cxl *rpc_in,
                                size_t *out_len)
{
        struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
        const struct cxl_mbox_get_feat_in *feat_in;
        u16 offset, count, return_code;
        size_t out_size = *out_len;

        if (rpc_in->op_size != sizeof(*feat_in))
                return ERR_PTR(-EINVAL);

        feat_in = &rpc_in->get_feat_in;
        offset = le16_to_cpu(feat_in->offset);
        count = le16_to_cpu(feat_in->count);

        if (!count)
                return ERR_PTR(-EINVAL);

        struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
                kvzalloc(out_size, GFP_KERNEL);
        if (!rpc_out)
                return ERR_PTR(-ENOMEM);

        out_size = cxl_get_feature(cxl_mbox, &feat_in->uuid,
                                   feat_in->selection, rpc_out->payload,
                                   count, offset, &return_code);
        *out_len = sizeof(struct fwctl_rpc_cxl_out);
        if (!out_size) {
                rpc_out->size = 0;
                rpc_out->retval = return_code;
                return no_free_ptr(rpc_out);
        }

        rpc_out->size = out_size;
        rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
        *out_len += out_size;

        return no_free_ptr(rpc_out);
}

static void *cxlctl_set_feature(struct cxl_features_state *cxlfs,
                                const struct fwctl_rpc_cxl *rpc_in,
                                size_t *out_len)
{
        struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;
        const struct cxl_mbox_set_feat_in *feat_in;
        size_t out_size, data_size;
        u16 offset, return_code;
        u32 flags;
        int rc;

        if (rpc_in->op_size <= sizeof(feat_in->hdr))
                return ERR_PTR(-EINVAL);

        feat_in = &rpc_in->set_feat_in;

        if (is_cxl_feature_exclusive_by_uuid(&feat_in->uuid))
                return ERR_PTR(-EPERM);

        offset = le16_to_cpu(feat_in->offset);
        flags = le32_to_cpu(feat_in->flags);
        out_size = *out_len;

        struct fwctl_rpc_cxl_out *rpc_out __free(kvfree) =
                kvzalloc(out_size, GFP_KERNEL);
        if (!rpc_out)
                return ERR_PTR(-ENOMEM);

        rpc_out->size = 0;

        data_size = rpc_in->op_size - sizeof(feat_in->hdr);
        rc = cxl_set_feature(cxl_mbox, &feat_in->uuid,
                             feat_in->version, feat_in->feat_data,
                             data_size, flags, offset, &return_code);
        if (rc) {
                rpc_out->retval = return_code;
                return no_free_ptr(rpc_out);
        }

        rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS;
        *out_len = sizeof(*rpc_out);

        return no_free_ptr(rpc_out);
}
static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
                                         const struct fwctl_rpc_cxl *rpc_in,
                                         enum fwctl_rpc_scope scope)
{
        u16 effects, imm_mask, reset_mask;
        struct cxl_feat_entry *feat;
        u32 flags;

        feat = get_support_feature_info(cxlfs, rpc_in);
        if (IS_ERR(feat))
                return false;

        /* Ensure that the attribute is changeable */
        flags = le32_to_cpu(feat->flags);
        if (!(flags & CXL_FEATURE_F_CHANGEABLE))
                return false;

        effects = le16_to_cpu(feat->effects);

        /*
         * Reserved bits are set, rejecting since the effects is not
         * comprehended by the driver.
         */
        if (effects & CXL_CMD_EFFECTS_RESERVED) {
                dev_warn_once(cxlfs->cxlds->dev,
                              "Reserved bits set in the Feature effects field!\n");
                return false;
        }

        /* Currently no user background command support */
        if (effects & CXL_CMD_BACKGROUND)
                return false;

        /* Effects cause immediate change, highest security scope is needed */
        imm_mask = CXL_CMD_CONFIG_CHANGE_IMMEDIATE |
                   CXL_CMD_DATA_CHANGE_IMMEDIATE |
                   CXL_CMD_POLICY_CHANGE_IMMEDIATE |
                   CXL_CMD_LOG_CHANGE_IMMEDIATE;

        reset_mask = CXL_CMD_CONFIG_CHANGE_COLD_RESET |
                     CXL_CMD_CONFIG_CHANGE_CONV_RESET |
                     CXL_CMD_CONFIG_CHANGE_CXL_RESET;

        /* If no immediate or reset effect is set, the hardware has a bug */
        if (!(effects & imm_mask) && !(effects & reset_mask))
                return false;

        /*
         * If the Feature setting causes immediate configuration change
         * then we need the full write permission policy.
         */
        if (effects & imm_mask && scope >= FWCTL_RPC_DEBUG_WRITE_FULL)
                return true;

        /*
         * If the Feature setting only causes configuration change
         * after a reset, then the lesser level of write permission
         * policy is ok.
         */
        if (!(effects & imm_mask) && scope >= FWCTL_RPC_DEBUG_WRITE)
                return true;

        return false;
}

static bool cxlctl_validate_hw_command(struct cxl_features_state *cxlfs,
                                       const struct fwctl_rpc_cxl *rpc_in,
                                       enum fwctl_rpc_scope scope,
                                       u16 opcode)
{
        struct cxl_mailbox *cxl_mbox = &cxlfs->cxlds->cxl_mbox;

        switch (opcode) {
        case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
        case CXL_MBOX_OP_GET_FEATURE:
                if (cxl_mbox->feat_cap < CXL_FEATURES_RO)
                        return false;
                if (scope >= FWCTL_RPC_CONFIGURATION)
                        return true;
                return false;
        case CXL_MBOX_OP_SET_FEATURE:
                if (cxl_mbox->feat_cap < CXL_FEATURES_RW)
                        return false;
                return cxlctl_validate_set_features(cxlfs, rpc_in, scope);
        default:
                return false;
        }
}
static void *cxlctl_handle_commands(struct cxl_features_state *cxlfs,
                                    const struct fwctl_rpc_cxl *rpc_in,
                                    size_t *out_len, u16 opcode)
{
        switch (opcode) {
        case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
                return cxlctl_get_supported_features(cxlfs, rpc_in, out_len);
        case CXL_MBOX_OP_GET_FEATURE:
                return cxlctl_get_feature(cxlfs, rpc_in, out_len);
        case CXL_MBOX_OP_SET_FEATURE:
                return cxlctl_set_feature(cxlfs, rpc_in, out_len);
        default:
                return ERR_PTR(-EOPNOTSUPP);
        }
}

static void *cxlctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
                           void *in, size_t in_len, size_t *out_len)
{
        struct fwctl_device *fwctl_dev = uctx->fwctl;
        struct cxl_memdev *cxlmd = fwctl_to_memdev(fwctl_dev);
        struct cxl_features_state *cxlfs = to_cxlfs(cxlmd->cxlds);
        const struct fwctl_rpc_cxl *rpc_in = in;
        u16 opcode = rpc_in->opcode;

        if (!cxlctl_validate_hw_command(cxlfs, rpc_in, scope, opcode))
                return ERR_PTR(-EINVAL);

        return cxlctl_handle_commands(cxlfs, rpc_in, out_len, opcode);
}

static const struct fwctl_ops cxlctl_ops = {
        .device_type = FWCTL_DEVICE_TYPE_CXL,
        .uctx_size = sizeof(struct fwctl_uctx),
        .open_uctx = cxlctl_open_uctx,
        .close_uctx = cxlctl_close_uctx,
        .fw_rpc = cxlctl_fw_rpc,
};

DEFINE_FREE(free_fwctl_dev, struct fwctl_device *, if (_T) fwctl_put(_T))

static void free_memdev_fwctl(void *_fwctl_dev)
{
        struct fwctl_device *fwctl_dev = _fwctl_dev;

        fwctl_unregister(fwctl_dev);
        fwctl_put(fwctl_dev);
}

int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd)
{
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_features_state *cxlfs;
        int rc;

        cxlfs = to_cxlfs(cxlds);
        if (!cxlfs)
                return -ENODEV;

        /* No need to setup FWCTL if there are no user allowed features found */
        if (!cxlfs->entries->num_user_features)
                return -ENODEV;

        struct fwctl_device *fwctl_dev __free(free_fwctl_dev) =
                _fwctl_alloc_device(&cxlmd->dev, &cxlctl_ops, sizeof(*fwctl_dev));
        if (!fwctl_dev)
                return -ENOMEM;

        rc = fwctl_register(fwctl_dev);
        if (rc)
                return rc;

        return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl,
                                        no_free_ptr(fwctl_dev));
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL");

MODULE_IMPORT_NS("FWCTL");


@@ -349,40 +349,39 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
 	return true;
 }
 
-static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
-			     struct cxl_memdev_state *mds, u16 opcode,
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
+			     struct cxl_mailbox *cxl_mbox, u16 opcode,
 			     size_t in_size, size_t out_size, u64 in_payload)
 {
-	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-	*mbox = (struct cxl_mbox_cmd) {
+	*mbox_cmd = (struct cxl_mbox_cmd) {
 		.opcode = opcode,
 		.size_in = in_size,
 	};
 
 	if (in_size) {
-		mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
-						in_size);
-		if (IS_ERR(mbox->payload_in))
-			return PTR_ERR(mbox->payload_in);
+		mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+						    in_size);
+		if (IS_ERR(mbox_cmd->payload_in))
+			return PTR_ERR(mbox_cmd->payload_in);
 
-		if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
-			dev_dbg(mds->cxlds.dev, "%s: input payload not allowed\n",
+		if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
+			dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
 				cxl_mem_opcode_to_name(opcode));
-			kvfree(mbox->payload_in);
+			kvfree(mbox_cmd->payload_in);
 			return -EBUSY;
 		}
 	}
 
 	/* Prepare to handle a full payload for variable sized output */
 	if (out_size == CXL_VARIABLE_PAYLOAD)
-		mbox->size_out = cxl_mbox->payload_size;
+		mbox_cmd->size_out = cxl_mbox->payload_size;
 	else
-		mbox->size_out = out_size;
+		mbox_cmd->size_out = out_size;
 
-	if (mbox->size_out) {
-		mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
-		if (!mbox->payload_out) {
-			kvfree(mbox->payload_in);
+	if (mbox_cmd->size_out) {
+		mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL);
+		if (!mbox_cmd->payload_out) {
+			kvfree(mbox_cmd->payload_in);
 			return -ENOMEM;
 		}
 	}
@@ -397,10 +396,8 @@ static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
 static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 			      const struct cxl_send_command *send_cmd,
-			      struct cxl_memdev_state *mds)
+			      struct cxl_mailbox *cxl_mbox)
 {
-	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-
 	if (send_cmd->raw.rsvd)
 		return -EINVAL;
@@ -415,7 +412,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 	if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 		return -EPERM;
 
-	dev_WARN_ONCE(mds->cxlds.dev, true, "raw command path used\n");
+	dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n");
 
 	*mem_cmd = (struct cxl_mem_command) {
 		.info = {
@@ -431,7 +428,7 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 			  const struct cxl_send_command *send_cmd,
-			  struct cxl_memdev_state *mds)
+			  struct cxl_mailbox *cxl_mbox)
 {
 	struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
 	const struct cxl_command_info *info = &c->info;
@@ -446,11 +443,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 		return -EINVAL;
 
 	/* Check that the command is enabled for hardware */
-	if (!test_bit(info->id, mds->enabled_cmds))
+	if (!test_bit(info->id, cxl_mbox->enabled_cmds))
 		return -ENOTTY;
 
 	/* Check that the command is not claimed for exclusive kernel use */
-	if (test_bit(info->id, mds->exclusive_cmds))
+	if (test_bit(info->id, cxl_mbox->exclusive_cmds))
 		return -EBUSY;
 
 	/* Check the input buffer is the expected size */
@@ -479,7 +476,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
 /**
  * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
  * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
- * @mds: The driver data for the operation
+ * @cxl_mbox: CXL mailbox context
  * @send_cmd: &struct cxl_send_command copied in from userspace.
  *
  * Return:
@@ -494,10 +491,9 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
  * safe to send to the hardware.
  */
 static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
-				      struct cxl_memdev_state *mds,
+				      struct cxl_mailbox *cxl_mbox,
 				      const struct cxl_send_command *send_cmd)
 {
-	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
 	struct cxl_mem_command mem_cmd;
 	int rc;
@@ -514,24 +510,23 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
 	/* Sanitize and construct a cxl_mem_command */
 	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
-		rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, mds);
+		rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox);
 	else
-		rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, mds);
+		rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox);
 
 	if (rc)
 		return rc;
 
 	/* Sanitize and construct a cxl_mbox_cmd */
-	return cxl_mbox_cmd_ctor(mbox_cmd, mds, mem_cmd.opcode,
+	return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode,
 				 mem_cmd.info.size_in, mem_cmd.info.size_out,
 				 send_cmd->in.payload);
 }
 
-int cxl_query_cmd(struct cxl_memdev *cxlmd,
+int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
 		  struct cxl_mem_query_commands __user *q)
 {
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
-	struct device *dev = &cxlmd->dev;
+	struct device *dev = cxl_mbox->host;
 	struct cxl_mem_command *cmd;
 	u32 n_commands;
 	int j = 0;
@@ -552,9 +547,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
 	cxl_for_each_cmd(cmd) {
 		struct cxl_command_info info = cmd->info;
 
-		if (test_bit(info.id, mds->enabled_cmds))
+		if (test_bit(info.id, cxl_mbox->enabled_cmds))
 			info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
-		if (test_bit(info.id, mds->exclusive_cmds))
+		if (test_bit(info.id, cxl_mbox->exclusive_cmds))
 			info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
 
 		if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
@@ -569,7 +564,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
 /**
  * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @mds: The driver data for the operation
+ * @cxl_mbox: The mailbox context for the operation.
  * @mbox_cmd: The validated mailbox command.
  * @out_payload: Pointer to userspace's output payload.
  * @size_out: (Input) Max payload size to copy out.
@@ -590,13 +585,12 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
  *
  * See cxl_send_cmd().
  */
-static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
+static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox,
 					struct cxl_mbox_cmd *mbox_cmd,
 					u64 out_payload, s32 *size_out,
 					u32 *retval)
 {
-	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
-	struct device *dev = mds->cxlds.dev;
+	struct device *dev = cxl_mbox->host;
 	int rc;
 
 	dev_dbg(dev,
@@ -633,10 +627,9 @@ static int handle_mailbox_cmd_from_user(struct cxl_memdev_state *mds,
 	return rc;
 }
 
-int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
+int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s)
 {
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
-	struct device *dev = &cxlmd->dev;
+	struct device *dev = cxl_mbox->host;
 	struct cxl_send_command send;
 	struct cxl_mbox_cmd mbox_cmd;
 	int rc;
@@ -646,11 +639,11 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 	if (copy_from_user(&send, s, sizeof(send)))
 		return -EFAULT;
 
-	rc = cxl_validate_cmd_from_user(&mbox_cmd, mds, &send);
+	rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send);
 	if (rc)
 		return rc;
 
-	rc = handle_mailbox_cmd_from_user(mds, &mbox_cmd, send.out.payload,
+	rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload,
 					  &send.out.size, &send.retval);
 	if (rc)
 		return rc;
@@ -713,6 +706,35 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
 	return 0;
 }
 
+static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds)
+{
+	switch (opcode) {
+	case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+	case CXL_MBOX_OP_GET_FEATURE:
+		(*ro_cmds)++;
+		return 1;
+	case CXL_MBOX_OP_SET_FEATURE:
+		(*wr_cmds)++;
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* 'Get Supported Features' and 'Get Feature' */
+#define MAX_FEATURES_READ_CMDS	2
+static void set_features_cap(struct cxl_mailbox *cxl_mbox,
+			     int ro_cmds, int wr_cmds)
+{
+	/* Setting up Features capability while walking the CEL */
+	if (ro_cmds == MAX_FEATURES_READ_CMDS) {
+		if (wr_cmds)
+			cxl_mbox->feat_cap = CXL_FEATURES_RW;
+		else
+			cxl_mbox->feat_cap = CXL_FEATURES_RO;
}
}
/** /**
* cxl_walk_cel() - Walk through the Command Effects Log. * cxl_walk_cel() - Walk through the Command Effects Log.
* @mds: The driver data for the operation * @mds: The driver data for the operation
@ -724,10 +746,11 @@ static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
*/ */
static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel) static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
{ {
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_cel_entry *cel_entry; struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry); const int cel_entries = size / sizeof(*cel_entry);
struct device *dev = mds->cxlds.dev; struct device *dev = mds->cxlds.dev;
int i; int i, ro_cmds = 0, wr_cmds = 0;
cel_entry = (struct cxl_cel_entry *) cel; cel_entry = (struct cxl_cel_entry *) cel;
@ -737,10 +760,13 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
int enabled = 0; int enabled = 0;
if (cmd) { if (cmd) {
set_bit(cmd->info.id, mds->enabled_cmds); set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
enabled++; enabled++;
} }
enabled += check_features_opcodes(opcode, &ro_cmds,
&wr_cmds);
if (cxl_is_poison_command(opcode)) { if (cxl_is_poison_command(opcode)) {
cxl_set_poison_cmd_enabled(&mds->poison, opcode); cxl_set_poison_cmd_enabled(&mds->poison, opcode);
enabled++; enabled++;
@ -754,6 +780,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
dev_dbg(dev, "Opcode 0x%04x %s\n", opcode, dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
enabled ? "enabled" : "unsupported by driver"); enabled ? "enabled" : "unsupported by driver");
} }
set_features_cap(cxl_mbox, ro_cmds, wr_cmds);
} }
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds) static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
@ -807,6 +835,7 @@ static const uuid_t log_uuid[] = {
*/ */
int cxl_enumerate_cmds(struct cxl_memdev_state *mds) int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{ {
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *gsl; struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = mds->cxlds.dev; struct device *dev = mds->cxlds.dev;
struct cxl_mem_command *cmd; struct cxl_mem_command *cmd;
@ -845,7 +874,7 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
/* In case CEL was bogus, enable some default commands. */ /* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd) cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE) if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
set_bit(cmd->info.id, mds->enabled_cmds); set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
/* Found the required CEL */ /* Found the required CEL */
rc = 0; rc = 0;
@ -1448,6 +1477,7 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->event.log_lock); mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev; mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.host = dev;
mds->cxlds.cxl_mbox.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;

drivers/cxl/core/memdev.c

@@ -564,9 +564,11 @@ EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
 void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				unsigned long *cmds)
 {
+	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
 	down_write(&cxl_memdev_rwsem);
-	bitmap_or(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
-		  CXL_MEM_COMMAND_ID_MAX);
+	bitmap_or(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+		  cmds, CXL_MEM_COMMAND_ID_MAX);
 	up_write(&cxl_memdev_rwsem);
 }
 EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
@@ -579,9 +581,11 @@ EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, "CXL");
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				  unsigned long *cmds)
 {
+	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
 	down_write(&cxl_memdev_rwsem);
-	bitmap_andnot(mds->exclusive_cmds, mds->exclusive_cmds, cmds,
-		      CXL_MEM_COMMAND_ID_MAX);
+	bitmap_andnot(cxl_mbox->exclusive_cmds, cxl_mbox->exclusive_cmds,
+		      cmds, CXL_MEM_COMMAND_ID_MAX);
 	up_write(&cxl_memdev_rwsem);
 }
 EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, "CXL");
@@ -656,11 +660,14 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
 			       unsigned long arg)
 {
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
+
 	switch (cmd) {
 	case CXL_MEM_QUERY_COMMANDS:
-		return cxl_query_cmd(cxlmd, (void __user *)arg);
+		return cxl_query_cmd(cxl_mbox, (void __user *)arg);
 	case CXL_MEM_SEND_COMMAND:
-		return cxl_send_cmd(cxlmd, (void __user *)arg);
+		return cxl_send_cmd(cxl_mbox, (void __user *)arg);
 	default:
 		return -ENOTTY;
 	}
@@ -994,10 +1001,11 @@ static void cxl_remove_fw_upload(void *fwl)
 int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds)
 {
 	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
 	struct device *dev = &cxlds->cxlmd->dev;
 	struct fw_upload *fwl;
 
-	if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds))
+	if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, cxl_mbox->enabled_cmds))
 		return 0;
 
 	fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev),

drivers/cxl/cxlmem.h

@@ -106,42 +106,6 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
 	return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
 }
 
-/**
- * struct cxl_mbox_cmd - A command to be submitted to hardware.
- * @opcode: (input) The command set and command submitted to hardware.
- * @payload_in: (input) Pointer to the input payload.
- * @payload_out: (output) Pointer to the output payload. Must be allocated by
- *		 the caller.
- * @size_in: (input) Number of bytes to load from @payload_in.
- * @size_out: (input) Max number of bytes loaded into @payload_out.
- *	      (output) Number of bytes generated by the device. For fixed size
- *	      outputs commands this is always expected to be deterministic. For
- *	      variable sized output commands, it tells the exact number of bytes
- *	      written.
- * @min_out: (input) internal command output payload size validation
- * @poll_count: (input) Number of timeouts to attempt.
- * @poll_interval_ms: (input) Time between mailbox background command polling
- *		      interval timeouts.
- * @return_code: (output) Error code returned from hardware.
- *
- * This is the primary mechanism used to send commands to the hardware.
- * All the fields except @payload_* correspond exactly to the fields described in
- * Command Register section of the CXL 2.0 8.2.8.4.5. @payload_in and
- * @payload_out are written to, and read from the Command Payload Registers
- * defined in CXL 2.0 8.2.8.4.8.
- */
-struct cxl_mbox_cmd {
-	u16 opcode;
-	void *payload_in;
-	void *payload_out;
-	size_t size_in;
-	size_t size_out;
-	size_t min_out;
-	int poll_count;
-	int poll_interval_ms;
-	u16 return_code;
-};
-
 /*
  * Per CXL 3.0 Section 8.2.8.4.5.1
  */
@@ -428,6 +392,7 @@ struct cxl_dpa_perf {
  * @serial: PCIe Device Serial Number
  * @type: Generic Memory Class device or Vendor Specific Memory device
  * @cxl_mbox: CXL mailbox context
+ * @cxlfs: CXL features context
  */
 struct cxl_dev_state {
 	struct device *dev;
@@ -443,6 +408,9 @@ struct cxl_dev_state {
 	u64 serial;
 	enum cxl_devtype type;
 	struct cxl_mailbox cxl_mbox;
+#ifdef CONFIG_CXL_FEATURES
+	struct cxl_features_state *cxlfs;
+#endif
 };
 
 static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
@@ -461,8 +429,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
  * @lsa_size: Size of Label Storage Area
  *		(CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
  * @firmware_version: Firmware version for the memory device.
- * @enabled_cmds: Hardware commands found enabled in CEL.
- * @exclusive_cmds: Commands that are kernel-internal only
  * @total_bytes: sum of all possible capacities
  * @volatile_only_bytes: hard volatile capacity
  * @persistent_only_bytes: hard persistent capacity
@@ -485,8 +451,6 @@ struct cxl_memdev_state {
 	struct cxl_dev_state cxlds;
 	size_t lsa_size;
 	char firmware_version[0x10];
-	DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
-	DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
 	u64 total_bytes;
 	u64 volatile_only_bytes;
 	u64 persistent_only_bytes;
@@ -530,6 +494,9 @@ enum cxl_opcode {
 	CXL_MBOX_OP_GET_LOG_CAPS	= 0x0402,
 	CXL_MBOX_OP_CLEAR_LOG		= 0x0403,
 	CXL_MBOX_OP_GET_SUP_LOG_SUBLIST = 0x0405,
+	CXL_MBOX_OP_GET_SUPPORTED_FEATURES = 0x0500,
+	CXL_MBOX_OP_GET_FEATURE		= 0x0501,
+	CXL_MBOX_OP_SET_FEATURE		= 0x0502,
 	CXL_MBOX_OP_IDENTIFY		= 0x4000,
 	CXL_MBOX_OP_GET_PARTITION_INFO	= 0x4100,
 	CXL_MBOX_OP_SET_PARTITION_INFO	= 0x4101,

drivers/cxl/pci.c

@@ -997,6 +997,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
+	rc = devm_cxl_setup_features(cxlds);
+	if (rc)
+		dev_dbg(&pdev->dev, "No CXL Features discovered\n");
+
 	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
@@ -1009,6 +1013,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
+	rc = devm_cxl_setup_fwctl(cxlmd);
+	if (rc)
+		dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");
+
 	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
 	if (pmu_count < 0)
 		return pmu_count;

drivers/fwctl/Kconfig (new file, 33 lines)
@@ -0,0 +1,33 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig FWCTL
tristate "fwctl device firmware access framework"
help
fwctl provides a userspace API for restricted access to communicate
with on-device firmware. The communication channel is intended to
support a wide range of lockdown compatible device behaviors including
manipulating device FLASH, debugging, and other activities that don't
fit neatly into an existing subsystem.
if FWCTL
config FWCTL_MLX5
tristate "mlx5 ConnectX control fwctl driver"
depends on MLX5_CORE
help
MLX5 provides interface for the user process to access the debug and
configuration registers of the ConnectX hardware family
(NICs, PCI switches and SmartNIC SoCs).
This will allow configuration and debug tools to work out of the box on
mainstream kernel.
If you don't know what to do here, say N.
config FWCTL_PDS
tristate "AMD/Pensando pds fwctl driver"
depends on PDS_CORE
help
The pds_fwctl driver provides an fwctl interface for a user process
to access the debug and configuration information of the AMD/Pensando
DSC hardware family.
If you don't know what to do here, say N.
endif
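
A quick note for anyone wanting to try this: building everything as modules only
needs the three new options above (a sketch, using the option names as defined in
this Kconfig; the CXL support is wired in through the existing drivers/cxl options
rather than a FWCTL_* switch, as the cxl_pci_probe() hunk earlier shows):

CONFIG_FWCTL=m
CONFIG_FWCTL_MLX5=m
CONFIG_FWCTL_PDS=m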

drivers/fwctl/Makefile (new file, 6 lines)
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_FWCTL) += fwctl.o
obj-$(CONFIG_FWCTL_MLX5) += mlx5/
obj-$(CONFIG_FWCTL_PDS) += pds/
fwctl-y += main.o

drivers/fwctl/main.c (new file, 421 lines)
@@ -0,0 +1,421 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*/
#define pr_fmt(fmt) "fwctl: " fmt
#include <linux/fwctl.h>
#include <linux/container_of.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <uapi/fwctl/fwctl.h>
enum {
FWCTL_MAX_DEVICES = 4096,
MAX_RPC_LEN = SZ_2M,
};
static_assert(FWCTL_MAX_DEVICES < (1U << MINORBITS));
static dev_t fwctl_dev;
static DEFINE_IDA(fwctl_ida);
static unsigned long fwctl_tainted;
struct fwctl_ucmd {
struct fwctl_uctx *uctx;
void __user *ubuffer;
void *cmd;
u32 user_size;
};
static int ucmd_respond(struct fwctl_ucmd *ucmd, size_t cmd_len)
{
if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
min_t(size_t, ucmd->user_size, cmd_len)))
return -EFAULT;
return 0;
}
static int copy_to_user_zero_pad(void __user *to, const void *from,
size_t from_len, size_t user_len)
{
size_t copy_len;
copy_len = min(from_len, user_len);
if (copy_to_user(to, from, copy_len))
return -EFAULT;
if (copy_len < user_len) {
if (clear_user(to + copy_len, user_len - copy_len))
return -EFAULT;
}
return 0;
}
static int fwctl_cmd_info(struct fwctl_ucmd *ucmd)
{
struct fwctl_device *fwctl = ucmd->uctx->fwctl;
struct fwctl_info *cmd = ucmd->cmd;
size_t driver_info_len = 0;
if (cmd->flags)
return -EOPNOTSUPP;
if (!fwctl->ops->info && cmd->device_data_len) {
if (clear_user(u64_to_user_ptr(cmd->out_device_data),
cmd->device_data_len))
return -EFAULT;
} else if (cmd->device_data_len) {
void *driver_info __free(kfree) =
fwctl->ops->info(ucmd->uctx, &driver_info_len);
if (IS_ERR(driver_info))
return PTR_ERR(driver_info);
if (copy_to_user_zero_pad(u64_to_user_ptr(cmd->out_device_data),
driver_info, driver_info_len,
cmd->device_data_len))
return -EFAULT;
}
cmd->out_device_type = fwctl->ops->device_type;
cmd->device_data_len = driver_info_len;
return ucmd_respond(ucmd, sizeof(*cmd));
}
static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd)
{
struct fwctl_device *fwctl = ucmd->uctx->fwctl;
struct fwctl_rpc *cmd = ucmd->cmd;
size_t out_len;
if (cmd->in_len > MAX_RPC_LEN || cmd->out_len > MAX_RPC_LEN)
return -EMSGSIZE;
switch (cmd->scope) {
case FWCTL_RPC_CONFIGURATION:
case FWCTL_RPC_DEBUG_READ_ONLY:
break;
case FWCTL_RPC_DEBUG_WRITE_FULL:
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
fallthrough;
case FWCTL_RPC_DEBUG_WRITE:
if (!test_and_set_bit(0, &fwctl_tainted)) {
dev_warn(
&fwctl->dev,
"%s(%d): has requested full access to the physical device device",
current->comm, task_pid_nr(current));
add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK);
}
break;
default:
return -EOPNOTSUPP;
}
void *inbuf __free(kvfree) = kvzalloc(cmd->in_len, GFP_KERNEL_ACCOUNT);
if (!inbuf)
return -ENOMEM;
if (copy_from_user(inbuf, u64_to_user_ptr(cmd->in), cmd->in_len))
return -EFAULT;
out_len = cmd->out_len;
void *outbuf __free(kvfree) = fwctl->ops->fw_rpc(
ucmd->uctx, cmd->scope, inbuf, cmd->in_len, &out_len);
if (IS_ERR(outbuf))
return PTR_ERR(outbuf);
if (outbuf == inbuf) {
/* The driver can re-use inbuf as outbuf */
inbuf = NULL;
}
if (copy_to_user(u64_to_user_ptr(cmd->out), outbuf,
min(cmd->out_len, out_len)))
return -EFAULT;
cmd->out_len = out_len;
return ucmd_respond(ucmd, sizeof(*cmd));
}
/* On stack memory for the ioctl structs */
union fwctl_ucmd_buffer {
struct fwctl_info info;
struct fwctl_rpc rpc;
};
struct fwctl_ioctl_op {
unsigned int size;
unsigned int min_size;
unsigned int ioctl_num;
int (*execute)(struct fwctl_ucmd *ucmd);
};
#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
[_IOC_NR(_ioctl) - FWCTL_CMD_BASE] = { \
.size = sizeof(_struct) + \
BUILD_BUG_ON_ZERO(sizeof(union fwctl_ucmd_buffer) < \
sizeof(_struct)), \
.min_size = offsetofend(_struct, _last), \
.ioctl_num = _ioctl, \
.execute = _fn, \
}
static const struct fwctl_ioctl_op fwctl_ioctl_ops[] = {
IOCTL_OP(FWCTL_INFO, fwctl_cmd_info, struct fwctl_info, out_device_data),
IOCTL_OP(FWCTL_RPC, fwctl_cmd_rpc, struct fwctl_rpc, out),
};
static long fwctl_fops_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct fwctl_uctx *uctx = filp->private_data;
const struct fwctl_ioctl_op *op;
struct fwctl_ucmd ucmd = {};
union fwctl_ucmd_buffer buf;
unsigned int nr;
int ret;
nr = _IOC_NR(cmd);
if ((nr - FWCTL_CMD_BASE) >= ARRAY_SIZE(fwctl_ioctl_ops))
return -ENOIOCTLCMD;
op = &fwctl_ioctl_ops[nr - FWCTL_CMD_BASE];
if (op->ioctl_num != cmd)
return -ENOIOCTLCMD;
ucmd.uctx = uctx;
ucmd.cmd = &buf;
ucmd.ubuffer = (void __user *)arg;
ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
if (ret)
return ret;
if (ucmd.user_size < op->min_size)
return -EINVAL;
ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
ucmd.user_size);
if (ret)
return ret;
guard(rwsem_read)(&uctx->fwctl->registration_lock);
if (!uctx->fwctl->ops)
return -ENODEV;
return op->execute(&ucmd);
}
static int fwctl_fops_open(struct inode *inode, struct file *filp)
{
struct fwctl_device *fwctl =
container_of(inode->i_cdev, struct fwctl_device, cdev);
int ret;
guard(rwsem_read)(&fwctl->registration_lock);
if (!fwctl->ops)
return -ENODEV;
struct fwctl_uctx *uctx __free(kfree) =
kzalloc(fwctl->ops->uctx_size, GFP_KERNEL_ACCOUNT);
if (!uctx)
return -ENOMEM;
uctx->fwctl = fwctl;
ret = fwctl->ops->open_uctx(uctx);
if (ret)
return ret;
scoped_guard(mutex, &fwctl->uctx_list_lock) {
list_add_tail(&uctx->uctx_list_entry, &fwctl->uctx_list);
}
get_device(&fwctl->dev);
filp->private_data = no_free_ptr(uctx);
return 0;
}
static void fwctl_destroy_uctx(struct fwctl_uctx *uctx)
{
lockdep_assert_held(&uctx->fwctl->uctx_list_lock);
list_del(&uctx->uctx_list_entry);
uctx->fwctl->ops->close_uctx(uctx);
}
static int fwctl_fops_release(struct inode *inode, struct file *filp)
{
struct fwctl_uctx *uctx = filp->private_data;
struct fwctl_device *fwctl = uctx->fwctl;
scoped_guard(rwsem_read, &fwctl->registration_lock) {
/*
* NULL ops means fwctl_unregister() has already removed the
* driver and destroyed the uctx.
*/
if (fwctl->ops) {
guard(mutex)(&fwctl->uctx_list_lock);
fwctl_destroy_uctx(uctx);
}
}
kfree(uctx);
fwctl_put(fwctl);
return 0;
}
static const struct file_operations fwctl_fops = {
.owner = THIS_MODULE,
.open = fwctl_fops_open,
.release = fwctl_fops_release,
.unlocked_ioctl = fwctl_fops_ioctl,
};
static void fwctl_device_release(struct device *device)
{
struct fwctl_device *fwctl =
container_of(device, struct fwctl_device, dev);
ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
mutex_destroy(&fwctl->uctx_list_lock);
kfree(fwctl);
}
static char *fwctl_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "fwctl/%s", dev_name(dev));
}
static struct class fwctl_class = {
.name = "fwctl",
.dev_release = fwctl_device_release,
.devnode = fwctl_devnode,
};
static struct fwctl_device *
_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
{
struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
int devnum;
if (!fwctl)
return NULL;
devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
if (devnum < 0)
return NULL;
fwctl->dev.devt = fwctl_dev + devnum;
fwctl->dev.class = &fwctl_class;
fwctl->dev.parent = parent;
init_rwsem(&fwctl->registration_lock);
mutex_init(&fwctl->uctx_list_lock);
INIT_LIST_HEAD(&fwctl->uctx_list);
device_initialize(&fwctl->dev);
return_ptr(fwctl);
}
/* Drivers use the fwctl_alloc_device() wrapper */
struct fwctl_device *_fwctl_alloc_device(struct device *parent,
const struct fwctl_ops *ops,
size_t size)
{
struct fwctl_device *fwctl __free(fwctl) =
_alloc_device(parent, ops, size);
if (!fwctl)
return NULL;
cdev_init(&fwctl->cdev, &fwctl_fops);
/*
* The driver module is protected by fwctl_register/unregister(),
* unregister won't complete until we are done with the driver's module.
*/
fwctl->cdev.owner = THIS_MODULE;
if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))
return NULL;
fwctl->ops = ops;
return_ptr(fwctl);
}
EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, "FWCTL");
/**
* fwctl_register - Register a new device to the subsystem
* @fwctl: Previously allocated fwctl_device
*
* On return the device is visible through sysfs and /dev, driver ops may be
* called.
*/
int fwctl_register(struct fwctl_device *fwctl)
{
return cdev_device_add(&fwctl->cdev, &fwctl->dev);
}
EXPORT_SYMBOL_NS_GPL(fwctl_register, "FWCTL");
/**
* fwctl_unregister - Unregister a device from the subsystem
* @fwctl: Previously allocated and registered fwctl_device
*
* Undoes fwctl_register(). On return no driver ops will be called. The
* caller must still call fwctl_put() to free the fwctl.
*
* Unregister will return even if userspace still has file descriptors open.
* This will call ops->close_uctx() on any open FDs and after return no driver
* op will be called. The FDs remain open but all fops will return -ENODEV.
*
* The design of fwctl allows this sort of disassociation of the driver from the
* subsystem primarily by keeping memory allocations owned by the core subsystem.
* The fwctl_device and fwctl_uctx can both be freed without requiring a driver
* callback. This allows the module to remain unlocked while FDs are open.
*/
void fwctl_unregister(struct fwctl_device *fwctl)
{
struct fwctl_uctx *uctx;
cdev_device_del(&fwctl->cdev, &fwctl->dev);
/* Disable and free the driver's resources for any still open FDs. */
guard(rwsem_write)(&fwctl->registration_lock);
guard(mutex)(&fwctl->uctx_list_lock);
while ((uctx = list_first_entry_or_null(&fwctl->uctx_list,
struct fwctl_uctx,
uctx_list_entry)))
fwctl_destroy_uctx(uctx);
/*
* The driver module may unload after this returns, the op pointer will
* not be valid.
*/
fwctl->ops = NULL;
}
EXPORT_SYMBOL_NS_GPL(fwctl_unregister, "FWCTL");
static int __init fwctl_init(void)
{
int ret;
ret = alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl");
if (ret)
return ret;
ret = class_register(&fwctl_class);
if (ret)
goto err_chrdev;
return 0;
err_chrdev:
unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
return ret;
}
static void __exit fwctl_exit(void)
{
class_unregister(&fwctl_class);
unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
}
module_init(fwctl_init);
module_exit(fwctl_exit);
MODULE_DESCRIPTION("fwctl device firmware access framework");
MODULE_LICENSE("GPL");
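
The ioctl marshalling above is what keeps the uAPI extensible: userspace writes its
structure size into the leading __u32, the core copies in at most the kernel's
structure size via copy_struct_from_user(), and the response is truncated to the
caller's buffer in ucmd_respond(). A minimal userspace sketch of the FWCTL_INFO
handshake could look like the following; the device path follows fwctl_devnode()
above, while the installed header location and the name of the leading 'size'
field are assumptions about the uapi header, which is not part of this excerpt:

/* Hedged sketch: query the device type of the first fwctl device.
 * Assumes the uapi header installs as <fwctl/fwctl.h> and that its first
 * member is the __u32 size that fwctl_fops_ioctl() reads with get_user().
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <fwctl/fwctl.h>

int main(void)
{
	struct fwctl_info info = { .size = sizeof(info) };
	int fd = open("/dev/fwctl/fwctl0", O_RDWR);

	if (fd < 0)
		return 1;
	/* flags and device_data_len stay 0: no driver-specific data asked for */
	if (ioctl(fd, FWCTL_INFO, &info)) {
		close(fd);
		return 1;
	}
	printf("fwctl device type %u\n", info.out_device_type);
	close(fd);
	return 0;
}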

drivers/fwctl/mlx5/Makefile (new file, 4 lines)
@@ -0,0 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_FWCTL_MLX5) += mlx5_fwctl.o
mlx5_fwctl-y += main.o

drivers/fwctl/mlx5/main.c (new file, 411 lines)
@@ -0,0 +1,411 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*/
#include <linux/fwctl.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <uapi/fwctl/mlx5.h>
#define mlx5ctl_err(mcdev, format, ...) \
dev_err(&mcdev->fwctl.dev, format, ##__VA_ARGS__)
#define mlx5ctl_dbg(mcdev, format, ...) \
dev_dbg(&mcdev->fwctl.dev, "PID %u: " format, current->pid, \
##__VA_ARGS__)
struct mlx5ctl_uctx {
struct fwctl_uctx uctx;
u32 uctx_caps;
u32 uctx_uid;
};
struct mlx5ctl_dev {
struct fwctl_device fwctl;
struct mlx5_core_dev *mdev;
};
DEFINE_FREE(mlx5ctl, struct mlx5ctl_dev *, if (_T) fwctl_put(&_T->fwctl));
struct mlx5_ifc_mbox_in_hdr_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x40];
};
struct mlx5_ifc_mbox_out_hdr_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 reserved_at_40[0x40];
};
enum {
MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES = 0x4,
};
enum {
MLX5_CMD_OP_QUERY_DRIVER_VERSION = 0x10c,
MLX5_CMD_OP_QUERY_OTHER_HCA_CAP = 0x10e,
MLX5_CMD_OP_QUERY_RDB = 0x512,
MLX5_CMD_OP_QUERY_PSV = 0x602,
MLX5_CMD_OP_QUERY_DC_CNAK_TRACE = 0x716,
MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER = 0x722,
MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT = 0x728,
MLX5_CMD_OP_QUERY_BURST_SIZE = 0x813,
MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS = 0x819,
MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS = 0x820,
MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS = 0x821,
MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS = 0x911,
MLX5_CMD_OP_QUERY_AFU = 0x971,
MLX5_CMD_OP_QUERY_CAPI_PEC = 0x981,
MLX5_CMD_OP_QUERY_UCTX = 0xa05,
MLX5_CMD_OP_QUERY_UMEM = 0xa09,
MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE = 0xb02,
MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO = 0xb03,
MLX5_CMD_OP_QUERY_REGEXP_PARAMS = 0xb05,
MLX5_CMD_OP_QUERY_REGEXP_REGISTER = 0xb07,
MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY = 0xb08,
MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS = 0xb0a,
MLX5_CMD_OP_ACCESS_REGISTER_USER = 0xb0c,
MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING = 0xb0f,
MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO = 0xb13,
MLX5_CMD_OP_QUERY_CRYPTO_STATE = 0xb14,
MLX5_CMD_OP_QUERY_VUID = 0xb22,
MLX5_CMD_OP_QUERY_DPA_PARTITION = 0xb28,
MLX5_CMD_OP_QUERY_DPA_PARTITIONS = 0xb2a,
MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT = 0xb2e,
MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO = 0xb2f,
MLX5_CMD_OP_QUERY_RSV_RESOURCES = 0x8000,
MLX5_CMD_OP_QUERY_MTT = 0x8001,
MLX5_CMD_OP_QUERY_SCHED_QUEUE = 0x8006,
};
static int mlx5ctl_alloc_uid(struct mlx5ctl_dev *mcdev, u32 cap)
{
u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
void *uctx;
int ret;
u16 uid;
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
mlx5ctl_dbg(mcdev, "%s: caps 0x%x\n", __func__, cap);
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(uctx, uctx, cap, cap);
ret = mlx5_cmd_exec(mcdev->mdev, in, sizeof(in), out, sizeof(out));
if (ret)
return ret;
uid = MLX5_GET(create_uctx_out, out, uid);
mlx5ctl_dbg(mcdev, "allocated uid %u with caps 0x%x\n", uid, cap);
return uid;
}
static void mlx5ctl_release_uid(struct mlx5ctl_dev *mcdev, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
struct mlx5_core_dev *mdev = mcdev->mdev;
int ret;
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid);
ret = mlx5_cmd_exec_in(mdev, destroy_uctx, in);
mlx5ctl_dbg(mcdev, "released uid %u %pe\n", uid, ERR_PTR(ret));
}
static int mlx5ctl_open_uctx(struct fwctl_uctx *uctx)
{
struct mlx5ctl_uctx *mfd =
container_of(uctx, struct mlx5ctl_uctx, uctx);
struct mlx5ctl_dev *mcdev =
container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
int uid;
/*
* New FW supports the TOOLS_RESOURCES uid security label
* which allows commands to manipulate the global device state.
* Otherwise only the basic existing RDMA devx privileges are allowed.
*/
if (MLX5_CAP_GEN(mcdev->mdev, uctx_cap) &
MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES)
mfd->uctx_caps |= MLX5_UCTX_OBJECT_CAP_TOOLS_RESOURCES;
uid = mlx5ctl_alloc_uid(mcdev, mfd->uctx_caps);
if (uid < 0)
return uid;
mfd->uctx_uid = uid;
return 0;
}
static void mlx5ctl_close_uctx(struct fwctl_uctx *uctx)
{
struct mlx5ctl_dev *mcdev =
container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
struct mlx5ctl_uctx *mfd =
container_of(uctx, struct mlx5ctl_uctx, uctx);
mlx5ctl_release_uid(mcdev, mfd->uctx_uid);
}
static void *mlx5ctl_info(struct fwctl_uctx *uctx, size_t *length)
{
struct mlx5ctl_uctx *mfd =
container_of(uctx, struct mlx5ctl_uctx, uctx);
struct fwctl_info_mlx5 *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
info->uid = mfd->uctx_uid;
info->uctx_caps = mfd->uctx_caps;
*length = sizeof(*info);
return info;
}
static bool mlx5ctl_validate_rpc(const void *in, enum fwctl_rpc_scope scope)
{
u16 opcode = MLX5_GET(mbox_in_hdr, in, opcode);
u16 op_mod = MLX5_GET(mbox_in_hdr, in, op_mod);
/*
* Currently the driver can't keep track of commands that allocate
* objects in the FW, these commands are safe from a security
* perspective but nothing will free the memory when the FD is closed.
* For now permit only query commands and set commands that don't alter
* objects. Also the caps for the scope have not been defined yet,
* filter commands manually for now.
*/
switch (opcode) {
case MLX5_CMD_OP_POSTPONE_CONNECTED_QP_TIMEOUT:
case MLX5_CMD_OP_QUERY_ADAPTER:
case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_OTHER_HCA_CAP:
case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
case MLX5_CMD_OPCODE_QUERY_VUID:
/*
* FW limits SET_HCA_CAP on the tools UID to only the other function
* mode which is used for function pre-configuration
*/
case MLX5_CMD_OP_SET_HCA_CAP:
return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
case MLX5_CMD_OP_FPGA_QUERY_QP:
case MLX5_CMD_OP_NOP:
case MLX5_CMD_OP_QUERY_AFU:
case MLX5_CMD_OP_QUERY_BURST_SIZE:
case MLX5_CMD_OP_QUERY_CAPI_PEC:
case MLX5_CMD_OP_QUERY_CONG_PARAMS:
case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
case MLX5_CMD_OP_QUERY_CONG_STATUS:
case MLX5_CMD_OP_QUERY_CQ:
case MLX5_CMD_OP_QUERY_CRYPTO_STATE:
case MLX5_CMD_OP_QUERY_DC_CNAK_TRACE:
case MLX5_CMD_OP_QUERY_DCT:
case MLX5_CMD_OP_QUERY_DELAY_DROP_PARAMS:
case MLX5_CMD_OP_QUERY_DIAGNOSTIC_COUNTERS:
case MLX5_CMD_OP_QUERY_DIAGNOSTIC_PARAMS:
case MLX5_CMD_OP_QUERY_DPA_PARTITION:
case MLX5_CMD_OP_QUERY_DPA_PARTITIONS:
case MLX5_CMD_OP_QUERY_DRIVER_VERSION:
case MLX5_CMD_OP_QUERY_EMULATED_FUNCTIONS_INFO:
case MLX5_CMD_OP_QUERY_EMULATED_RESOURCES_INFO:
case MLX5_CMD_OP_QUERY_EMULATION_DEVICE_EQ_MSIX_MAPPING:
case MLX5_CMD_OP_QUERY_EQ:
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_GROUP:
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_QUERY_FLOW_TABLE:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_HCA_VPORT_GID:
case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY:
case MLX5_CMD_OP_QUERY_ISSI:
case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
case MLX5_CMD_OP_QUERY_LAG:
case MLX5_CMD_OP_QUERY_MAD_DEMUX:
case MLX5_CMD_OP_QUERY_MATCH_SAMPLE_INFO:
case MLX5_CMD_OP_QUERY_MKEY:
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_QUERY_MTT:
case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_NVMF_BACKEND_CONTROLLER:
case MLX5_CMD_OP_QUERY_NVMF_CC_RESPONSE:
case MLX5_CMD_OP_QUERY_NVMF_NAMESPACE_CONTEXT:
case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_QUERY_PAGES:
case MLX5_CMD_OP_QUERY_PSV:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
case MLX5_CMD_OP_QUERY_QP:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RDB:
case MLX5_CMD_OP_QUERY_REGEXP_PARAMS:
case MLX5_CMD_OP_QUERY_REGEXP_REGISTER:
case MLX5_CMD_OP_QUERY_RMP:
case MLX5_CMD_OP_QUERY_RQ:
case MLX5_CMD_OP_QUERY_RQT:
case MLX5_CMD_OP_QUERY_RSV_RESOURCES:
case MLX5_CMD_OP_QUERY_SCHED_QUEUE:
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
case MLX5_CMD_OP_QUERY_SF_PARTITION:
case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
case MLX5_CMD_OP_QUERY_SQ:
case MLX5_CMD_OP_QUERY_SRQ:
case MLX5_CMD_OP_QUERY_TIR:
case MLX5_CMD_OP_QUERY_TIS:
case MLX5_CMD_OP_QUERY_UCTX:
case MLX5_CMD_OP_QUERY_UMEM:
case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
case MLX5_CMD_OP_QUERY_VHCA_STATE:
case MLX5_CMD_OP_QUERY_VNIC_ENV:
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_QUERY_VPORT_STATE:
case MLX5_CMD_OP_QUERY_WOL_ROL:
case MLX5_CMD_OP_QUERY_XRC_SRQ:
case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
case MLX5_CMD_OP_QUERY_XRQ:
case MLX5_CMD_OP_USER_QUERY_XRQ_DC_PARAMS_ENTRY:
case MLX5_CMD_OP_USER_QUERY_XRQ_ERROR_PARAMS:
return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
case MLX5_CMD_OP_SET_DIAGNOSTIC_PARAMS:
return scope >= FWCTL_RPC_DEBUG_WRITE;
case MLX5_CMD_OP_ACCESS_REG:
case MLX5_CMD_OP_ACCESS_REGISTER_USER:
if (op_mod == 0) /* write */
return true; /* scope >= FWCTL_RPC_CONFIGURATION; */
return scope >= FWCTL_RPC_DEBUG_READ_ONLY;
default:
return false;
}
}
static void *mlx5ctl_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
void *rpc_in, size_t in_len, size_t *out_len)
{
struct mlx5ctl_dev *mcdev =
container_of(uctx->fwctl, struct mlx5ctl_dev, fwctl);
struct mlx5ctl_uctx *mfd =
container_of(uctx, struct mlx5ctl_uctx, uctx);
void *rpc_out;
int ret;
if (in_len < MLX5_ST_SZ_BYTES(mbox_in_hdr) ||
*out_len < MLX5_ST_SZ_BYTES(mbox_out_hdr))
return ERR_PTR(-EMSGSIZE);
mlx5ctl_dbg(mcdev, "[UID %d] cmdif: opcode 0x%x inlen %zu outlen %zu\n",
mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
in_len, *out_len);
if (!mlx5ctl_validate_rpc(rpc_in, scope))
return ERR_PTR(-EBADMSG);
/*
* mlx5_cmd_do() copies the input message to its own buffer before
* executing it, so we can reuse the allocation for the output.
*/
if (*out_len <= in_len) {
rpc_out = rpc_in;
} else {
rpc_out = kvzalloc(*out_len, GFP_KERNEL);
if (!rpc_out)
return ERR_PTR(-ENOMEM);
}
/* Enforce the user context for the command */
MLX5_SET(mbox_in_hdr, rpc_in, uid, mfd->uctx_uid);
ret = mlx5_cmd_do(mcdev->mdev, rpc_in, in_len, rpc_out, *out_len);
mlx5ctl_dbg(mcdev,
"[UID %d] cmdif: opcode 0x%x status 0x%x retval %pe\n",
mfd->uctx_uid, MLX5_GET(mbox_in_hdr, rpc_in, opcode),
MLX5_GET(mbox_out_hdr, rpc_out, status), ERR_PTR(ret));
/*
* -EREMOTEIO means execution succeeded and the out is valid,
* but an error code was returned inside out. Everything else
* means the RPC did not make it to the device.
*/
if (ret && ret != -EREMOTEIO) {
if (rpc_out != rpc_in)
kfree(rpc_out);
return ERR_PTR(ret);
}
return rpc_out;
}
static const struct fwctl_ops mlx5ctl_ops = {
.device_type = FWCTL_DEVICE_TYPE_MLX5,
.uctx_size = sizeof(struct mlx5ctl_uctx),
.open_uctx = mlx5ctl_open_uctx,
.close_uctx = mlx5ctl_close_uctx,
.info = mlx5ctl_info,
.fw_rpc = mlx5ctl_fw_rpc,
};
static int mlx5ctl_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
struct mlx5_core_dev *mdev = madev->mdev;
struct mlx5ctl_dev *mcdev __free(mlx5ctl) = fwctl_alloc_device(
&mdev->pdev->dev, &mlx5ctl_ops, struct mlx5ctl_dev, fwctl);
int ret;
if (!mcdev)
return -ENOMEM;
mcdev->mdev = mdev;
ret = fwctl_register(&mcdev->fwctl);
if (ret)
return ret;
auxiliary_set_drvdata(adev, no_free_ptr(mcdev));
return 0;
}
static void mlx5ctl_remove(struct auxiliary_device *adev)
{
struct mlx5ctl_dev *mcdev = auxiliary_get_drvdata(adev);
fwctl_unregister(&mcdev->fwctl);
fwctl_put(&mcdev->fwctl);
}
static const struct auxiliary_device_id mlx5ctl_id_table[] = {
{.name = MLX5_ADEV_NAME ".fwctl",},
{}
};
MODULE_DEVICE_TABLE(auxiliary, mlx5ctl_id_table);
static struct auxiliary_driver mlx5ctl_driver = {
.name = "mlx5_fwctl",
.probe = mlx5ctl_probe,
.remove = mlx5ctl_remove,
.id_table = mlx5ctl_id_table,
};
module_auxiliary_driver(mlx5ctl_driver);
MODULE_IMPORT_NS("FWCTL");
MODULE_DESCRIPTION("mlx5 ConnectX fwctl driver");
MODULE_AUTHOR("Saeed Mahameed <saeedm@nvidia.com>");
MODULE_LICENSE("Dual BSD/GPL");
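
The allowlist above is the mlx5 security policy in miniature: configuration-scope
commands return true unconditionally, queries require at least
FWCTL_RPC_DEBUG_READ_ONLY, and SET_DIAGNOSTIC_PARAMS needs the tainting
FWCTL_RPC_DEBUG_WRITE. A hedged sketch of issuing one allowed query through the
generic FWCTL_RPC ioctl follows; the 16-byte big-endian header matches the
mlx5_ifc_mbox_in_hdr_bits layout above, 0x10a is MLX5_CMD_OP_QUERY_ISSI from the
mlx5 headers, and the 'size' field name is again an assumption about the uapi
struct:

/* Hedged sketch: QUERY_ISSI at read-only debug scope, which does not
 * taint the kernel. Real callers would build the mailbox from the PRM
 * definitions rather than hand-packing words.
 */
#include <arpa/inet.h>		/* htonl() for the big-endian mailbox words */
#include <stdint.h>
#include <sys/ioctl.h>
#include <fwctl/fwctl.h>

static int mlx5_query_issi(int fd)
{
	uint32_t in[4] = { htonl(0x10a << 16) };  /* opcode hi16, uid lo16 */
	uint32_t out[64] = {};			  /* mbox_out_hdr + payload */
	struct fwctl_rpc rpc = {
		.size = sizeof(rpc),
		.scope = FWCTL_RPC_DEBUG_READ_ONLY,
		.in_len = sizeof(in),
		.out_len = sizeof(out),
		.in = (uintptr_t)in,
		.out = (uintptr_t)out,
	};

	/* The driver overwrites the uid field with the uctx uid it allocated */
	return ioctl(fd, FWCTL_RPC, &rpc);
}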

drivers/fwctl/pds/Makefile (new file, 4 lines)
@@ -0,0 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_FWCTL_PDS) += pds_fwctl.o
pds_fwctl-y += main.o

drivers/fwctl/pds/main.c (new file, 536 lines)
@@ -0,0 +1,536 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) Advanced Micro Devices, Inc */
#include <linux/module.h>
#include <linux/auxiliary_bus.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/bitfield.h>
#include <uapi/fwctl/fwctl.h>
#include <uapi/fwctl/pds.h>
#include <linux/fwctl.h>
#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
#include <linux/pds/pds_adminq.h>
#include <linux/pds/pds_auxbus.h>
struct pdsfc_uctx {
struct fwctl_uctx uctx;
u32 uctx_caps;
};
struct pdsfc_rpc_endpoint_info {
u32 endpoint;
dma_addr_t operations_pa;
struct pds_fwctl_query_data *operations;
struct mutex lock; /* lock for endpoint info management */
};
struct pdsfc_dev {
struct fwctl_device fwctl;
struct pds_auxiliary_dev *padev;
u32 caps;
struct pds_fwctl_ident ident;
dma_addr_t endpoints_pa;
struct pds_fwctl_query_data *endpoints;
struct pdsfc_rpc_endpoint_info *endpoint_info;
};
static int pdsfc_open_uctx(struct fwctl_uctx *uctx)
{
struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
pdsfc_uctx->uctx_caps = pdsfc->caps;
return 0;
}
static void pdsfc_close_uctx(struct fwctl_uctx *uctx)
{
}
static void *pdsfc_info(struct fwctl_uctx *uctx, size_t *length)
{
struct pdsfc_uctx *pdsfc_uctx = container_of(uctx, struct pdsfc_uctx, uctx);
struct fwctl_info_pds *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
info->uctx_caps = pdsfc_uctx->uctx_caps;
return info;
}
static int pdsfc_identify(struct pdsfc_dev *pdsfc)
{
struct device *dev = &pdsfc->fwctl.dev;
union pds_core_adminq_comp comp = {0};
union pds_core_adminq_cmd cmd;
struct pds_fwctl_ident *ident;
dma_addr_t ident_pa;
int err;
ident = dma_alloc_coherent(dev->parent, sizeof(*ident), &ident_pa, GFP_KERNEL);
if (!ident) {
dev_err(dev, "Failed to map ident buffer\n");
return -ENOMEM;
}
cmd = (union pds_core_adminq_cmd) {
.fwctl_ident = {
.opcode = PDS_FWCTL_CMD_IDENT,
.version = 0,
.len = cpu_to_le32(sizeof(*ident)),
.ident_pa = cpu_to_le64(ident_pa),
}
};
err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
if (err)
dev_err(dev, "Failed to send adminq cmd opcode: %u err: %d\n",
cmd.fwctl_ident.opcode, err);
else
pdsfc->ident = *ident;
dma_free_coherent(dev->parent, sizeof(*ident), ident, ident_pa);
return err;
}
static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc)
{
struct device *dev = &pdsfc->fwctl.dev;
int i;
if (!pdsfc->endpoints)
return;
for (i = 0; pdsfc->endpoint_info && i < pdsfc->endpoints->num_entries; i++)
mutex_destroy(&pdsfc->endpoint_info[i].lock);
vfree(pdsfc->endpoint_info);
pdsfc->endpoint_info = NULL;
dma_free_coherent(dev->parent, PAGE_SIZE,
pdsfc->endpoints, pdsfc->endpoints_pa);
pdsfc->endpoints = NULL;
pdsfc->endpoints_pa = DMA_MAPPING_ERROR;
}
static void pdsfc_free_operations(struct pdsfc_dev *pdsfc)
{
struct device *dev = &pdsfc->fwctl.dev;
u32 num_endpoints;
int i;
num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
for (i = 0; i < num_endpoints; i++) {
struct pdsfc_rpc_endpoint_info *ei = &pdsfc->endpoint_info[i];
if (ei->operations) {
dma_free_coherent(dev->parent, PAGE_SIZE,
ei->operations, ei->operations_pa);
ei->operations = NULL;
ei->operations_pa = DMA_MAPPING_ERROR;
}
}
}
static struct pds_fwctl_query_data *pdsfc_get_endpoints(struct pdsfc_dev *pdsfc,
dma_addr_t *pa)
{
struct device *dev = &pdsfc->fwctl.dev;
union pds_core_adminq_comp comp = {0};
struct pds_fwctl_query_data *data;
union pds_core_adminq_cmd cmd;
dma_addr_t data_pa;
int err;
data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
if (!data) {
dev_err(dev, "Failed to map endpoint list\n");
return ERR_PTR(-ENOMEM);
}
cmd = (union pds_core_adminq_cmd) {
.fwctl_query = {
.opcode = PDS_FWCTL_CMD_QUERY,
.entity = PDS_FWCTL_RPC_ROOT,
.version = 0,
.query_data_buf_len = cpu_to_le32(PAGE_SIZE),
.query_data_buf_pa = cpu_to_le64(data_pa),
}
};
err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
if (err) {
dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
return ERR_PTR(err);
}
*pa = data_pa;
return data;
}
static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc)
{
struct pds_fwctl_query_data_endpoint *ep_entry;
u32 num_endpoints;
int i;
pdsfc->endpoints = pdsfc_get_endpoints(pdsfc, &pdsfc->endpoints_pa);
if (IS_ERR(pdsfc->endpoints))
return PTR_ERR(pdsfc->endpoints);
num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries);
pdsfc->endpoint_info = vcalloc(num_endpoints,
sizeof(*pdsfc->endpoint_info));
if (!pdsfc->endpoint_info) {
pdsfc_free_endpoints(pdsfc);
return -ENOMEM;
}
ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries;
for (i = 0; i < num_endpoints; i++) {
mutex_init(&pdsfc->endpoint_info[i].lock);
pdsfc->endpoint_info[i].endpoint = ep_entry[i].id;
}
return 0;
}
static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc,
dma_addr_t *pa, u32 ep)
{
struct pds_fwctl_query_data_operation *entries;
struct device *dev = &pdsfc->fwctl.dev;
union pds_core_adminq_comp comp = {0};
struct pds_fwctl_query_data *data;
union pds_core_adminq_cmd cmd;
dma_addr_t data_pa;
int err;
int i;
/* Query the operations list for the given endpoint */
data = dma_alloc_coherent(dev->parent, PAGE_SIZE, &data_pa, GFP_KERNEL);
if (!data) {
dev_err(dev, "Failed to map operations list\n");
return ERR_PTR(-ENOMEM);
}
cmd = (union pds_core_adminq_cmd) {
.fwctl_query = {
.opcode = PDS_FWCTL_CMD_QUERY,
.entity = PDS_FWCTL_RPC_ENDPOINT,
.version = 0,
.query_data_buf_len = cpu_to_le32(PAGE_SIZE),
.query_data_buf_pa = cpu_to_le64(data_pa),
.ep = cpu_to_le32(ep),
}
};
err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
if (err) {
dev_err(dev, "Failed to send adminq cmd opcode: %u entity: %u err: %d\n",
cmd.fwctl_query.opcode, cmd.fwctl_query.entity, err);
dma_free_coherent(dev->parent, PAGE_SIZE, data, data_pa);
return ERR_PTR(err);
}
*pa = data_pa;
entries = (struct pds_fwctl_query_data_operation *)data->entries;
dev_dbg(dev, "num_entries %d\n", data->num_entries);
for (i = 0; i < data->num_entries; i++) {
/* Translate FW command attribute to fwctl scope */
switch (entries[i].scope) {
case PDSFC_FW_CMD_ATTR_READ:
case PDSFC_FW_CMD_ATTR_WRITE:
case PDSFC_FW_CMD_ATTR_SYNC:
entries[i].scope = FWCTL_RPC_CONFIGURATION;
break;
case PDSFC_FW_CMD_ATTR_DEBUG_READ:
entries[i].scope = FWCTL_RPC_DEBUG_READ_ONLY;
break;
case PDSFC_FW_CMD_ATTR_DEBUG_WRITE:
entries[i].scope = FWCTL_RPC_DEBUG_WRITE;
break;
default:
entries[i].scope = FWCTL_RPC_DEBUG_WRITE_FULL;
break;
}
dev_dbg(dev, "endpoint %d operation: id %x scope %d\n",
ep, entries[i].id, entries[i].scope);
}
return data;
}
static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc,
struct fwctl_rpc_pds *rpc,
enum fwctl_rpc_scope scope)
{
struct pds_fwctl_query_data_operation *op_entry;
struct pdsfc_rpc_endpoint_info *ep_info = NULL;
struct device *dev = &pdsfc->fwctl.dev;
int i;
/* validate rpc in_len & out_len based
* on ident.max_req_sz & max_resp_sz
*/
if (rpc->in.len > pdsfc->ident.max_req_sz) {
dev_dbg(dev, "Invalid request size %u, max %u\n",
rpc->in.len, pdsfc->ident.max_req_sz);
return -EINVAL;
}
if (rpc->out.len > pdsfc->ident.max_resp_sz) {
dev_dbg(dev, "Invalid response size %u, max %u\n",
rpc->out.len, pdsfc->ident.max_resp_sz);
return -EINVAL;
}
for (i = 0; i < pdsfc->endpoints->num_entries; i++) {
if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) {
ep_info = &pdsfc->endpoint_info[i];
break;
}
}
if (!ep_info) {
dev_dbg(dev, "Invalid endpoint %d\n", rpc->in.ep);
return -EINVAL;
}
/* query and cache this endpoint's operations */
mutex_lock(&ep_info->lock);
if (!ep_info->operations) {
struct pds_fwctl_query_data *operations;
operations = pdsfc_get_operations(pdsfc,
&ep_info->operations_pa,
rpc->in.ep);
if (IS_ERR(operations)) {
mutex_unlock(&ep_info->lock);
return -ENOMEM;
}
ep_info->operations = operations;
}
mutex_unlock(&ep_info->lock);
/* reject unsupported and/or out of scope commands */
op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries;
for (i = 0; i < ep_info->operations->num_entries; i++) {
if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) {
if (scope < op_entry[i].scope)
return -EPERM;
return 0;
}
}
dev_dbg(dev, "Invalid operation %d for endpoint %d\n", rpc->in.op, rpc->in.ep);
return -EINVAL;
}
static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
void *in, size_t in_len, size_t *out_len)
{
struct pdsfc_dev *pdsfc = container_of(uctx->fwctl, struct pdsfc_dev, fwctl);
struct device *dev = &uctx->fwctl->dev;
union pds_core_adminq_comp comp = {0};
dma_addr_t out_payload_dma_addr = 0;
dma_addr_t in_payload_dma_addr = 0;
struct fwctl_rpc_pds *rpc = in;
union pds_core_adminq_cmd cmd;
void *out_payload = NULL;
void *in_payload = NULL;
void *out = NULL;
int err;
err = pdsfc_validate_rpc(pdsfc, rpc, scope);
if (err)
return ERR_PTR(err);
if (rpc->in.len > 0) {
in_payload = kzalloc(rpc->in.len, GFP_KERNEL);
if (!in_payload) {
dev_err(dev, "Failed to allocate in_payload\n");
err = -ENOMEM;
goto err_out;
}
if (copy_from_user(in_payload, u64_to_user_ptr(rpc->in.payload),
rpc->in.len)) {
dev_dbg(dev, "Failed to copy in_payload from user\n");
err = -EFAULT;
goto err_in_payload;
}
in_payload_dma_addr = dma_map_single(dev->parent, in_payload,
rpc->in.len, DMA_TO_DEVICE);
err = dma_mapping_error(dev->parent, in_payload_dma_addr);
if (err) {
dev_dbg(dev, "Failed to map in_payload\n");
goto err_in_payload;
}
}
if (rpc->out.len > 0) {
out_payload = kzalloc(rpc->out.len, GFP_KERNEL);
if (!out_payload) {
dev_dbg(dev, "Failed to allocate out_payload\n");
err = -ENOMEM;
goto err_out_payload;
}
out_payload_dma_addr = dma_map_single(dev->parent, out_payload,
rpc->out.len, DMA_FROM_DEVICE);
err = dma_mapping_error(dev->parent, out_payload_dma_addr);
if (err) {
dev_dbg(dev, "Failed to map out_payload\n");
goto err_out_payload;
}
}
cmd = (union pds_core_adminq_cmd) {
.fwctl_rpc = {
.opcode = PDS_FWCTL_CMD_RPC,
.flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP,
.ep = cpu_to_le32(rpc->in.ep),
.op = cpu_to_le32(rpc->in.op),
.req_pa = cpu_to_le64(in_payload_dma_addr),
.req_sz = cpu_to_le32(rpc->in.len),
.resp_pa = cpu_to_le64(out_payload_dma_addr),
.resp_sz = cpu_to_le32(rpc->out.len),
}
};
err = pds_client_adminq_cmd(pdsfc->padev, &cmd, sizeof(cmd), &comp, 0);
if (err) {
dev_dbg(dev, "%s: ep %d op %x req_pa %llx req_sz %d req_sg %d resp_pa %llx resp_sz %d resp_sg %d err %d\n",
__func__, rpc->in.ep, rpc->in.op,
cmd.fwctl_rpc.req_pa, cmd.fwctl_rpc.req_sz, cmd.fwctl_rpc.req_sg_elems,
cmd.fwctl_rpc.resp_pa, cmd.fwctl_rpc.resp_sz, cmd.fwctl_rpc.resp_sg_elems,
err);
goto done;
}
dynamic_hex_dump("out ", DUMP_PREFIX_OFFSET, 16, 1, out_payload, rpc->out.len, true);
if (copy_to_user(u64_to_user_ptr(rpc->out.payload), out_payload, rpc->out.len)) {
dev_dbg(dev, "Failed to copy out_payload to user\n");
out = ERR_PTR(-EFAULT);
goto done;
}
rpc->out.retval = le32_to_cpu(comp.fwctl_rpc.err);
*out_len = in_len;
out = in;
done:
if (out_payload_dma_addr)
dma_unmap_single(dev->parent, out_payload_dma_addr,
rpc->out.len, DMA_FROM_DEVICE);
err_out_payload:
kfree(out_payload);
if (in_payload_dma_addr)
dma_unmap_single(dev->parent, in_payload_dma_addr,
rpc->in.len, DMA_TO_DEVICE);
err_in_payload:
kfree(in_payload);
err_out:
if (err)
return ERR_PTR(err);
return out;
}
static const struct fwctl_ops pdsfc_ops = {
.device_type = FWCTL_DEVICE_TYPE_PDS,
.uctx_size = sizeof(struct pdsfc_uctx),
.open_uctx = pdsfc_open_uctx,
.close_uctx = pdsfc_close_uctx,
.info = pdsfc_info,
.fw_rpc = pdsfc_fw_rpc,
};
static int pdsfc_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct pds_auxiliary_dev *padev =
container_of(adev, struct pds_auxiliary_dev, aux_dev);
struct device *dev = &adev->dev;
struct pdsfc_dev *pdsfc;
int err;
pdsfc = fwctl_alloc_device(&padev->vf_pdev->dev, &pdsfc_ops,
struct pdsfc_dev, fwctl);
if (!pdsfc)
return dev_err_probe(dev, -ENOMEM, "Failed to allocate fwctl device struct\n");
pdsfc->padev = padev;
err = pdsfc_identify(pdsfc);
if (err) {
fwctl_put(&pdsfc->fwctl);
return dev_err_probe(dev, err, "Failed to identify device\n");
}
err = pdsfc_init_endpoints(pdsfc);
if (err) {
fwctl_put(&pdsfc->fwctl);
return dev_err_probe(dev, err, "Failed to init endpoints\n");
}
pdsfc->caps = PDS_FWCTL_QUERY_CAP | PDS_FWCTL_SEND_CAP;
err = fwctl_register(&pdsfc->fwctl);
if (err) {
pdsfc_free_endpoints(pdsfc);
fwctl_put(&pdsfc->fwctl);
return dev_err_probe(dev, err, "Failed to register device\n");
}
auxiliary_set_drvdata(adev, pdsfc);
return 0;
}
static void pdsfc_remove(struct auxiliary_device *adev)
{
struct pdsfc_dev *pdsfc = auxiliary_get_drvdata(adev);
fwctl_unregister(&pdsfc->fwctl);
pdsfc_free_operations(pdsfc);
pdsfc_free_endpoints(pdsfc);
fwctl_put(&pdsfc->fwctl);
}
static const struct auxiliary_device_id pdsfc_id_table[] = {
{.name = PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_FWCTL_STR },
{}
};
MODULE_DEVICE_TABLE(auxiliary, pdsfc_id_table);
static struct auxiliary_driver pdsfc_driver = {
.name = "pds_fwctl",
.probe = pdsfc_probe,
.remove = pdsfc_remove,
.id_table = pdsfc_id_table,
};
module_auxiliary_driver(pdsfc_driver);
MODULE_IMPORT_NS("FWCTL");
MODULE_DESCRIPTION("pds fwctl driver");
MODULE_AUTHOR("Shannon Nelson <shannon.nelson@amd.com>");
MODULE_AUTHOR("Brett Creeley <brett.creeley@amd.com>");
MODULE_LICENSE("GPL");
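
Unlike mlx5, the pds wire format is itself a uapi structure: pdsfc_fw_rpc() treats
the generic RPC payload as a struct fwctl_rpc_pds in both directions, handing the
same buffer back with out.retval filled in from the adminq completion. A hedged
caller sketch follows; the field names track the rpc->in/rpc->out accesses above,
while the header path is an assumption, designated initializers sidestep any
field-ordering guesswork, and the ep/op values are pure placeholders:

/* Hedged sketch: wrap a DSC RPC in the generic FWCTL_RPC ioctl. The
 * fwctl_rpc_pds block travels in and out in place; ep/op values here are
 * placeholders - real ones come from the DSC firmware documentation.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <fwctl/fwctl.h>
#include <fwctl/pds.h>

static int pds_rpc(int fd, uint32_t ep, uint32_t op,
		   void *req, uint32_t req_len,
		   void *resp, uint32_t resp_len)
{
	struct fwctl_rpc_pds pds = {
		.in.ep = ep,
		.in.op = op,
		.in.len = req_len,
		.in.payload = (uintptr_t)req,
		.out.len = resp_len,
		.out.payload = (uintptr_t)resp,
	};
	struct fwctl_rpc rpc = {
		.size = sizeof(rpc),
		.scope = FWCTL_RPC_CONFIGURATION,
		.in_len = sizeof(pds),
		.out_len = sizeof(pds),
		.in = (uintptr_t)&pds,
		.out = (uintptr_t)&pds,
	};

	if (ioctl(fd, FWCTL_RPC, &rpc))
		return -1;
	return (int)pds.out.retval;	/* FW status from the completion */
}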

drivers/net/ethernet/amd/pds_core/auxbus.c

@@ -175,34 +175,32 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf,
 	return padev;
 }
 
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+			 struct pds_auxiliary_dev **pd_ptr)
 {
 	struct pds_auxiliary_dev *padev;
-	int err = 0;
 
-	if (!cf)
-		return -ENODEV;
+	if (!*pd_ptr)
+		return;
 
 	mutex_lock(&pf->config_lock);
 
-	padev = pf->vfs[cf->vf_id].padev;
-	if (padev) {
-		pds_client_unregister(pf, padev->client_id);
-		auxiliary_device_delete(&padev->aux_dev);
-		auxiliary_device_uninit(&padev->aux_dev);
-		padev->client_id = 0;
-	}
-	pf->vfs[cf->vf_id].padev = NULL;
+	padev = *pd_ptr;
+	pds_client_unregister(pf, padev->client_id);
+	auxiliary_device_delete(&padev->aux_dev);
+	auxiliary_device_uninit(&padev->aux_dev);
+	padev->client_id = 0;
+	*pd_ptr = NULL;
 
 	mutex_unlock(&pf->config_lock);
-
-	return err;
 }
 
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+			enum pds_core_vif_types vt,
+			struct pds_auxiliary_dev **pd_ptr)
 {
 	struct pds_auxiliary_dev *padev;
 	char devname[PDS_DEVNAME_LEN];
-	enum pds_core_vif_types vt;
 	unsigned long mask;
 	u16 vt_support;
 	int client_id;
@@ -211,6 +209,9 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
 	if (!cf)
 		return -ENODEV;
 
+	if (vt >= PDS_DEV_TYPE_MAX)
+		return -EINVAL;
+
 	mutex_lock(&pf->config_lock);
 
 	mask = BIT_ULL(PDSC_S_FW_DEAD) |
@@ -222,17 +223,10 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
 		goto out_unlock;
 	}
 
-	/* We only support vDPA so far, so it is the only one to
-	 * be verified that it is available in the Core device and
-	 * enabled in the devlink param. In the future this might
-	 * become a loop for several VIF types.
-	 */
-
 	/* Verify that the type is supported and enabled. It is not
-	 * an error if there is no auxbus device support for this
-	 * VF, it just means something else needs to happen with it.
+	 * an error if the firmware doesn't support the feature, the
+	 * driver just won't set up an auxiliary_device for it.
 	 */
-	vt = PDS_DEV_TYPE_VDPA;
 	vt_support = !!le16_to_cpu(pf->dev_ident.vif_types[vt]);
 	if (!(vt_support &&
 	      pf->viftype_status[vt].supported &&
@@ -258,7 +252,7 @@ int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf)
 		err = PTR_ERR(padev);
 		goto out_unlock;
 	}
-	pf->vfs[cf->vf_id].padev = padev;
+	*pd_ptr = padev;
 
 out_unlock:
 	mutex_unlock(&pf->config_lock);

drivers/net/ethernet/amd/pds_core/core.c

@@ -402,6 +402,9 @@ static int pdsc_core_init(struct pdsc *pdsc)
 }
 
 static struct pdsc_viftype pdsc_viftype_defaults[] = {
+	[PDS_DEV_TYPE_FWCTL] = { .name = PDS_DEV_TYPE_FWCTL_STR,
+				 .vif_id = PDS_DEV_TYPE_FWCTL,
+				 .dl_id = -1 },
 	[PDS_DEV_TYPE_VDPA] = { .name = PDS_DEV_TYPE_VDPA_STR,
 				.vif_id = PDS_DEV_TYPE_VDPA,
 				.dl_id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET },
@@ -428,6 +431,10 @@ static int pdsc_viftypes_init(struct pdsc *pdsc)
 		/* See what the Core device has for support */
 		vt_support = !!le16_to_cpu(pdsc->dev_ident.vif_types[vt]);
 
+		if (vt == PDS_DEV_TYPE_FWCTL)
+			pdsc->viftype_status[vt].enabled = true;
+
 		dev_dbg(pdsc->dev, "VIF %s is %ssupported\n",
 			pdsc->viftype_status[vt].name,
 			vt_support ? "" : "not ");

drivers/net/ethernet/amd/pds_core/core.h

@@ -156,6 +156,7 @@ struct pdsc {
 	struct dentry *dentry;
 	struct device *dev;
 	struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX];
+	struct pds_auxiliary_dev *padev;
 	struct pdsc_vf *vfs;
 	int num_vfs;
 	int vf_id;
@@ -303,8 +304,11 @@ void pdsc_health_thread(struct work_struct *work);
 int pdsc_register_notify(struct notifier_block *nb);
 void pdsc_unregister_notify(struct notifier_block *nb);
 void pdsc_notify(unsigned long event, void *data);
-int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf);
-int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf);
+int pdsc_auxbus_dev_add(struct pdsc *cf, struct pdsc *pf,
+			enum pds_core_vif_types vt,
+			struct pds_auxiliary_dev **pd_ptr);
+void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf,
+			 struct pds_auxiliary_dev **pd_ptr);
 
 void pdsc_process_adminq(struct pdsc_qcq *qcq);
 void pdsc_work_thread(struct work_struct *work);


@@ -56,8 +56,11 @@ int pdsc_dl_enable_set(struct devlink *dl, u32 id,
        for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) {
                struct pdsc *vf = pdsc->vfs[vf_id].vf;

-               err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) :
-                                      pdsc_auxbus_dev_del(vf, pdsc);
+               if (ctx->val.vbool)
+                       err = pdsc_auxbus_dev_add(vf, pdsc, vt_entry->vif_id,
+                                                 &pdsc->vfs[vf_id].padev);
+               else
+                       pdsc_auxbus_dev_del(vf, pdsc, &pdsc->vfs[vf_id].padev);
        }

        return err;


@@ -190,7 +190,8 @@ static int pdsc_init_vf(struct pdsc *vf)
        devl_unlock(dl);

        pf->vfs[vf->vf_id].vf = vf;
-       err = pdsc_auxbus_dev_add(vf, pf);
+       err = pdsc_auxbus_dev_add(vf, pf, PDS_DEV_TYPE_VDPA,
+                                 &pf->vfs[vf->vf_id].padev);
        if (err) {
                devl_lock(dl);
                devl_unregister(dl);
@@ -264,6 +265,10 @@ static int pdsc_init_pf(struct pdsc *pdsc)
        mutex_unlock(&pdsc->config_lock);

+       err = pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL, &pdsc->padev);
+       if (err)
+               goto err_out_stop;
+
        dl = priv_to_devlink(pdsc);
        devl_lock(dl);
        err = devl_params_register(dl, pdsc_dl_params,
@@ -272,7 +277,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
                devl_unlock(dl);
                dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n",
                         ERR_PTR(err));
-               goto err_out_stop;
+               goto err_out_del_dev;
        }

        hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
@@ -295,6 +300,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
err_out_unreg_params:
        devlink_params_unregister(dl, pdsc_dl_params,
                                  ARRAY_SIZE(pdsc_dl_params));
+err_out_del_dev:
+       pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
err_out_stop:
        pdsc_stop(pdsc);
err_out_teardown:
@@ -417,7 +424,7 @@ static void pdsc_remove(struct pci_dev *pdev)
                pf = pdsc_get_pf_struct(pdsc->pdev);
                if (!IS_ERR(pf)) {
-                       pdsc_auxbus_dev_del(pdsc, pf);
+                       pdsc_auxbus_dev_del(pdsc, pf, &pf->vfs[pdsc->vf_id].padev);
                        pf->vfs[pdsc->vf_id].vf = NULL;
                }
        } else {
@@ -426,6 +433,7 @@ static void pdsc_remove(struct pci_dev *pdev)
                 * shut themselves down.
                 */
                pdsc_sriov_configure(pdev, 0);
+               pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);

                timer_shutdown_sync(&pdsc->wdtimer);
                if (pdsc->wq)
@@ -482,7 +490,10 @@ static void pdsc_reset_prepare(struct pci_dev *pdev)
                pf = pdsc_get_pf_struct(pdsc->pdev);
                if (!IS_ERR(pf))
-                       pdsc_auxbus_dev_del(pdsc, pf);
+                       pdsc_auxbus_dev_del(pdsc, pf,
+                                           &pf->vfs[pdsc->vf_id].padev);
+       } else {
+               pdsc_auxbus_dev_del(pdsc, pdsc, &pdsc->padev);
        }

        pdsc_unmap_bars(pdsc);
@@ -527,7 +538,11 @@ static void pdsc_reset_done(struct pci_dev *pdev)
                pf = pdsc_get_pf_struct(pdsc->pdev);
                if (!IS_ERR(pf))
-                       pdsc_auxbus_dev_add(pdsc, pf);
+                       pdsc_auxbus_dev_add(pdsc, pf, PDS_DEV_TYPE_VDPA,
+                                           &pf->vfs[pdsc->vf_id].padev);
+       } else {
+               pdsc_auxbus_dev_add(pdsc, pdsc, PDS_DEV_TYPE_FWCTL,
+                                   &pdsc->padev);
        }
}


@@ -228,8 +228,15 @@ enum {
        MLX5_INTERFACE_PROTOCOL_VNET,
        MLX5_INTERFACE_PROTOCOL_DPLL,
+       MLX5_INTERFACE_PROTOCOL_FWCTL,
};

+static bool is_fwctl_supported(struct mlx5_core_dev *dev)
+{
+       /* fwctl is most useful on PFs, prevent fwctl on SFs for now */
+       return MLX5_CAP_GEN(dev, uctx_cap) && !mlx5_core_is_sf(dev);
+}
+
static const struct mlx5_adev_device {
        const char *suffix;
        bool (*is_supported)(struct mlx5_core_dev *dev);
@@ -252,6 +259,8 @@ static const struct mlx5_adev_device {
                                           .is_supported = &is_mp_supported },
        [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll",
                                           .is_supported = &is_dpll_supported },
+       [MLX5_INTERFACE_PROTOCOL_FWCTL] = { .suffix = "fwctl",
+                                           .is_supported = &is_fwctl_supported },
};

int mlx5_adev_idx_alloc(void)

include/cxl/features.h (new file)

@@ -0,0 +1,87 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2024-2025 Intel Corporation. */
#ifndef __CXL_FEATURES_H__
#define __CXL_FEATURES_H__
#include <linux/uuid.h>
#include <linux/fwctl.h>
#include <uapi/cxl/features.h>
/* Feature UUIDs used by the kernel */
#define CXL_FEAT_PATROL_SCRUB_UUID \
UUID_INIT(0x96dad7d6, 0xfde8, 0x482b, 0xa7, 0x33, 0x75, 0x77, 0x4e, \
0x06, 0xdb, 0x8a)
#define CXL_FEAT_ECS_UUID \
UUID_INIT(0xe5b13f22, 0x2328, 0x4a14, 0xb8, 0xba, 0xb9, 0x69, 0x1e, \
0x89, 0x33, 0x86)
#define CXL_FEAT_SPPR_UUID \
UUID_INIT(0x892ba475, 0xfad8, 0x474e, 0x9d, 0x3e, 0x69, 0x2c, 0x91, \
0x75, 0x68, 0xbb)
#define CXL_FEAT_HPPR_UUID \
UUID_INIT(0x80ea4521, 0x786f, 0x4127, 0xaf, 0xb1, 0xec, 0x74, 0x59, \
0xfb, 0x0e, 0x24)
#define CXL_FEAT_CACHELINE_SPARING_UUID \
UUID_INIT(0x96C33386, 0x91dd, 0x44c7, 0x9e, 0xcb, 0xfd, 0xaf, 0x65, \
0x03, 0xba, 0xc4)
#define CXL_FEAT_ROW_SPARING_UUID \
UUID_INIT(0x450ebf67, 0xb135, 0x4f97, 0xa4, 0x98, 0xc2, 0xd5, 0x7f, \
0x27, 0x9b, 0xed)
#define CXL_FEAT_BANK_SPARING_UUID \
UUID_INIT(0x78b79636, 0x90ac, 0x4b64, 0xa4, 0xef, 0xfa, 0xac, 0x5d, \
0x18, 0xa8, 0x63)
#define CXL_FEAT_RANK_SPARING_UUID \
UUID_INIT(0x34dbaff5, 0x0552, 0x4281, 0x8f, 0x76, 0xda, 0x0b, 0x5e, \
0x7a, 0x76, 0xa7)
/* Feature commands capability supported by a device */
enum cxl_features_capability {
CXL_FEATURES_NONE = 0,
CXL_FEATURES_RO,
CXL_FEATURES_RW,
};
/**
* struct cxl_features_state - The Features state for the device
* @cxlds: Pointer to CXL device state
* @entries: CXL feature entry context
*/
struct cxl_features_state {
struct cxl_dev_state *cxlds;
struct cxl_feat_entries {
int num_features;
int num_user_features;
struct cxl_feat_entry ent[] __counted_by(num_features);
} *entries;
};
struct cxl_mailbox;
struct cxl_memdev;
#ifdef CONFIG_CXL_FEATURES
inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds);
int devm_cxl_setup_features(struct cxl_dev_state *cxlds);
int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd);
#else
static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds)
{
return NULL;
}
static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds)
{
return -EOPNOTSUPP;
}
static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd)
{
return -EOPNOTSUPP;
}
#endif
#endif


@@ -3,24 +3,66 @@
#ifndef __CXL_MBOX_H__
#define __CXL_MBOX_H__
#include <linux/rcuwait.h>
+#include <cxl/features.h>
+#include <uapi/linux/cxl_mem.h>

-struct cxl_mbox_cmd;
/**
* struct cxl_mbox_cmd - A command to be submitted to hardware.
* @opcode: (input) The command set and command submitted to hardware.
* @payload_in: (input) Pointer to the input payload.
* @payload_out: (output) Pointer to the output payload. Must be allocated by
* the caller.
* @size_in: (input) Number of bytes to load from @payload_in.
* @size_out: (input) Max number of bytes loaded into @payload_out.
* (output) Number of bytes generated by the device. For fixed size
* output commands this is always expected to be deterministic. For
* variable sized output commands, it tells the exact number of bytes
* written.
* @min_out: (input) internal command output payload size validation
* @poll_count: (input) Number of timeouts to attempt.
* @poll_interval_ms: (input) Time between mailbox background command polling
* interval timeouts.
* @return_code: (output) Error code returned from hardware.
*
* This is the primary mechanism used to send commands to the hardware.
* All the fields except @payload_* correspond exactly to the fields described
* in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
* @payload_out are written to, and read from the Command Payload Registers
* defined in CXL 2.0 8.2.8.4.8.
*/
struct cxl_mbox_cmd {
u16 opcode;
void *payload_in;
void *payload_out;
size_t size_in;
size_t size_out;
size_t min_out;
int poll_count;
int poll_interval_ms;
u16 return_code;
};
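
For orientation, here is a minimal sketch of how a caller drives this structure through a mailbox's @mbox_send hook; the helper name and the error policy are illustrative, not part of this series:

    /* Sketch: issue one fixed-size command on a mailbox that was set up
     * with cxl_mailbox_init() and given a working ->mbox_send() transport.
     */
    static int example_mbox_exec(struct cxl_mailbox *mbox, u16 opcode,
                                 void *in, size_t in_sz,
                                 void *out, size_t out_sz)
    {
            struct cxl_mbox_cmd cmd = {
                    .opcode = opcode,
                    .payload_in = in,
                    .size_in = in_sz,
                    .payload_out = out,
                    .size_out = out_sz,
                    .min_out = out_sz,      /* reject short replies */
            };
            int rc;

            rc = mbox->mbox_send(mbox, &cmd);
            if (rc)
                    return rc;              /* delivery to the device failed */

            /* hardware status is reported separately in return_code */
            return cmd.return_code ? -ENXIO : 0;
    }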
/**
 * struct cxl_mailbox - context for CXL mailbox operations
 * @host: device that hosts the mailbox
+ * @enabled_cmds: mailbox commands that are enabled by the driver
+ * @exclusive_cmds: mailbox commands that are exclusive to the kernel
 * @payload_size: Size of space for payload
 *                (CXL 3.1 8.2.8.4.3 Mailbox Capabilities Register)
 * @mbox_mutex: mutex protects device mailbox and firmware
 * @mbox_wait: rcuwait for mailbox
 * @mbox_send: @dev specific transport for transmitting mailbox commands
+ * @feat_cap: Features capability
 */
struct cxl_mailbox {
        struct device *host;
+       DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
+       DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
        size_t payload_size;
        struct mutex mbox_mutex; /* lock to protect mailbox context */
        struct rcuwait mbox_wait;
        int (*mbox_send)(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd);
+       enum cxl_features_capability feat_cap;
};

int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host);

include/linux/fwctl.h (new file)

@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*/
#ifndef __LINUX_FWCTL_H
#define __LINUX_FWCTL_H
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/cleanup.h>
#include <uapi/fwctl/fwctl.h>
struct fwctl_device;
struct fwctl_uctx;
/**
* struct fwctl_ops - Driver provided operations
*
* fwctl_unregister() will wait until all executing ops are completed before it
* returns. Drivers should be mindful to not let their ops run for too long as
* it will block device hot unplug and module unloading.
*/
struct fwctl_ops {
/**
* @device_type: The driver's assigned device_type number. This is uABI.
*/
enum fwctl_device_type device_type;
/**
* @uctx_size: The size of the fwctl_uctx struct to allocate. The first
* bytes of this memory will be a fwctl_uctx. The driver can use the
* remaining bytes as its private memory.
*/
size_t uctx_size;
/**
* @open_uctx: Called when a file descriptor is opened before the uctx
* is ever used.
*/
int (*open_uctx)(struct fwctl_uctx *uctx);
/**
* @close_uctx: Called when the uctx is destroyed, usually when the FD
* is closed.
*/
void (*close_uctx)(struct fwctl_uctx *uctx);
/**
* @info: Implement FWCTL_INFO. Return a kmalloc() memory that is copied
* to out_device_data. On input, length indicates the size of the user
* buffer; on output, it indicates the size of the memory. The driver can
* ignore length on input, the core code will handle everything.
*/
void *(*info)(struct fwctl_uctx *uctx, size_t *length);
/**
* @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and
* return the response and set out_len. rpc_in can be returned as the
* response pointer. Otherwise the returned pointer is freed with
* kvfree().
*/
void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope,
void *rpc_in, size_t in_len, size_t *out_len);
};
/**
* struct fwctl_device - Per-driver registration struct
* @dev: The sysfs (class/fwctl/fwctlXX) device
*
* Each driver instance will have one of these structs with the driver private
* data following immediately after. This struct is refcounted, it is freed by
* calling fwctl_put().
*/
struct fwctl_device {
struct device dev;
/* private: */
struct cdev cdev;
/* Protect uctx_list */
struct mutex uctx_list_lock;
struct list_head uctx_list;
/*
* Protect ops, held for write when ops becomes NULL during unregister,
* held for read whenever ops is loaded or an ops function is running.
*/
struct rw_semaphore registration_lock;
const struct fwctl_ops *ops;
};
struct fwctl_device *_fwctl_alloc_device(struct device *parent,
const struct fwctl_ops *ops,
size_t size);
/**
* fwctl_alloc_device - Allocate a fwctl
* @parent: Physical device that provides the FW interface
* @ops: Driver ops to register
* @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device
* @member: Name of the struct fwctl_device in @drv_struct
*
* This allocates and initializes the fwctl_device embedded in the drv_struct.
* Upon success the pointer must be freed via fwctl_put(). Returns a
* 'drv_struct *' on success, NULL on error.
*/
#define fwctl_alloc_device(parent, ops, drv_struct, member) \
({ \
static_assert(__same_type(struct fwctl_device, \
((drv_struct *)NULL)->member)); \
static_assert(offsetof(drv_struct, member) == 0); \
(drv_struct *)_fwctl_alloc_device(parent, ops, \
sizeof(drv_struct)); \
})
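
As a usage sketch (the 'exfwctl' names are hypothetical; only the fwctl_* calls are the real API), a driver embeds struct fwctl_device at offset zero of its private struct, allocates it with this macro, and registers it:

    /* Hypothetical driver, for illustration only */
    struct exfwctl_dev {
            struct fwctl_device fwctl;      /* must be the first member */
            void *priv;                     /* driver private data follows */
    };

    static const struct fwctl_ops exfwctl_ops = {
            .device_type = FWCTL_DEVICE_TYPE_ERROR, /* a real driver uses its own value */
            .uctx_size = sizeof(struct fwctl_uctx),
            /* .open_uctx, .close_uctx, .info and .fw_rpc filled in as needed */
    };

    static int exfwctl_probe(struct device *parent)
    {
            struct exfwctl_dev *exdev;
            int rc;

            exdev = fwctl_alloc_device(parent, &exfwctl_ops,
                                       struct exfwctl_dev, fwctl);
            if (!exdev)
                    return -ENOMEM;

            rc = fwctl_register(&exdev->fwctl);
            if (rc) {
                    fwctl_put(&exdev->fwctl);
                    return rc;
            }
            return 0;
    }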
static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl)
{
get_device(&fwctl->dev);
return fwctl;
}
static inline void fwctl_put(struct fwctl_device *fwctl)
{
put_device(&fwctl->dev);
}
DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T));
int fwctl_register(struct fwctl_device *fwctl);
void fwctl_unregister(struct fwctl_device *fwctl);
/**
* struct fwctl_uctx - Per user FD context
* @fwctl: fwctl instance that owns the context
*
* Every FD opened by userspace will get a unique context allocation. Any driver
* private data will follow immediately after.
*/
struct fwctl_uctx {
struct fwctl_device *fwctl;
/* private: */
/* Head at fwctl_device::uctx_list */
struct list_head uctx_list_entry;
};
#endif


@@ -74,7 +74,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout)
#define TAINT_AUX                      16
#define TAINT_RANDSTRUCT               17
#define TAINT_TEST                     18
-#define TAINT_FLAGS_COUNT              19
+#define TAINT_FWCTL                    19
+#define TAINT_FLAGS_COUNT              20

#define TAINT_FLAGS_MAX        ((1UL << TAINT_FLAGS_COUNT) - 1)

struct taint_flag {


@@ -1179,6 +1179,274 @@ struct pds_lm_host_vf_status_cmd {
        u8 status;
};
enum pds_fwctl_cmd_opcode {
PDS_FWCTL_CMD_IDENT = 70,
PDS_FWCTL_CMD_RPC = 71,
PDS_FWCTL_CMD_QUERY = 72,
};
/**
* struct pds_fwctl_cmd - Firmware control command structure
* @opcode: Opcode
* @rsvd: Reserved
* @ep: Endpoint identifier
* @op: Operation identifier
*/
struct pds_fwctl_cmd {
u8 opcode;
u8 rsvd[3];
__le32 ep;
__le32 op;
} __packed;
/**
* struct pds_fwctl_comp - Firmware control completion structure
* @status: Status of the firmware control operation
* @rsvd: Reserved
* @comp_index: Completion index in little-endian format
* @rsvd2: Reserved
* @color: Color bit indicating the state of the completion
*/
struct pds_fwctl_comp {
u8 status;
u8 rsvd;
__le16 comp_index;
u8 rsvd2[11];
u8 color;
} __packed;
/**
* struct pds_fwctl_ident_cmd - Firmware control identification command structure
* @opcode: Operation code for the command
* @rsvd: Reserved
* @version: Interface version
* @rsvd2: Reserved
* @len: Length of the identification data
* @ident_pa: Physical address of the identification data
*/
struct pds_fwctl_ident_cmd {
u8 opcode;
u8 rsvd;
u8 version;
u8 rsvd2;
__le32 len;
__le64 ident_pa;
} __packed;
/* future feature bits here
* enum pds_fwctl_features {
* };
* (compilers don't like empty enums)
*/
/**
* struct pds_fwctl_ident - Firmware control identification structure
* @features: Supported features (enum pds_fwctl_features)
* @version: Interface version
* @rsvd: Reserved
* @max_req_sz: Maximum request size
* @max_resp_sz: Maximum response size
* @max_req_sg_elems: Maximum number of request SGs
* @max_resp_sg_elems: Maximum number of response SGs
*/
struct pds_fwctl_ident {
__le64 features;
u8 version;
u8 rsvd[3];
__le32 max_req_sz;
__le32 max_resp_sz;
u8 max_req_sg_elems;
u8 max_resp_sg_elems;
} __packed;
enum pds_fwctl_query_entity {
PDS_FWCTL_RPC_ROOT = 0,
PDS_FWCTL_RPC_ENDPOINT = 1,
PDS_FWCTL_RPC_OPERATION = 2,
};
#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT 0
#define PDS_FWCTL_RPC_OPCODE_CMD_MASK GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT)
#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT 16
#define PDS_FWCTL_RPC_OPCODE_VER_MASK GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT)
#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op)
#define PDS_FWCTL_RPC_OPCODE_GET_VER(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op)
#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \
(PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \
PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= PDS_FWCTL_RPC_OPCODE_GET_VER(op2))
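
To make the encoding concrete: an operation value packs a command id into bits 15:0 and an interface version into bits 23:16, and PDS_FWCTL_RPC_OPCODE_CMP() accepts a requested op when the command matches and the requested version is no newer than the supported one. A sketch with made-up values:

    /* Illustrative values only */
    u32 supported = FIELD_PREP(PDS_FWCTL_RPC_OPCODE_VER_MASK, 2) |
                    FIELD_PREP(PDS_FWCTL_RPC_OPCODE_CMD_MASK, 0x12);
    u32 requested = FIELD_PREP(PDS_FWCTL_RPC_OPCODE_VER_MASK, 1) |
                    FIELD_PREP(PDS_FWCTL_RPC_OPCODE_CMD_MASK, 0x12);

    /* true: same command 0x12, requested version 1 <= supported version 2 */
    bool compatible = PDS_FWCTL_RPC_OPCODE_CMP(requested, supported);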
/*
* FW command attributes that map to the FWCTL scope values
*/
#define PDSFC_FW_CMD_ATTR_READ 0x00
#define PDSFC_FW_CMD_ATTR_DEBUG_READ 0x02
#define PDSFC_FW_CMD_ATTR_WRITE 0x04
#define PDSFC_FW_CMD_ATTR_DEBUG_WRITE 0x08
#define PDSFC_FW_CMD_ATTR_SYNC 0x10
/**
* struct pds_fwctl_query_cmd - Firmware control query command structure
* @opcode: Operation code for the command
* @entity: Entity type to query (enum pds_fwctl_query_entity)
* @version: Version of the query data structure supported by the driver
* @rsvd: Reserved
* @query_data_buf_len: Length of the query data buffer
* @query_data_buf_pa: Physical address of the query data buffer
* @ep: Endpoint identifier to query (when entity is PDS_FWCTL_RPC_ENDPOINT)
* @op: Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION)
*
* This structure is used to send a query command to the firmware control
* interface. The structure is packed to ensure there is no padding between
* the fields.
*/
struct pds_fwctl_query_cmd {
u8 opcode;
u8 entity;
u8 version;
u8 rsvd;
__le32 query_data_buf_len;
__le64 query_data_buf_pa;
union {
__le32 ep;
__le32 op;
};
} __packed;
/**
* struct pds_fwctl_query_comp - Firmware control query completion structure
* @status: Status of the query command
* @rsvd: Reserved
* @comp_index: Completion index in little-endian format
* @version: Version of the query data structure returned by firmware. This
* should be less than or equal to the version supported by the driver
* @rsvd2: Reserved
* @color: Color bit indicating the state of the completion
*/
struct pds_fwctl_query_comp {
u8 status;
u8 rsvd;
__le16 comp_index;
u8 version;
u8 rsvd2[2];
u8 color;
} __packed;
/**
* struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT
* @id: The identifier for the data endpoint
*/
struct pds_fwctl_query_data_endpoint {
__le32 id;
} __packed;
/**
* struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT
* @id: Operation identifier
* @scope: Scope of the operation (enum fwctl_rpc_scope)
* @rsvd: Reserved
*/
struct pds_fwctl_query_data_operation {
__le32 id;
u8 scope;
u8 rsvd[3];
} __packed;
/**
* struct pds_fwctl_query_data - query data structure
* @version: Version of the query data structure
* @rsvd: Reserved
* @num_entries: Number of entries in the union
* @entries: Array of query data entries, depending on the entity type
*/
struct pds_fwctl_query_data {
u8 version;
u8 rsvd[3];
__le32 num_entries;
u8 entries[] __counted_by_le(num_entries);
} __packed;
/**
* struct pds_fwctl_rpc_cmd - Firmware control RPC command
* @opcode: opcode PDS_FWCTL_CMD_RPC
* @rsvd: Reserved
* @flags: Indicates indirect request and/or response handling
* @ep: Endpoint identifier
* @op: Operation identifier
* @inline_req0: Buffer for inline request
* @inline_req1: Buffer for inline request
* @req_pa: Physical address of request data
* @req_sz: Size of the request
* @req_sg_elems: Number of request SGs
* @req_rsvd: Reserved
* @inline_req2: Buffer for inline request
* @resp_pa: Physical address of response data
* @resp_sz: Size of the response
* @resp_sg_elems: Number of response SGs
* @resp_rsvd: Reserved
*/
struct pds_fwctl_rpc_cmd {
u8 opcode;
u8 rsvd;
__le16 flags;
#define PDS_FWCTL_RPC_IND_REQ 0x1
#define PDS_FWCTL_RPC_IND_RESP 0x2
__le32 ep;
__le32 op;
u8 inline_req0[16];
union {
u8 inline_req1[16];
struct {
__le64 req_pa;
__le32 req_sz;
u8 req_sg_elems;
u8 req_rsvd[3];
};
};
union {
u8 inline_req2[16];
struct {
__le64 resp_pa;
__le32 resp_sz;
u8 resp_sg_elems;
u8 resp_rsvd[3];
};
};
} __packed;
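
A hedged sketch of filling this command when the request and response are too large to inline and are passed indirectly instead; ep_id/op_id and the *_dma/*_len values are assumed caller state, and the single-element layout is only one possible SG convention:

    /* Illustrative only: indirect request and response buffers */
    struct pds_fwctl_rpc_cmd cmd = {
            .opcode = PDS_FWCTL_CMD_RPC,
            .flags = cpu_to_le16(PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP),
            .ep = cpu_to_le32(ep_id),
            .op = cpu_to_le32(op_id),
            .req_pa = cpu_to_le64(req_dma),         /* DMA-mapped request */
            .req_sz = cpu_to_le32(req_len),
            .req_sg_elems = 1,                      /* one contiguous buffer */
            .resp_pa = cpu_to_le64(resp_dma),       /* DMA-mapped response */
            .resp_sz = cpu_to_le32(resp_len),
            .resp_sg_elems = 1,
    };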
/**
* struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element
* @addr: DMA address of SG element data buffer
* @len: Length of SG element data buffer, in bytes
* @rsvd: Reserved
*/
struct pds_sg_elem {
__le64 addr;
__le32 len;
u8 rsvd[4];
} __packed;
/**
* struct pds_fwctl_rpc_comp - Completion of a firmware control RPC
* @status: Status of the command
* @rsvd: Reserved
* @comp_index: Completion index of the command
* @err: Error code, if any, from the RPC
* @resp_sz: Size of the response
* @rsvd2: Reserved
* @color: Color bit indicating the state of the completion
*/
struct pds_fwctl_rpc_comp {
u8 status;
u8 rsvd;
__le16 comp_index;
__le32 err;
__le32 resp_sz;
u8 rsvd2[3];
u8 color;
} __packed;
union pds_core_adminq_cmd {
        u8 opcode;
        u8 bytes[64];
@@ -1216,6 +1484,11 @@ union pds_core_adminq_cmd {
        struct pds_lm_dirty_enable_cmd lm_dirty_enable;
        struct pds_lm_dirty_disable_cmd lm_dirty_disable;
        struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack;
+
+       struct pds_fwctl_cmd fwctl;
+       struct pds_fwctl_ident_cmd fwctl_ident;
+       struct pds_fwctl_rpc_cmd fwctl_rpc;
+       struct pds_fwctl_query_cmd fwctl_query;
};

union pds_core_adminq_comp {
@@ -1243,6 +1516,10 @@ union pds_core_adminq_comp {
        struct pds_lm_state_size_comp lm_state_size;
        struct pds_lm_dirty_status_comp lm_dirty_status;
+
+       struct pds_fwctl_comp fwctl;
+       struct pds_fwctl_rpc_comp fwctl_rpc;
+       struct pds_fwctl_query_comp fwctl_query;
};

#ifndef __CHECKER__


@@ -29,6 +29,7 @@ enum pds_core_vif_types {
        PDS_DEV_TYPE_ETH = 3,
        PDS_DEV_TYPE_RDMA = 4,
        PDS_DEV_TYPE_LM = 5,
+       PDS_DEV_TYPE_FWCTL = 6,

        /* new ones added before this line */
        PDS_DEV_TYPE_MAX = 16   /* don't change - used in struct size */
@@ -40,6 +41,7 @@ enum pds_core_vif_types {
#define PDS_DEV_TYPE_ETH_STR    "Eth"
#define PDS_DEV_TYPE_RDMA_STR   "RDMA"
#define PDS_DEV_TYPE_LM_STR     "LM"
+#define PDS_DEV_TYPE_FWCTL_STR  "fwctl"

#define PDS_VDPA_DEV_NAME       PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
#define PDS_VFIO_LM_DEV_NAME    PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR

include/uapi/cxl/features.h (new file)

@@ -0,0 +1,170 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2024,2025, Intel Corporation
*
* These are definitions for the mailbox command interface of CXL subsystem.
*/
#ifndef _UAPI_CXL_FEATURES_H_
#define _UAPI_CXL_FEATURES_H_
#include <linux/types.h>
#ifndef __KERNEL__
#include <uuid/uuid.h>
#else
#include <linux/uuid.h>
#endif
/*
* struct cxl_mbox_get_sup_feats_in - Get Supported Features input
*
* @count: bytes of Feature data to return in output
* @start_idx: index of first requested Supported Feature Entry, 0 based.
* @reserved: reserved field, must be 0s.
*
* Get Supported Features (0x500h) CXL r3.2 8.2.9.6.1 command.
* Input block for Get Supported Features
*/
struct cxl_mbox_get_sup_feats_in {
__le32 count;
__le16 start_idx;
__u8 reserved[2];
} __attribute__ ((__packed__));
/* CXL spec r3.2 Table 8-87 command effects */
#define CXL_CMD_CONFIG_CHANGE_COLD_RESET BIT(0)
#define CXL_CMD_CONFIG_CHANGE_IMMEDIATE BIT(1)
#define CXL_CMD_DATA_CHANGE_IMMEDIATE BIT(2)
#define CXL_CMD_POLICY_CHANGE_IMMEDIATE BIT(3)
#define CXL_CMD_LOG_CHANGE_IMMEDIATE BIT(4)
#define CXL_CMD_SECURITY_STATE_CHANGE BIT(5)
#define CXL_CMD_BACKGROUND BIT(6)
#define CXL_CMD_BGCMD_ABORT_SUPPORTED BIT(7)
#define CXL_CMD_EFFECTS_VALID BIT(9)
#define CXL_CMD_CONFIG_CHANGE_CONV_RESET BIT(10)
#define CXL_CMD_CONFIG_CHANGE_CXL_RESET BIT(11)
#define CXL_CMD_EFFECTS_RESERVED GENMASK(15, 12)
/*
* struct cxl_feat_entry - Supported Feature Entry
* @uuid: UUID of the Feature
* @id: id to identify the feature. 0 based
* @get_feat_size: max bytes required for Get Feature command for this Feature
* @set_feat_size: max bytes required for Set Feature command for this Feature
* @flags: attribute flags
* @get_feat_ver: Get Feature version
* @set_feat_ver: Set Feature version
* @effects: Set Feature command effects
* @reserved: reserved, must be 0
*
* CXL spec r3.2 Table 8-109
* Get Supported Features Supported Feature Entry
*/
struct cxl_feat_entry {
uuid_t uuid;
__le16 id;
__le16 get_feat_size;
__le16 set_feat_size;
__le32 flags;
__u8 get_feat_ver;
__u8 set_feat_ver;
__le16 effects;
__u8 reserved[18];
} __attribute__ ((__packed__));
/* @flags field for 'struct cxl_feat_entry' */
#define CXL_FEATURE_F_CHANGEABLE BIT(0)
#define CXL_FEATURE_F_PERSIST_FW_UPDATE BIT(4)
#define CXL_FEATURE_F_DEFAULT_SEL BIT(5)
#define CXL_FEATURE_F_SAVED_SEL BIT(6)
/*
* struct cxl_mbox_get_sup_feats_out - Get Supported Features output
* @num_entries: number of Supported Feature Entries returned
* @supported_feats: number of supported Features
* @reserved: reserved, must be 0s.
* @ents: Supported Feature Entries array
*
* CXL spec r3.2 Table 8-108
* Get supported Features Output Payload
*/
struct cxl_mbox_get_sup_feats_out {
__struct_group(cxl_mbox_get_sup_feats_out_hdr, hdr, /* no attrs */,
__le16 num_entries;
__le16 supported_feats;
__u8 reserved[4];
);
struct cxl_feat_entry ents[] __counted_by_le(num_entries);
} __attribute__ ((__packed__));
/*
* Get Feature CXL spec r3.2 Spec 8.2.9.6.2
*/
/*
* struct cxl_mbox_get_feat_in - Get Feature input
* @uuid: UUID for Feature
* @offset: offset of the first byte in Feature data for output payload
* @count: count in bytes of Feature data returned
* @selection: 0 current value, 1 default value, 2 saved value
*
* CXL spec r3.2 section 8.2.9.6.2 Table 8-99
*/
struct cxl_mbox_get_feat_in {
uuid_t uuid;
__le16 offset;
__le16 count;
__u8 selection;
} __attribute__ ((__packed__));
/*
* enum cxl_get_feat_selection - selection field of Get Feature input
*/
enum cxl_get_feat_selection {
CXL_GET_FEAT_SEL_CURRENT_VALUE,
CXL_GET_FEAT_SEL_DEFAULT_VALUE,
CXL_GET_FEAT_SEL_SAVED_VALUE,
CXL_GET_FEAT_SEL_MAX
};
/*
* Set Feature CXL spec r3.2 8.2.9.6.3
*/
/*
* struct cxl_mbox_set_feat_in - Set Features input
* @uuid: UUID for Feature
* @flags: set feature flags
* @offset: byte offset of Feature data to update
* @version: Feature version of the data in Feature Data
* @rsvd: reserved, must be 0s.
* @feat_data: raw byte stream of Features data to update
*
* CXL spec r3.2 section 8.2.9.6.3 Table 8-101
*/
struct cxl_mbox_set_feat_in {
__struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */,
uuid_t uuid;
__le32 flags;
__le16 offset;
__u8 version;
__u8 rsvd[9];
);
__u8 feat_data[];
} __packed;
/*
* enum cxl_set_feat_flag_data_transfer - Set Feature flags field
*/
enum cxl_set_feat_flag_data_transfer {
CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER = 0,
CXL_SET_FEAT_FLAG_INITIATE_DATA_TRANSFER,
CXL_SET_FEAT_FLAG_CONTINUE_DATA_TRANSFER,
CXL_SET_FEAT_FLAG_FINISH_DATA_TRANSFER,
CXL_SET_FEAT_FLAG_ABORT_DATA_TRANSFER,
CXL_SET_FEAT_FLAG_DATA_TRANSFER_MAX
};
#define CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK GENMASK(2, 0)
#define CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET BIT(3)
#endif

include/uapi/fwctl/cxl.h (new file)

@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2024-2025 Intel Corporation
*
* These are definitions for the mailbox command interface of CXL subsystem.
*/
#ifndef _UAPI_FWCTL_CXL_H_
#define _UAPI_FWCTL_CXL_H_
#include <linux/types.h>
#include <linux/stddef.h>
#include <cxl/features.h>
/**
* struct fwctl_rpc_cxl - ioctl(FWCTL_RPC) input for CXL
* @opcode: CXL mailbox command opcode
* @flags: Flags for the command (input).
* @op_size: Size of input payload.
* @reserved1: Reserved. Must be 0s.
* @get_sup_feats_in: Get Supported Features input
* @get_feat_in: Get Feature input
* @set_feat_in: Set Feature input
*/
struct fwctl_rpc_cxl {
__struct_group(fwctl_rpc_cxl_hdr, hdr, /* no attrs */,
__u32 opcode;
__u32 flags;
__u32 op_size;
__u32 reserved1;
);
union {
struct cxl_mbox_get_sup_feats_in get_sup_feats_in;
struct cxl_mbox_get_feat_in get_feat_in;
struct cxl_mbox_set_feat_in set_feat_in;
};
};
/**
* struct fwctl_rpc_cxl_out - ioctl(FWCTL_RPC) output for CXL
* @size: Size of the output payload
* @retval: Return value from device
* @get_sup_feats_out: Get Supported Features output
* @payload: raw byte stream of payload
*/
struct fwctl_rpc_cxl_out {
__struct_group(fwctl_rpc_cxl_out_hdr, hdr, /* no attrs */,
__u32 size;
__u32 retval;
);
union {
struct cxl_mbox_get_sup_feats_out get_sup_feats_out;
__DECLARE_FLEX_ARRAY(__u8, payload);
};
};
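
A sketch of how userspace might fill these wrappers for a Get Supported Features query (opcode 0x500 per include/uapi/cxl/features.h above; the buffer sizing and the count semantics here are illustrative):

    /* Request one Supported Feature Entry through ioctl(FWCTL_RPC) */
    struct fwctl_rpc_cxl in = {
            .hdr = {
                    .opcode = 0x500,        /* Get Supported Features */
                    .op_size = sizeof(in.get_sup_feats_in),
            },
            .get_sup_feats_in = {
                    .count = htole32(sizeof(struct cxl_feat_entry)),
                    .start_idx = htole16(0),
            },
    };
    size_t out_len = sizeof(struct fwctl_rpc_cxl_out) +
                     sizeof(struct cxl_feat_entry);

The filled struct fwctl_rpc_cxl is then handed to the kernel as struct fwctl_rpc's 'in' buffer, with a struct fwctl_rpc_cxl_out of out_len bytes as 'out'.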
#endif

include/uapi/fwctl/fwctl.h (new file)

@@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
*/
#ifndef _UAPI_FWCTL_H
#define _UAPI_FWCTL_H
#include <linux/types.h>
#include <linux/ioctl.h>
#define FWCTL_TYPE 0x9A
/**
* DOC: General ioctl format
*
* The ioctl interface follows a general format to allow for extensibility. Each
* ioctl is passed a structure pointer as the argument providing the size of
* the structure in the first u32. The kernel checks that any structure space
* beyond what it understands is 0. This allows userspace to use the backward
* compatible portion while consistently using the newer, larger, structures.
*
* ioctls use a standard meaning for common errnos:
*
* - ENOTTY: The IOCTL number itself is not supported at all
* - E2BIG: The IOCTL number is supported, but the provided structure has
* non-zero in a part the kernel does not understand.
* - EOPNOTSUPP: The IOCTL number is supported, and the structure is
* understood, however a known field has a value the kernel does not
* understand or support.
* - EINVAL: Everything about the IOCTL was understood, but a field is not
* correct.
* - ENOMEM: Out of memory.
* - ENODEV: The underlying device has been hot-unplugged and the FD is
* orphaned.
*
* As well as additional errnos, within specific ioctls.
*/
enum {
FWCTL_CMD_BASE = 0,
FWCTL_CMD_INFO = 0,
FWCTL_CMD_RPC = 1,
};
enum fwctl_device_type {
FWCTL_DEVICE_TYPE_ERROR = 0,
FWCTL_DEVICE_TYPE_MLX5 = 1,
FWCTL_DEVICE_TYPE_CXL = 2,
FWCTL_DEVICE_TYPE_PDS = 4,
};
/**
* struct fwctl_info - ioctl(FWCTL_INFO)
* @size: sizeof(struct fwctl_info)
* @flags: Must be 0
* @out_device_type: Returns the type of the device from enum fwctl_device_type
* @device_data_len: On input the length of the out_device_data memory. On
* output the size of the kernel's device_data which may be larger or
* smaller than the input. May be 0 on input.
* @out_device_data: Pointer to a memory of device_data_len bytes. Kernel will
* fill the entire memory, zeroing as required.
*
* Returns basic information about this fwctl instance, particularly what driver
* is being used to define the device_data format.
*/
struct fwctl_info {
__u32 size;
__u32 flags;
__u32 out_device_type;
__u32 device_data_len;
__aligned_u64 out_device_data;
};
#define FWCTL_INFO _IO(FWCTL_TYPE, FWCTL_CMD_INFO)
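
A minimal userspace sketch of the calling convention; the device path and the 64-byte scratch buffer are arbitrary choices for the example:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <fwctl/fwctl.h>

    int main(void)
    {
            uint8_t data[64] = {};
            struct fwctl_info info = {
                    .size = sizeof(info),
                    .device_data_len = sizeof(data),
                    .out_device_data = (uintptr_t)data,
            };
            int fd = open("/dev/fwctl/fwctl0", O_RDWR);   /* example path */

            if (fd < 0 || ioctl(fd, FWCTL_INFO, &info) < 0)
                    return 1;
            /* out_device_type says how to parse the device_data bytes */
            printf("fwctl device type %u\n", info.out_device_type);
            return 0;
    }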
/**
* enum fwctl_rpc_scope - Scope of access for the RPC
*
* Refer to fwctl.rst for a more detailed discussion of these scopes.
*/
enum fwctl_rpc_scope {
/**
* @FWCTL_RPC_CONFIGURATION: Device configuration access scope
*
* Read/write access to device configuration. When configuration
* is written to the device it remains in a fully supported state.
*/
FWCTL_RPC_CONFIGURATION = 0,
/**
* @FWCTL_RPC_DEBUG_READ_ONLY: Read only access to debug information
*
* Readable debug information. Debug information is compatible with
* kernel lockdown, and does not disclose any sensitive information. For
* instance exposing any encryption secrets from this information is
* forbidden.
*/
FWCTL_RPC_DEBUG_READ_ONLY = 1,
/**
* @FWCTL_RPC_DEBUG_WRITE: Writable access to lockdown compatible debug information
*
* Allows write access to data in the device which may leave a fully
* supported state. This is intended to permit intensive and possibly
* invasive debugging. This scope will taint the kernel.
*/
FWCTL_RPC_DEBUG_WRITE = 2,
/**
* @FWCTL_RPC_DEBUG_WRITE_FULL: Write access to all debug information
*
* Allows read/write access to everything. Requires CAP_SYS_RAW_IO, so
* it is not required to follow lockdown principles. If in doubt
* debugging should be placed in this scope. This scope will taint the
* kernel.
*/
FWCTL_RPC_DEBUG_WRITE_FULL = 3,
};
/**
* struct fwctl_rpc - ioctl(FWCTL_RPC)
* @size: sizeof(struct fwctl_rpc)
* @scope: One of enum fwctl_rpc_scope, required scope for the RPC
* @in_len: Length of the in memory
* @out_len: Length of the out memory
* @in: Request message in device specific format
* @out: Response message in device specific format
*
* Deliver a Remote Procedure Call to the device FW and return the response. The
* call's parameters and return are marshaled into linear buffers of memory. Any
* errno indicates that delivery of the RPC to the device failed. Return status
* originating in the device during a successful delivery must be encoded into
* out.
*
* The format of the buffers matches the out_device_type from FWCTL_INFO.
*/
struct fwctl_rpc {
__u32 size;
__u32 scope;
__u32 in_len;
__u32 out_len;
__aligned_u64 in;
__aligned_u64 out;
};
#define FWCTL_RPC _IO(FWCTL_TYPE, FWCTL_CMD_RPC)
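
The RPC call itself follows the same shape regardless of device type. In this fragment fd, in_buf/in_len and out_buf/out_len are assumed to be prepared by the caller, and the scope must match what the command actually does:

    struct fwctl_rpc rpc = {
            .size = sizeof(rpc),
            .scope = FWCTL_RPC_DEBUG_READ_ONLY, /* weakest sufficient scope */
            .in_len = in_len,
            .out_len = out_len,
            .in = (uintptr_t)in_buf,
            .out = (uintptr_t)out_buf,
    };

    if (ioctl(fd, FWCTL_RPC, &rpc) < 0)
            return -1;  /* delivery failed; device status is encoded in out_buf */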
#endif

include/uapi/fwctl/mlx5.h (new file)

@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*
* These are definitions for the command interface for mlx5 HW. mlx5 FW has a
* User Context mechanism which allows the FW to understand a security scope.
* FWCTL binds each FD to a FW user context and then places the User Context ID
* (UID) in each command header. The created User Context has a capability set
* that is appropriate for FWCTL's security model.
*
* Command formation should use a copy of the structs in mlx5_ifc.h following
* the Programmer's Reference Manual. An open release is available here:
*
* https://network.nvidia.com/files/doc-2020/ethernet-adapters-programming-manual.pdf
*
* The device_type for this file is FWCTL_DEVICE_TYPE_MLX5.
*/
#ifndef _UAPI_FWCTL_MLX5_H
#define _UAPI_FWCTL_MLX5_H
#include <linux/types.h>
/**
* struct fwctl_info_mlx5 - ioctl(FWCTL_INFO) out_device_data
* @uid: The FW UID this FD is bound to. Each command header will force
* this value.
* @uctx_caps: The FW capabilities that are enabled for the uid.
*
* Return basic information about the FW interface available.
*/
struct fwctl_info_mlx5 {
__u32 uid;
__u32 uctx_caps;
};
#endif

include/uapi/fwctl/pds.h (new file)

@@ -0,0 +1,62 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright(c) Advanced Micro Devices, Inc */
/*
* fwctl interface info for pds_fwctl
*/
#ifndef _UAPI_FWCTL_PDS_H_
#define _UAPI_FWCTL_PDS_H_
#include <linux/types.h>
/**
* struct fwctl_info_pds
* @uctx_caps: bitmap of firmware capabilities
*
* Return basic information about the FW interface available.
*/
struct fwctl_info_pds {
__u32 uctx_caps;
};
/**
* enum pds_fwctl_capabilities
* @PDS_FWCTL_QUERY_CAP: firmware can be queried for information
* @PDS_FWCTL_SEND_CAP: firmware can be sent commands
*/
enum pds_fwctl_capabilities {
PDS_FWCTL_QUERY_CAP = 0,
PDS_FWCTL_SEND_CAP,
};
/**
* struct fwctl_rpc_pds
* @in.op: requested operation code
* @in.ep: firmware endpoint to operate on
* @in.rsvd: reserved
* @in.len: length of payload data
* @in.payload: address of payload buffer
* @in: rpc in parameters
* @out.retval: operation result value
* @out.rsvd: reserved
* @out.len: length of result data buffer
* @out.payload: address of payload data buffer
* @out: rpc out parameters
*/
struct fwctl_rpc_pds {
struct {
__u32 op;
__u32 ep;
__u32 rsvd;
__u32 len;
__aligned_u64 payload;
} in;
struct {
__u32 retval;
__u32 rsvd[2];
__u32 len;
__aligned_u64 payload;
} out;
};
#endif /* _UAPI_FWCTL_PDS_H_ */
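
For completeness, a sketch of how this structure rides inside the generic ioctl; the endpoint and operation ids would come from a prior PDS_FWCTL_CMD_QUERY discovery step, and all variable names are illustrative:

    struct fwctl_rpc_pds pds = {
            .in = {
                    .op = op_id,            /* discovered operation id */
                    .ep = ep_id,            /* discovered endpoint id */
                    .len = req_len,
                    .payload = (uintptr_t)req_buf,
            },
            .out = {
                    .len = resp_len,
                    .payload = (uintptr_t)resp_buf,
            },
    };
    struct fwctl_rpc rpc = {
            .size = sizeof(rpc),
            .scope = FWCTL_RPC_CONFIGURATION,   /* scope choice is illustrative */
            .in_len = sizeof(pds),
            .out_len = sizeof(pds),
            .in = (uintptr_t)&pds,
            .out = (uintptr_t)&pds,
    };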


@@ -511,6 +511,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
        TAINT_FLAG(AUX, 'X', ' ', true),
        TAINT_FLAG(RANDSTRUCT, 'T', ' ', true),
        TAINT_FLAG(TEST, 'N', ' ', true),
+       TAINT_FLAG(FWCTL, 'J', ' ', true),
};

#undef TAINT_FLAG


@@ -204,6 +204,14 @@ else
        echo " * an in-kernel test (such as a KUnit test) has been run (#18)"
fi

+T=`expr $T / 2`
+if [ `expr $T % 2` -eq 0 ]; then
+       addout " "
+else
+       addout "J"
+       echo " * fwctl's mutating debug interface was used (#19)"
+fi
+
echo "For a more detailed explanation of the various taint flags see"
echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"


@@ -63,6 +63,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
cxl_core-y += cxl_core_exports.o


@@ -44,6 +44,18 @@ static struct cxl_cel_entry mock_cel[] = {
                .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS),
                .effect = CXL_CMD_EFFECT_NONE,
        },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_FEATURES),
+               .effect = CXL_CMD_EFFECT_NONE,
+       },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FEATURE),
+               .effect = CXL_CMD_EFFECT_NONE,
+       },
+       {
+               .opcode = cpu_to_le16(CXL_MBOX_OP_SET_FEATURE),
+               .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE)),
+       },
        {
                .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY),
                .effect = CXL_CMD_EFFECT_NONE,
@@ -145,6 +157,10 @@ struct mock_event_store {
        u32 ev_status;
};

+struct vendor_test_feat {
+       __le32 data;
+} __packed;
+
struct cxl_mockmem_data {
        void *lsa;
        void *fw;
@@ -161,6 +177,7 @@ struct cxl_mockmem_data {
        u8 event_buf[SZ_4K];
        u64 timestamp;
        unsigned long sanitize_timeout;
+       struct vendor_test_feat test_feat;
};

static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -1354,6 +1371,151 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata,
        return -EINVAL;
}
#define CXL_VENDOR_FEATURE_TEST \
UUID_INIT(0xffffffff, 0xffff, 0xffff, 0xff, 0xff, 0xff, 0xff, 0xff, \
0xff, 0xff, 0xff)
static void fill_feature_vendor_test(struct cxl_feat_entry *feat)
{
feat->uuid = CXL_VENDOR_FEATURE_TEST;
feat->id = 0;
feat->get_feat_size = cpu_to_le16(0x4);
feat->set_feat_size = cpu_to_le16(0x4);
feat->flags = cpu_to_le32(CXL_FEATURE_F_CHANGEABLE |
CXL_FEATURE_F_DEFAULT_SEL |
CXL_FEATURE_F_SAVED_SEL);
feat->get_feat_ver = 1;
feat->set_feat_ver = 1;
feat->effects = cpu_to_le16(CXL_CMD_CONFIG_CHANGE_COLD_RESET |
CXL_CMD_EFFECTS_VALID);
}
#define MAX_CXL_TEST_FEATS 1
static int mock_get_test_feature(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct vendor_test_feat *output = cmd->payload_out;
struct cxl_mbox_get_feat_in *input = cmd->payload_in;
u16 offset = le16_to_cpu(input->offset);
u16 count = le16_to_cpu(input->count);
u8 *ptr;
if (offset > sizeof(*output)) {
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
if (offset + count > sizeof(*output)) {
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
ptr = (u8 *)&mdata->test_feat + offset;
memcpy((u8 *)output + offset, ptr, count);
return 0;
}
static int mock_get_feature(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_feat_in *input = cmd->payload_in;
if (uuid_equal(&input->uuid, &CXL_VENDOR_FEATURE_TEST))
return mock_get_test_feature(mdata, cmd);
cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
return -EOPNOTSUPP;
}
static int mock_set_test_feature(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_set_feat_in *input = cmd->payload_in;
struct vendor_test_feat *test =
(struct vendor_test_feat *)input->feat_data;
u32 action;
action = FIELD_GET(CXL_SET_FEAT_FLAG_DATA_TRANSFER_MASK,
le32_to_cpu(input->hdr.flags));
/*
* While it is spec compliant to support other set actions, it is not
* necessary to add the complication in the emulation currently. Reject
* anything besides full xfer.
*/
if (action != CXL_SET_FEAT_FLAG_FULL_DATA_TRANSFER) {
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
/* Offset should be reserved when doing full transfer */
if (input->hdr.offset) {
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
memcpy(&mdata->test_feat.data, &test->data, sizeof(u32));
return 0;
}
static int mock_set_feature(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_set_feat_in *input = cmd->payload_in;
if (uuid_equal(&input->hdr.uuid, &CXL_VENDOR_FEATURE_TEST))
return mock_set_test_feature(mdata, cmd);
cmd->return_code = CXL_MBOX_CMD_RC_UNSUPPORTED;
return -EOPNOTSUPP;
}
static int mock_get_supported_features(struct cxl_mockmem_data *mdata,
struct cxl_mbox_cmd *cmd)
{
struct cxl_mbox_get_sup_feats_in *in = cmd->payload_in;
struct cxl_mbox_get_sup_feats_out *out = cmd->payload_out;
struct cxl_feat_entry *feat;
u16 start_idx, count;
if (cmd->size_out < sizeof(*out)) {
cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
return -EINVAL;
}
/*
* Current emulation only supports 1 feature
*/
start_idx = le16_to_cpu(in->start_idx);
if (start_idx != 0) {
cmd->return_code = CXL_MBOX_CMD_RC_INPUT;
return -EINVAL;
}
count = le16_to_cpu(in->count);
if (count < struct_size(out, ents, 0)) {
cmd->return_code = CXL_MBOX_CMD_RC_PAYLOADLEN;
return -EINVAL;
}
out->supported_feats = cpu_to_le16(MAX_CXL_TEST_FEATS);
cmd->return_code = 0;
if (count < struct_size(out, ents, MAX_CXL_TEST_FEATS)) {
out->num_entries = 0;
return 0;
}
out->num_entries = cpu_to_le16(MAX_CXL_TEST_FEATS);
feat = out->ents;
fill_feature_vendor_test(feat);
return 0;
}
static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
                              struct cxl_mbox_cmd *cmd)
{
@@ -1439,6 +1601,15 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
        case CXL_MBOX_OP_ACTIVATE_FW:
                rc = mock_activate_fw(mdata, cmd);
                break;
+       case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
+               rc = mock_get_supported_features(mdata, cmd);
+               break;
+       case CXL_MBOX_OP_GET_FEATURE:
+               rc = mock_get_feature(mdata, cmd);
+               break;
+       case CXL_MBOX_OP_SET_FEATURE:
+               rc = mock_set_feature(mdata, cmd);
+               break;
        default:
                break;
        }
@@ -1486,6 +1657,11 @@ static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds)
        return 0;
}

+static void cxl_mock_test_feat_init(struct cxl_mockmem_data *mdata)
+{
+       mdata->test_feat.data = cpu_to_le32(0xdeadbeef);
+}
+
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
@@ -1558,6 +1734,10 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
        if (rc)
                return rc;

+       rc = devm_cxl_setup_features(cxlds);
+       if (rc)
+               dev_dbg(dev, "No CXL Features discovered\n");
+
        cxl_mock_add_event_logs(&mdata->mes);

        cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
@@ -1572,7 +1752,12 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
        if (rc)
                return rc;

+       rc = devm_cxl_setup_fwctl(cxlmd);
+       if (rc)
+               dev_dbg(dev, "No CXL FWCTL setup\n");
+
        cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
+       cxl_mock_test_feat_init(mdata);

        return 0;
}