linux/drivers/gpu/nova-core/gsp/cmdq.rs

680 lines
25 KiB
Rust

// SPDX-License-Identifier: GPL-2.0
use core::{
cmp,
mem,
sync::atomic::{
fence,
Ordering, //
}, //
};
use kernel::{
device,
dma::{
CoherentAllocation,
DmaAddress, //
},
dma_write,
io::poll::read_poll_timeout,
prelude::*,
sync::aref::ARef,
time::Delta,
transmute::{
AsBytes,
FromBytes, //
},
};
use crate::{
driver::Bar0,
gsp::{
fw::{
GspMsgElement,
MsgFunction,
MsgqRxHeader,
MsgqTxHeader, //
},
PteArray,
GSP_PAGE_SHIFT,
GSP_PAGE_SIZE, //
},
num,
regs,
sbuffer::SBufferIter, //
};
/// Trait implemented by types representing a command to send to the GSP.
///
/// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it
/// needs to send a given command: the function identifier, how to build the fixed-size command
/// structure, and (optionally) how to fill a trailing variable-length payload.
///
/// [`CommandToGsp::init`] in particular is responsible for initializing the command directly
/// into the space reserved for it in the command queue buffer.
///
/// Some commands may be followed by a variable-length payload. For these, the
/// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] methods
/// must be overridden as well.
pub(crate) trait CommandToGsp {
/// Function identifying this command to the GSP.
const FUNCTION: MsgFunction;
/// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer.
type Command: FromBytes + AsBytes;
/// Error type returned by [`CommandToGsp::init`].
type InitError;
/// In-place command initializer responsible for filling the command in the command queue
/// buffer.
fn init(&self) -> impl Init<Self::Command, Self::InitError>;
/// Size in bytes of the variable-length payload following the command structure generated by
/// [`CommandToGsp::init`].
///
/// [`Cmdq::send_command`] adds this value to `size_of::<Self::Command>()` to compute the total
/// space to reserve for the command.
///
/// Most commands don't have a variable-length payload, so this is zero by default.
fn variable_payload_len(&self) -> usize {
0
}
/// Method initializing the variable-length payload.
///
/// The command buffer is circular, which means that we may need to jump back to its beginning
/// while in the middle of a command. For this reason, the variable-length payload is
/// initialized using a [`SBufferIter`].
///
/// This method will receive a buffer of the length returned by
/// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. If any
/// space is left unwritten, [`Cmdq::send_command`] fails with `EIO`.
///
/// Most commands don't have a variable-length payload, so this does nothing by default.
fn init_variable_payload(
&self,
_dst: &mut SBufferIter<core::array::IntoIter<&mut [u8], 2>>,
) -> Result {
Ok(())
}
}
/// Trait representing messages received from the GSP.
///
/// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message: which
/// function identifier to expect, and how to turn the raw bytes into an instance of `Self`.
pub(crate) trait MessageFromGsp: Sized {
/// Function identifying this message from the GSP.
const FUNCTION: MsgFunction;
/// Error type returned by [`MessageFromGsp::read`].
type InitError;
/// Type containing the raw message to be read from the message queue.
type Message: FromBytes;
/// Method reading the message from the message queue and returning it.
///
/// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns
/// it.
///
/// `msg` is the fixed-size part of the message; `sbuffer` iterates over the bytes that follow
/// it in the queue (possibly split in two slices because the queue is circular).
fn read(
msg: &Self::Message,
sbuffer: &mut SBufferIter<core::array::IntoIter<&[u8], 2>>,
) -> Result<Self, Self::InitError>;
}
/// Number of GSP pages making up the circular buffer of a [`Msgq`].
///
/// Queue read and write pointers are page indices taken modulo this value.
pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f;
/// Circular buffer of a [`Msgq`].
///
/// This area of memory is to be shared between the driver and the GSP to exchange commands or
/// messages. It is organized as an array of [`MSGQ_NUM_PAGES`] pages of `GSP_PAGE_SIZE` bytes.
#[repr(C, align(0x1000))]
#[derive(Debug)]
struct MsgqData {
// Ring storage, one array entry per GSP page.
data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)],
}
// Annoyingly we are forced to use a literal to specify the alignment of
// `MsgqData`, so check that it corresponds to the actual GSP page size here.
static_assert!(align_of::<MsgqData>() == GSP_PAGE_SIZE);
/// Unidirectional message queue.
///
/// Contains the data for a message queue that either the driver or the GSP writes to.
///
/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the
/// read pointer of `rx` actually refers to the `Msgq` owned by the other side.
/// This design ensures that only one of the driver or the GSP ever writes to a given instance of
/// this struct.
#[repr(C)]
// There is no struct defined for this in the open-gpu-kernel-source headers.
// Instead it is defined by code in `GspMsgQueuesInit()`.
struct Msgq {
/// Header for sending messages, including the write pointer.
tx: MsgqTxHeader,
/// Header for receiving messages, including the read pointer.
rx: MsgqRxHeader,
/// The message queue proper.
msgq: MsgqData,
}
/// Structure shared between the driver and the GSP and containing the command and message queues.
///
/// The layout is `#[repr(C)]`: the page table entries come first, followed by the CPU queue and
/// then the GSP queue.
#[repr(C)]
struct GspMem {
/// Self-mapping page table entries.
ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
/// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
/// write and read pointers that the CPU updates.
///
/// This member is read-only for the GSP.
cpuq: Msgq,
/// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the
/// write and read pointers that the GSP updates.
///
/// This member is read-only for the driver.
gspq: Msgq,
}
// SAFETY: These structs don't meet the no-padding requirements of `AsBytes`, but
// that is not a problem because they are not used outside the kernel.
// NOTE(review): this justification relies on how `GspMem` is used rather than on its layout;
// confirm it still holds whenever a new user of the byte view is added.
unsafe impl AsBytes for GspMem {}
// SAFETY: These structs don't meet the no-padding requirements of `FromBytes`, but
// that is not a problem because they are not used outside the kernel.
unsafe impl FromBytes for GspMem {}
/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`].
///
/// This provides the low-level functionality to communicate with the GSP, including allocation of
/// queue space to write messages to and management of read/write pointers.
///
/// This is shared with the GSP, with clear ownership rules regarding the command queues:
///
/// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write
/// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`].
/// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read
/// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`].
///
/// The wrapped allocation holds exactly one [`GspMem`] object.
struct DmaGspMem(CoherentAllocation<GspMem>);
impl DmaGspMem {
/// Allocates the shared [`GspMem`] region for `dev` and initializes the parts owned by the
/// driver.
///
/// The memory is allocated zeroed; the self-mapping PTEs and the TX/RX headers of the CPU
/// queue are then written. The GSP queue is left zeroed.
///
/// # Errors
///
/// Propagates any error from the coherent allocation, from [`PteArray::new`] or from the DMA
/// writes.
fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
    // Total size of one queue (headers + ring), as advertised in the TX header.
    const QUEUE_BYTES: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();
    // Offset of the RX header within a queue, as advertised in the TX header.
    const RX_HEADER_OFFSET: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();

    let shared = CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;

    dma_write!(shared[0].ptes = PteArray::new(shared.dma_handle())?)?;
    dma_write!(
        shared[0].cpuq.tx = MsgqTxHeader::new(QUEUE_BYTES, RX_HEADER_OFFSET, MSGQ_NUM_PAGES)
    )?;
    dma_write!(shared[0].cpuq.rx = MsgqRxHeader::new())?;

    Ok(Self(shared))
}
/// Returns the region of the CPU message queue that the driver is currently allowed to write
/// to.
///
/// The writable region starts at the CPU write pointer (`tx`) and stops one page short of the
/// GSP read pointer (`rx`): one slot is always left empty so that `tx == rx` unambiguously
/// means "queue empty" and can never be reached by filling the queue.
///
/// As the message queue is a circular buffer, the region may be discontiguous in memory. In
/// that case the second slice will have a non-zero length.
fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) {
    let tx = self.cpu_write_ptr() as usize;
    let rx = self.gsp_read_ptr() as usize;

    // SAFETY:
    // - The `CoherentAllocation` contains exactly one object.
    // - We will only access the driver-owned part of the shared memory.
    // - Per the safety statement of the function, no concurrent access will be performed.
    let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0];
    // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`.
    let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx);

    if rx == 0 {
        // The area from `tx` up to the end of the ring belongs to the driver, except for the
        // very last page which is the empty slot preceding `rx`.
        //
        // PANIC: `tx < MSGQ_NUM_PAGES`, so `after_tx` holds at least one page.
        let last = after_tx.len() - 1;
        (&mut after_tx[..last], &mut before_tx[0..0])
    } else if rx <= tx {
        // The area from `tx` up to the end of the ring, and from the beginning of the ring up
        // to `rx`, minus one unit, belongs to the driver.
        //
        // PANIC: `rx != 0` in this branch so `rx - 1` does not underflow, and
        // `rx - 1 < tx == before_tx.len()`.
        (after_tx, &mut before_tx[..rx - 1])
    } else {
        // The area from `tx` to `rx`, minus one unit, belongs to the driver.
        //
        // PANIC: `rx > tx` in this branch so `rx - tx - 1` does not underflow, and since
        // `rx < MSGQ_NUM_PAGES` it is smaller than `after_tx.len()`.
        (&mut after_tx[..rx - tx - 1], &mut before_tx[0..0])
    }
}
/// Returns the region of the GSP message queue that the driver is currently allowed to read
/// from.
///
/// The readable region starts at the CPU read pointer (`rx`) and ends right before the GSP
/// write pointer (`tx`). Both slices are empty when the two pointers are equal.
///
/// As the message queue is a circular buffer, the region may be discontiguous in memory. In
/// that case the second slice will have a non-zero length.
fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) {
    let tx = self.gsp_write_ptr() as usize;
    let rx = self.cpu_read_ptr() as usize;

    // SAFETY:
    // - The `CoherentAllocation` contains exactly one object.
    // - We will only access the driver-owned part of the shared memory.
    // - Per the safety statement of the function, no concurrent access will be performed.
    let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0];
    // PANIC: per the invariant of `cpu_read_ptr`, `rx` is `< MSGQ_NUM_PAGES`.
    let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx);

    match tx.cmp(&rx) {
        // Nothing to read.
        cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]),
        // Contiguous area `[rx, tx)`. `after_rx` already starts at `rx`, so only the first
        // `tx - rx` pages of it are readable.
        //
        // PANIC: `tx > rx` in this arm, and `tx < MSGQ_NUM_PAGES` per the invariant of
        // `gsp_write_ptr`, so `tx - rx <= after_rx.len()`.
        cmp::Ordering::Greater => (&after_rx[..tx - rx], &before_rx[0..0]),
        // Wrapped area: `[rx, end)` followed by `[0, tx)`.
        //
        // PANIC: `tx < rx == before_rx.len()` in this arm.
        cmp::Ordering::Less => (after_rx, &before_rx[..tx]),
    }
}
/// Reserves space on the command queue for a command whose body is `size` bytes long.
///
/// The reserved space consists of a [`GspMsgElement`] header followed by `size` bytes of
/// contents. The returned [`GspCommand`] gives writable access to both.
///
/// # Errors
///
/// - `EAGAIN` if the driver area is too small to hold the requested command.
/// - `EIO` if the command header is not properly aligned.
fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> {
    // View the currently writable pages as two flat byte slices.
    let (pages_1, pages_2) = self.driver_write_area();
    #[allow(clippy::incompatible_msrv)]
    let (first, second) = (pages_1.as_flattened_mut(), pages_2.as_flattened_mut());

    // The GSP may not have consumed earlier commands yet, in which case there is not enough
    // room and the caller has to retry later.
    if size_of::<GspMsgElement>() + size > first.len() + second.len() {
        return Err(EAGAIN);
    }

    // The header sits at the very start of the writable area.
    let (header, first) = GspMsgElement::from_bytes_mut_prefix(first).ok_or(EIO)?;

    // Carve out exactly `size` bytes of contents after the header.
    let contents = if first.len() > size {
        // Everything fits before the end of the ring.
        (&mut first[..size], &mut second[0..0])
    } else {
        // Use the whole remainder of `first` and spill the rest into `second`.
        let spill = size - first.len();
        (first, &mut second[..spill])
    };

    Ok(GspCommand { header, contents })
}
/// Returns the index of the memory page the GSP will write the next message to.
///
/// # Invariants
///
/// - The returned value is between `0` and `MSGQ_NUM_PAGES`.
fn gsp_write_ptr(&self) -> u32 {
    let shared = self.0.start_ptr();

    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    let raw = unsafe { (*shared).gspq.tx.write_ptr() };

    raw % MSGQ_NUM_PAGES
}
/// Returns the index of the memory page the GSP will read the next command from.
///
/// # Invariants
///
/// - The returned value is between `0` and `MSGQ_NUM_PAGES`.
fn gsp_read_ptr(&self) -> u32 {
    let shared = self.0.start_ptr();

    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    let raw = unsafe { (*shared).gspq.rx.read_ptr() };

    raw % MSGQ_NUM_PAGES
}
/// Returns the index of the memory page the CPU can read the next message from.
///
/// # Invariants
///
/// - The returned value is between `0` and `MSGQ_NUM_PAGES`.
fn cpu_read_ptr(&self) -> u32 {
    let shared = self.0.start_ptr();

    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    let raw = unsafe { (*shared).cpuq.rx.read_ptr() };

    raw % MSGQ_NUM_PAGES
}
/// Informs the GSP that it can send `elem_count` new pages into the message queue.
///
/// The CPU read pointer is moved forward by `elem_count` pages, wrapping around at
/// `MSGQ_NUM_PAGES`.
fn advance_cpu_read_ptr(&mut self, elem_count: u32) {
    let current = self.cpu_read_ptr();
    let rptr = current.wrapping_add(elem_count) % MSGQ_NUM_PAGES;

    // Ensure read pointer is properly ordered.
    fence(Ordering::SeqCst);

    let shared = self.0.start_ptr_mut();
    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    unsafe { (*shared).cpuq.rx.set_read_ptr(rptr) };
}
/// Returns the index of the memory page the CPU can write the next command to.
///
/// # Invariants
///
/// - The returned value is between `0` and `MSGQ_NUM_PAGES`.
fn cpu_write_ptr(&self) -> u32 {
    let shared = self.0.start_ptr();

    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    let raw = unsafe { (*shared).cpuq.tx.write_ptr() };

    raw % MSGQ_NUM_PAGES
}
/// Informs the GSP that it can process `elem_count` new pages from the command queue.
///
/// The CPU write pointer is moved forward by `elem_count` pages, wrapping around at
/// `MSGQ_NUM_PAGES`.
fn advance_cpu_write_ptr(&mut self, elem_count: u32) {
    // The ring holds `MSGQ_NUM_PAGES` pages, so the pointer must wrap with a modulo. A bitwise
    // AND with `MSGQ_NUM_PAGES` would wrap at the next power of two instead and could publish
    // an index one past the end of the ring.
    let wptr = self.cpu_write_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES;
    let gsp_mem = self.0.start_ptr_mut();

    // SAFETY:
    // - The `CoherentAllocation` contains at least one object.
    // - By the invariants of `CoherentAllocation` the pointer is valid.
    unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) };

    // Ensure all command data is visible before triggering the GSP read.
    fence(Ordering::SeqCst);
}
}
/// A command ready to be sent on the command queue.
///
/// This is the type returned by [`DmaGspMem::allocate_command`]. It borrows the reserved space
/// directly from the command queue buffer.
struct GspCommand<'a> {
// Writable reference to the header of the command.
header: &'a mut GspMsgElement,
// Writable slices to the contents of the command. The second slice is empty unless the command
// wraps around the end of the command queue.
contents: (&'a mut [u8], &'a mut [u8]),
}
/// A message ready to be processed from the message queue.
///
/// This is the type returned by [`Cmdq::wait_for_msg`]. It borrows the message directly from the
/// message queue buffer.
struct GspMessage<'a> {
// Reference to the header of the message.
header: &'a GspMsgElement,
// Slices to the contents of the message. The second slice is empty unless the message wraps
// around the end of the message queue.
contents: (&'a [u8], &'a [u8]),
}
/// GSP command queue.
///
/// Provides the ability to send commands and receive messages from the GSP using a shared memory
/// area.
pub(crate) struct Cmdq {
/// Device this command queue belongs to.
dev: ARef<device::Device>,
/// Current command sequence number. Incremented after each command sent.
seq: u32,
/// Memory area shared with the GSP for communicating commands and messages.
gsp_mem: DmaGspMem,
}
impl Cmdq {
/// Offset of the data after the PTEs, i.e. offset of the CPU queue within [`GspMem`].
const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
/// Offset of command queue ring buffer, relative to [`Self::POST_PTE_OFFSET`].
pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
+ core::mem::offset_of!(Msgq, msgq)
- Self::POST_PTE_OFFSET;
/// Offset of message queue ring buffer, relative to [`Self::POST_PTE_OFFSET`].
pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
+ core::mem::offset_of!(Msgq, msgq)
- Self::POST_PTE_OFFSET;
/// Number of page table entries for the GSP shared region, i.e. the size of [`GspMem`] in GSP
/// pages.
pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
/// Creates a new command queue for `dev`.
///
/// The shared memory region is allocated and initialized by [`DmaGspMem::new`]; the sequence
/// number starts at zero.
///
/// # Errors
///
/// Propagates any error returned by [`DmaGspMem::new`].
pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
    DmaGspMem::new(dev).map(|gsp_mem| Cmdq {
        dev: dev.into(),
        seq: 0,
        gsp_mem,
    })
}
/// Computes the checksum of the message whose bytes are yielded by `it`.
///
/// A message is made of several parts, so `it` is expected to chain the bytes of all these
/// parts in order.
///
/// Each byte is XOR-ed into a 64-bit accumulator at the byte lane given by its position modulo
/// 8; the result is the XOR of the upper and lower 32-bit halves of that accumulator.
fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 {
    let mut folded: u64 = 0;

    for (position, byte) in it.enumerate() {
        // Byte lane (in bits) this byte lands in: 0, 8, ..., 56.
        let lane = ((position % 8) * 8) as u32;
        folded ^= u64::from(byte).rotate_left(lane);
    }

    let upper = (folded >> 32) as u32;
    let lower = folded as u32;

    upper ^ lower
}
/// Notifies the GSP that we have updated the command queue pointers.
///
/// This is done by writing `NV_PGSP_QUEUE_HEAD` with an address of `0` through `bar`.
fn notify_gsp(bar: &Bar0) {
    let queue_head = regs::NV_PGSP_QUEUE_HEAD::default().set_address(0);

    queue_head.write(bar);
}
/// Sends `command` to the GSP.
///
/// The command is built in place in the command queue: space is reserved, the header and the
/// command structure are initialized, the optional variable-length payload is written, the
/// checksum is computed, and finally the write pointer is advanced and the GSP notified.
///
/// # Errors
///
/// - `EAGAIN` if there was not enough space in the command queue to send the command.
/// - `EIO` if the variable payload requested by the command has not been entirely
/// written to by its [`CommandToGsp::init_variable_payload`] method, or if the reserved space
/// could not be interpreted as a `M::Command`.
///
/// Error codes returned by the command initializers are propagated as-is.
pub(crate) fn send_command<M>(&mut self, bar: &Bar0, command: M) -> Result
where
M: CommandToGsp,
// This allows all error types, including `Infallible`, to be used for `M::InitError`.
Error: From<M::InitError>,
{
// Total size of the command body: fixed-size structure plus variable-length payload.
let command_size = size_of::<M::Command>() + command.variable_payload_len();
let dst = self.gsp_mem.allocate_command(command_size)?;
// Extract area for the command itself.
let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?;
// Fill the header and command in-place.
let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION);
// SAFETY: `dst.header` and `cmd` are valid references, and not touched if the initializer
// fails.
unsafe {
msg_element.__init(core::ptr::from_mut(dst.header))?;
command.init().__init(core::ptr::from_mut(cmd))?;
}
// Fill the variable-length payload.
if command_size > size_of::<M::Command>() {
let mut sbuffer =
SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]);
command.init_variable_payload(&mut sbuffer)?;
// The payload initializer must consume the whole buffer it was given.
if !sbuffer.is_empty() {
return Err(EIO);
}
}
// Compute checksum now that the whole message is ready.
dst.header
.set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([
dst.header.as_bytes(),
dst.contents.0,
dst.contents.1,
])));
dev_dbg!(
&self.dev,
"GSP RPC: send: seq# {}, function={}, length=0x{:x}\n",
self.seq,
M::FUNCTION,
dst.header.length(),
);
// All set - update the write pointer and inform the GSP of the new command.
let elem_count = dst.header.element_count();
self.seq += 1;
self.gsp_mem.advance_cpu_write_ptr(elem_count);
Cmdq::notify_gsp(bar);
Ok(())
}
/// Wait for a message to become available on the message queue.
///
/// This works purely at the transport layer and does not interpret or validate the message
/// beyond the advertised length in its [`GspMsgElement`] and its checksum.
///
/// This method returns a [`GspMessage`] made of:
///
/// - A reference to the [`GspMsgElement`] of the message,
/// - Two byte slices with the contents of the message. The second slice is empty unless the
///   message loops across the message queue.
///
/// The read pointer is not advanced by this method.
///
/// # Errors
///
/// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
/// - `EIO` if there was some inconsistency (e.g. message shorter than advertised, or bad
///   checksum) on the message queue.
fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {
    // Wait for a message to arrive from the GSP.
    let (slice_1, slice_2) = read_poll_timeout(
        || Ok(self.gsp_mem.driver_read_area()),
        |driver_area| !driver_area.0.is_empty(),
        Delta::from_millis(1),
        timeout,
    )
    .map(|(slice_1, slice_2)| {
        #[allow(clippy::incompatible_msrv)]
        (slice_1.as_flattened(), slice_2.as_flattened())
    })?;

    // Extract the `GspMsgElement`.
    let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?;

    dev_dbg!(
        self.dev,
        "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n",
        header.sequence(),
        header.function(),
        header.length(),
    );

    // Check that the driver read area is large enough for the message.
    if slice_1.len() + slice_2.len() < header.length() {
        return Err(EIO);
    }

    // Cut the message slices down to the actual length of the message.
    let (slice_1, slice_2) = if slice_1.len() > header.length() {
        // PANIC: this branch is only taken when `slice_1` is longer than `header.length()`.
        (slice_1.split_at(header.length()).0, &slice_2[0..0])
    } else {
        (
            slice_1,
            // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as
            // large as `header.length()`, and `slice_1.len() <= header.length()` in this
            // branch.
            slice_2.split_at(header.length() - slice_1.len()).0,
        )
    };

    // Validate checksum: a valid message, header included, checksums to zero.
    if Cmdq::calculate_checksum(SBufferIter::new_reader([
        header.as_bytes(),
        slice_1,
        slice_2,
    ])) != 0
    {
        dev_err!(
            self.dev,
            "GSP RPC: receive: Call {} - bad checksum\n",
            header.sequence()
        );
        return Err(EIO);
    }

    Ok(GspMessage {
        header,
        contents: (slice_1, slice_2),
    })
}
/// Receive a message from the GSP.
///
/// The expected message is specified using the `M` generic parameter. The message is decoded
/// by [`MessageFromGsp::read`], which receives a reference to the fixed-size part of the
/// message in the message queue, and a [`SBufferIter`] pointing to its variable-length
/// payload, if any.
///
/// If the pending message is different from the expected one, `ERANGE` is returned and the
/// unexpected message is dropped.
///
/// This design is by no means final, but it is simple and will let us go through GSP
/// initialization.
///
/// # Errors
///
/// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available.
/// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the
///   message queue.
/// - `EINVAL` if the function of the message was unrecognized.
/// - `ERANGE` if the function of the message is not `M::FUNCTION`.
///
/// Error codes returned by [`MessageFromGsp::read`] are propagated as-is.
pub(crate) fn receive_msg<M: MessageFromGsp>(&mut self, timeout: Delta) -> Result<M>
where
    // This allows all error types, including `Infallible`, to be used for `M::InitError`.
    Error: From<M::InitError>,
{
    let message = self.wait_for_msg(timeout)?;
    // NOTE(review): an unrecognized function returns here, before the read pointer is
    // advanced, so the offending message stays at the head of the queue - confirm that this
    // is intended.
    let function = message.header.function().map_err(|_| EINVAL)?;

    // Extract the message. Store the result as we want to advance the read pointer even in
    // case of failure, which is why no `?` is used on the extraction path.
    let result = if function == M::FUNCTION {
        M::Message::from_bytes_prefix(message.contents.0)
            .ok_or(EIO)
            .and_then(|(cmd, contents_1)| {
                let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]);

                M::read(cmd, &mut sbuffer).map_err(|e| e.into())
            })
    } else {
        Err(ERANGE)
    };

    // Advance the read pointer past this message.
    self.gsp_mem.advance_cpu_read_ptr(u32::try_from(
        message.header.length().div_ceil(GSP_PAGE_SIZE),
    )?);

    result
}
/// Returns the DMA handle of the command queue's shared memory region.
pub(crate) fn dma_handle(&self) -> DmaAddress {
self.gsp_mem.0.dma_handle()
}
}