// SPDX-License-Identifier: GPL-2.0 use core::{ cmp, mem, sync::atomic::{ fence, Ordering, // }, // }; use kernel::{ device, dma::{ CoherentAllocation, DmaAddress, // }, dma_write, io::poll::read_poll_timeout, prelude::*, sync::aref::ARef, time::Delta, transmute::{ AsBytes, FromBytes, // }, }; use crate::{ driver::Bar0, gsp::{ fw::{ GspMsgElement, MsgFunction, MsgqRxHeader, MsgqTxHeader, // }, PteArray, GSP_PAGE_SHIFT, GSP_PAGE_SIZE, // }, num, regs, sbuffer::SBufferIter, // }; /// Trait implemented by types representing a command to send to the GSP. /// /// The main purpose of this trait is to provide [`Cmdq::send_command`] with the information it /// needs to send a given command. /// /// [`CommandToGsp::init`] in particular is responsible for initializing the command directly /// into the space reserved for it in the command queue buffer. /// /// Some commands may be followed by a variable-length payload. For these, the /// [`CommandToGsp::variable_payload_len`] and [`CommandToGsp::init_variable_payload`] need to be /// defined as well. pub(crate) trait CommandToGsp { /// Function identifying this command to the GSP. const FUNCTION: MsgFunction; /// Type generated by [`CommandToGsp::init`], to be written into the command queue buffer. type Command: FromBytes + AsBytes; /// Error type returned by [`CommandToGsp::init`]. type InitError; /// In-place command initializer responsible for filling the command in the command queue /// buffer. fn init(&self) -> impl Init; /// Size of the variable-length payload following the command structure generated by /// [`CommandToGsp::init`]. /// /// Most commands don't have a variable-length payload, so this is zero by default. fn variable_payload_len(&self) -> usize { 0 } /// Method initializing the variable-length payload. /// /// The command buffer is circular, which means that we may need to jump back to its beginning /// while in the middle of a command. 
For this reason, the variable-length payload is /// initialized using a [`SBufferIter`]. /// /// This method will receive a buffer of the length returned by /// [`CommandToGsp::variable_payload_len`], and must write every single byte of it. Leaving /// unwritten space will lead to an error. /// /// Most commands don't have a variable-length payload, so this does nothing by default. fn init_variable_payload( &self, _dst: &mut SBufferIter>, ) -> Result { Ok(()) } } /// Trait representing messages received from the GSP. /// /// This trait tells [`Cmdq::receive_msg`] how it can receive a given type of message. pub(crate) trait MessageFromGsp: Sized { /// Function identifying this message from the GSP. const FUNCTION: MsgFunction; /// Error type returned by [`MessageFromGsp::read`]. type InitError; /// Type containing the raw message to be read from the message queue. type Message: FromBytes; /// Method reading the message from the message queue and returning it. /// /// From a `Self::Message` and a [`SBufferIter`], constructs an instance of `Self` and returns /// it. fn read( msg: &Self::Message, sbuffer: &mut SBufferIter>, ) -> Result; } /// Number of GSP pages making the [`Msgq`]. pub(crate) const MSGQ_NUM_PAGES: u32 = 0x3f; /// Circular buffer of a [`Msgq`]. /// /// This area of memory is to be shared between the driver and the GSP to exchange commands or /// messages. #[repr(C, align(0x1000))] #[derive(Debug)] struct MsgqData { data: [[u8; GSP_PAGE_SIZE]; num::u32_as_usize(MSGQ_NUM_PAGES)], } // Annoyingly we are forced to use a literal to specify the alignment of // `MsgqData`, so check that it corresponds to the actual GSP page size here. static_assert!(align_of::() == GSP_PAGE_SIZE); /// Unidirectional message queue. /// /// Contains the data for a message queue, that either the driver or GSP writes to. 
///
/// Note that while the write pointer of `tx` corresponds to the `msgq` of the same instance, the
/// read pointer of `rx` actually refers to the `Msgq` owned by the other side.
/// This design ensures that only the driver or GSP ever writes to a given instance of this struct.
#[repr(C)]
// There is no struct defined for this in the open-gpu-kernel-source headers.
// Instead it is defined by code in `GspMsgQueuesInit()`.
struct Msgq {
    /// Header for sending messages, including the write pointer.
    tx: MsgqTxHeader,
    /// Header for receiving messages, including the read pointer.
    rx: MsgqRxHeader,
    /// The message queue proper.
    msgq: MsgqData,
}

/// Structure shared between the driver and the GSP and containing the command and message queues.
#[repr(C)]
struct GspMem {
    /// Self-mapping page table entries.
    ///
    /// The array fills exactly one GSP page.
    // NOTE(review): the element-size type argument was lost in this copy of the file; `u64`
    // (one 64-bit entry per PTE) is restored here - confirm against the `PteArray` definition.
    ptes: PteArray<{ GSP_PAGE_SIZE / size_of::<u64>() }>,
    /// CPU queue: the driver writes commands here, and the GSP reads them. It also contains the
    /// write and read pointers that the CPU updates.
    ///
    /// This member is read-only for the GSP.
    cpuq: Msgq,
    /// GSP queue: the GSP writes messages here, and the driver reads them. It also contains the
    /// write and read pointers that the GSP updates.
    ///
    /// This member is read-only for the driver.
    gspq: Msgq,
}

// SAFETY: These structs don't meet the no-padding requirements of AsBytes but
// that is not a problem because they are not used outside the kernel.
//
// NOTE(review): this argues that exposing padding bytes is harmless here rather than that the
// trait contract is met - confirm this is acceptable for the shared GSP mapping.
unsafe impl AsBytes for GspMem {}

// SAFETY: These structs don't meet the no-padding requirements of FromBytes but
// that is not a problem because they are not used outside the kernel.
//
// NOTE(review): soundness additionally relies on every field accepting arbitrary bit patterns -
// confirm for `MsgqTxHeader`, `MsgqRxHeader` and `PteArray`.
unsafe impl FromBytes for GspMem {}

/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`].
///
/// This provides the low-level functionality to communicate with the GSP, including allocation of
/// queue space to write messages to and management of read/write pointers.
/// /// This is shared with the GSP, with clear ownership rules regarding the command queues: /// /// * The driver owns (i.e. can write to) the part of the CPU message queue between the CPU write /// pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`]. /// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read /// pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`]. struct DmaGspMem(CoherentAllocation); impl DmaGspMem { /// Allocate a new instance and map it for `dev`. fn new(dev: &device::Device) -> Result { const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::() }>(); const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>(); let gsp_mem = CoherentAllocation::::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?; dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?; dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?; dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?; Ok(Self(gsp_mem)) } /// Returns the region of the CPU message queue that the driver is currently allowed to write /// to. /// /// As the message queue is a circular buffer, the region may be discontiguous in memory. In /// that case the second slice will have a non-zero length. fn driver_write_area(&mut self) -> (&mut [[u8; GSP_PAGE_SIZE]], &mut [[u8; GSP_PAGE_SIZE]]) { let tx = self.cpu_write_ptr() as usize; let rx = self.gsp_read_ptr() as usize; // SAFETY: // - The `CoherentAllocation` contains exactly one object. // - We will only access the driver-owned part of the shared memory. // - Per the safety statement of the function, no concurrent access will be performed. let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0]; // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `<= MSGQ_NUM_PAGES`. 
let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx); if rx <= tx { // The area from `tx` up to the end of the ring, and from the beginning of the ring up // to `rx`, minus one unit, belongs to the driver. if rx == 0 { let last = after_tx.len() - 1; (&mut after_tx[..last], &mut before_tx[0..0]) } else { (after_tx, &mut before_tx[..rx]) } } else { // The area from `tx` to `rx`, minus one unit, belongs to the driver. // // PANIC: per the invariants of `cpu_write_ptr` and `gsp_read_ptr`, `rx` and `tx` are // `<= MSGQ_NUM_PAGES`, and the test above ensured that `rx > tx`. (after_tx.split_at_mut(rx - tx).0, &mut before_tx[0..0]) } } /// Returns the region of the GSP message queue that the driver is currently allowed to read /// from. /// /// As the message queue is a circular buffer, the region may be discontiguous in memory. In /// that case the second slice will have a non-zero length. fn driver_read_area(&self) -> (&[[u8; GSP_PAGE_SIZE]], &[[u8; GSP_PAGE_SIZE]]) { let tx = self.gsp_write_ptr() as usize; let rx = self.cpu_read_ptr() as usize; // SAFETY: // - The `CoherentAllocation` contains exactly one object. // - We will only access the driver-owned part of the shared memory. // - Per the safety statement of the function, no concurrent access will be performed. let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0]; // PANIC: per the invariant of `cpu_read_ptr`, `xx` is `<= MSGQ_NUM_PAGES`. let (before_rx, after_rx) = gsp_mem.gspq.msgq.data.split_at(rx); match tx.cmp(&rx) { cmp::Ordering::Equal => (&after_rx[0..0], &after_rx[0..0]), cmp::Ordering::Greater => (&after_rx[..tx], &before_rx[0..0]), cmp::Ordering::Less => (after_rx, &before_rx[..tx]), } } /// Allocates a region on the command queue that is large enough to send a command of `size` /// bytes. /// /// This returns a [`GspCommand`] ready to be written to by the caller. /// /// # Errors /// /// - `EAGAIN` if the driver area is too small to hold the requested command. 
/// - `EIO` if the command header is not properly aligned. fn allocate_command(&mut self, size: usize) -> Result> { // Get the current writable area as an array of bytes. let (slice_1, slice_2) = { let (slice_1, slice_2) = self.driver_write_area(); #[allow(clippy::incompatible_msrv)] (slice_1.as_flattened_mut(), slice_2.as_flattened_mut()) }; // If the GSP is still processing previous messages the shared region // may be full in which case we will have to retry once the GSP has // processed the existing commands. if size_of::() + size > slice_1.len() + slice_2.len() { return Err(EAGAIN); } // Extract area for the `GspMsgElement`. let (header, slice_1) = GspMsgElement::from_bytes_mut_prefix(slice_1).ok_or(EIO)?; // Create the contents area. let (slice_1, slice_2) = if slice_1.len() > size { // Contents fits entirely in `slice_1`. (&mut slice_1[..size], &mut slice_2[0..0]) } else { // Need all of `slice_1` and some of `slice_2`. let slice_2_len = size - slice_1.len(); (slice_1, &mut slice_2[..slice_2_len]) }; Ok(GspCommand { header, contents: (slice_1, slice_2), }) } // Returns the index of the memory page the GSP will write the next message to. // // # Invariants // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_write_ptr(&self) -> u32 { let gsp_mem = self.0.start_ptr(); // SAFETY: // - The 'CoherentAllocation' contains at least one object. // - By the invariants of `CoherentAllocation` the pointer is valid. (unsafe { (*gsp_mem).gspq.tx.write_ptr() } % MSGQ_NUM_PAGES) } // Returns the index of the memory page the GSP will read the next command from. // // # Invariants // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn gsp_read_ptr(&self) -> u32 { let gsp_mem = self.0.start_ptr(); // SAFETY: // - The 'CoherentAllocation' contains at least one object. // - By the invariants of `CoherentAllocation` the pointer is valid. 
(unsafe { (*gsp_mem).gspq.rx.read_ptr() } % MSGQ_NUM_PAGES) } // Returns the index of the memory page the CPU can read the next message from. // // # Invariants // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_read_ptr(&self) -> u32 { let gsp_mem = self.0.start_ptr(); // SAFETY: // - The ['CoherentAllocation'] contains at least one object. // - By the invariants of CoherentAllocation the pointer is valid. (unsafe { (*gsp_mem).cpuq.rx.read_ptr() } % MSGQ_NUM_PAGES) } // Informs the GSP that it can send `elem_count` new pages into the message queue. fn advance_cpu_read_ptr(&mut self, elem_count: u32) { let rptr = self.cpu_read_ptr().wrapping_add(elem_count) % MSGQ_NUM_PAGES; // Ensure read pointer is properly ordered. fence(Ordering::SeqCst); let gsp_mem = self.0.start_ptr_mut(); // SAFETY: // - The 'CoherentAllocation' contains at least one object. // - By the invariants of `CoherentAllocation` the pointer is valid. unsafe { (*gsp_mem).cpuq.rx.set_read_ptr(rptr) }; } // Returns the index of the memory page the CPU can write the next command to. // // # Invariants // // - The returned value is between `0` and `MSGQ_NUM_PAGES`. fn cpu_write_ptr(&self) -> u32 { let gsp_mem = self.0.start_ptr(); // SAFETY: // - The 'CoherentAllocation' contains at least one object. // - By the invariants of `CoherentAllocation` the pointer is valid. (unsafe { (*gsp_mem).cpuq.tx.write_ptr() } % MSGQ_NUM_PAGES) } // Informs the GSP that it can process `elem_count` new pages from the command queue. fn advance_cpu_write_ptr(&mut self, elem_count: u32) { let wptr = self.cpu_write_ptr().wrapping_add(elem_count) & MSGQ_NUM_PAGES; let gsp_mem = self.0.start_ptr_mut(); // SAFETY: // - The 'CoherentAllocation' contains at least one object. // - By the invariants of `CoherentAllocation` the pointer is valid. unsafe { (*gsp_mem).cpuq.tx.set_write_ptr(wptr) }; // Ensure all command data is visible before triggering the GSP read. 
fence(Ordering::SeqCst); } } /// A command ready to be sent on the command queue. /// /// This is the type returned by [`DmaGspMem::allocate_command`]. struct GspCommand<'a> { // Writable reference to the header of the command. header: &'a mut GspMsgElement, // Writable slices to the contents of the command. The second slice is zero unless the command // loops over the command queue. contents: (&'a mut [u8], &'a mut [u8]), } /// A message ready to be processed from the message queue. /// /// This is the type returned by [`Cmdq::wait_for_msg`]. struct GspMessage<'a> { // Reference to the header of the message. header: &'a GspMsgElement, // Slices to the contents of the message. The second slice is zero unless the message loops // over the message queue. contents: (&'a [u8], &'a [u8]), } /// GSP command queue. /// /// Provides the ability to send commands and receive messages from the GSP using a shared memory /// area. pub(crate) struct Cmdq { /// Device this command queue belongs to. dev: ARef, /// Current command sequence number. seq: u32, /// Memory area shared with the GSP for communicating commands and messages. gsp_mem: DmaGspMem, } impl Cmdq { /// Offset of the data after the PTEs. const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq); /// Offset of command queue ring buffer. pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq) + core::mem::offset_of!(Msgq, msgq) - Self::POST_PTE_OFFSET; /// Offset of message queue ring buffer. pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq) + core::mem::offset_of!(Msgq, msgq) - Self::POST_PTE_OFFSET; /// Number of page table entries for the GSP shared region. pub(crate) const NUM_PTES: usize = size_of::() >> GSP_PAGE_SHIFT; /// Creates a new command queue for `dev`. pub(crate) fn new(dev: &device::Device) -> Result { let gsp_mem = DmaGspMem::new(dev)?; Ok(Cmdq { dev: dev.into(), seq: 0, gsp_mem, }) } /// Computes the checksum for the message pointed to by `it`. 
/// /// A message is made of several parts, so `it` is an iterator over byte slices representing /// these parts. fn calculate_checksum>(it: T) -> u32 { let sum64 = it .enumerate() .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte)) .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol)); ((sum64 >> 32) as u32) ^ (sum64 as u32) } /// Notifies the GSP that we have updated the command queue pointers. fn notify_gsp(bar: &Bar0) { regs::NV_PGSP_QUEUE_HEAD::default() .set_address(0) .write(bar); } /// Sends `command` to the GSP. /// /// # Errors /// /// - `EAGAIN` if there was not enough space in the command queue to send the command. /// - `EIO` if the variable payload requested by the command has not been entirely /// written to by its [`CommandToGsp::init_variable_payload`] method. /// /// Error codes returned by the command initializers are propagated as-is. pub(crate) fn send_command(&mut self, bar: &Bar0, command: M) -> Result where M: CommandToGsp, // This allows all error types, including `Infallible`, to be used for `M::InitError`. Error: From, { let command_size = size_of::() + command.variable_payload_len(); let dst = self.gsp_mem.allocate_command(command_size)?; // Extract area for the command itself. let (cmd, payload_1) = M::Command::from_bytes_mut_prefix(dst.contents.0).ok_or(EIO)?; // Fill the header and command in-place. let msg_element = GspMsgElement::init(self.seq, command_size, M::FUNCTION); // SAFETY: `msg_header` and `cmd` are valid references, and not touched if the initializer // fails. unsafe { msg_element.__init(core::ptr::from_mut(dst.header))?; command.init().__init(core::ptr::from_mut(cmd))?; } // Fill the variable-length payload. if command_size > size_of::() { let mut sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut dst.contents.1[..]]); command.init_variable_payload(&mut sbuffer)?; if !sbuffer.is_empty() { return Err(EIO); } } // Compute checksum now that the whole message is ready. 
dst.header .set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([ dst.header.as_bytes(), dst.contents.0, dst.contents.1, ]))); dev_dbg!( &self.dev, "GSP RPC: send: seq# {}, function={}, length=0x{:x}\n", self.seq, M::FUNCTION, dst.header.length(), ); // All set - update the write pointer and inform the GSP of the new command. let elem_count = dst.header.element_count(); self.seq += 1; self.gsp_mem.advance_cpu_write_ptr(elem_count); Cmdq::notify_gsp(bar); Ok(()) } /// Wait for a message to become available on the message queue. /// /// This works purely at the transport layer and does not interpret or validate the message /// beyond the advertised length in its [`GspMsgElement`]. /// /// This method returns: /// /// - A reference to the [`GspMsgElement`] of the message, /// - Two byte slices with the contents of the message. The second slice is empty unless the /// message loops across the message queue. /// /// # Errors /// /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the /// message queue. /// /// Error codes returned by the message constructor are propagated as-is. fn wait_for_msg(&self, timeout: Delta) -> Result> { // Wait for a message to arrive from the GSP. let (slice_1, slice_2) = read_poll_timeout( || Ok(self.gsp_mem.driver_read_area()), |driver_area| !driver_area.0.is_empty(), Delta::from_millis(1), timeout, ) .map(|(slice_1, slice_2)| { #[allow(clippy::incompatible_msrv)] (slice_1.as_flattened(), slice_2.as_flattened()) })?; // Extract the `GspMsgElement`. let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?; dev_dbg!( self.dev, "GSP RPC: receive: seq# {}, function={:?}, length=0x{:x}\n", header.sequence(), header.function(), header.length(), ); // Check that the driver read area is large enough for the message. 
if slice_1.len() + slice_2.len() < header.length() { return Err(EIO); } // Cut the message slices down to the actual length of the message. let (slice_1, slice_2) = if slice_1.len() > header.length() { // PANIC: we checked above that `slice_1` is at least as long as `msg_header.length()`. (slice_1.split_at(header.length()).0, &slice_2[0..0]) } else { ( slice_1, // PANIC: we checked above that `slice_1.len() + slice_2.len()` is at least as // large as `msg_header.length()`. slice_2.split_at(header.length() - slice_1.len()).0, ) }; // Validate checksum. if Cmdq::calculate_checksum(SBufferIter::new_reader([ header.as_bytes(), slice_1, slice_2, ])) != 0 { dev_err!( self.dev, "GSP RPC: receive: Call {} - bad checksum", header.sequence() ); return Err(EIO); } Ok(GspMessage { header, contents: (slice_1, slice_2), }) } /// Receive a message from the GSP. /// /// `init` is a closure tasked with processing the message. It receives a reference to the /// message in the message queue, and a [`SBufferIter`] pointing to its variable-length /// payload, if any. /// /// The expected message is specified using the `M` generic parameter. If the pending message /// is different, `EAGAIN` is returned and the unexpected message is dropped. /// /// This design is by no means final, but it is simple and will let us go through GSP /// initialization. /// /// # Errors /// /// - `ETIMEDOUT` if `timeout` has elapsed before any message becomes available. /// - `EIO` if there was some inconsistency (e.g. message shorter than advertised) on the /// message queue. /// - `EINVAL` if the function of the message was unrecognized. pub(crate) fn receive_msg(&mut self, timeout: Delta) -> Result where // This allows all error types, including `Infallible`, to be used for `M::InitError`. Error: From, { let message = self.wait_for_msg(timeout)?; let function = message.header.function().map_err(|_| EINVAL)?; // Extract the message. 
Store the result as we want to advance the read pointer even in // case of failure. let result = if function == M::FUNCTION { let (cmd, contents_1) = M::Message::from_bytes_prefix(message.contents.0).ok_or(EIO)?; let mut sbuffer = SBufferIter::new_reader([contents_1, message.contents.1]); M::read(cmd, &mut sbuffer).map_err(|e| e.into()) } else { Err(ERANGE) }; // Advance the read pointer past this message. self.gsp_mem.advance_cpu_read_ptr(u32::try_from( message.header.length().div_ceil(GSP_PAGE_SIZE), )?); result } /// Returns the DMA handle of the command queue's shared memory region. pub(crate) fn dma_handle(&self) -> DmaAddress { self.gsp_mem.0.dma_handle() } }