diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs index 0121b38c59e6..6ba6bdf143cb 100644 --- a/rust/kernel/fs.rs +++ b/rust/kernel/fs.rs @@ -6,3 +6,6 @@ pub mod file; pub use self::file::{File, LocalFile}; + +mod kiocb; +pub use self::kiocb::Kiocb; diff --git a/rust/kernel/fs/kiocb.rs b/rust/kernel/fs/kiocb.rs new file mode 100644 index 000000000000..84c936cd69b0 --- /dev/null +++ b/rust/kernel/fs/kiocb.rs @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Kernel IO callbacks. +//! +//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) + +use core::marker::PhantomData; +use core::ptr::NonNull; +use kernel::types::ForeignOwnable; + +/// Wrapper for the kernel's `struct kiocb`. +/// +/// Currently this abstraction is incomplete and is essentially just a tuple containing a +/// reference to a file and a file position. +/// +/// The type `T` represents the filesystem or driver specific data associated with the file. +/// +/// # Invariants +/// +/// `inner` points at a valid `struct kiocb` whose file has the type `T` as its private data. +pub struct Kiocb<'a, T> { + inner: NonNull, + _phantom: PhantomData<&'a T>, +} + +impl<'a, T: ForeignOwnable> Kiocb<'a, T> { + /// Create a `Kiocb` from a raw pointer. + /// + /// # Safety + /// + /// The pointer must reference a valid `struct kiocb` for the duration of `'a`. The private + /// data of the file must be `T`. + pub unsafe fn from_raw(kiocb: *mut bindings::kiocb) -> Self { + Self { + // SAFETY: If a pointer is valid it is not null. + inner: unsafe { NonNull::new_unchecked(kiocb) }, + _phantom: PhantomData, + } + } + + /// Access the underlying `struct kiocb` directly. + pub fn as_raw(&self) -> *mut bindings::kiocb { + self.inner.as_ptr() + } + + /// Get the filesystem or driver specific data associated with the file. 
+ pub fn file(&self) -> ::Borrowed<'a> { + // SAFETY: We have shared access to this kiocb and hence the underlying file, so we can + // read the file's private data. + let private = unsafe { (*(*self.as_raw()).ki_filp).private_data }; + // SAFETY: The kiocb has shared access to the private data. + unsafe { ::borrow(private) } + } + + /// Gets the current value of `ki_pos`. + pub fn ki_pos(&self) -> i64 { + // SAFETY: We have shared access to the kiocb, so we can read its `ki_pos` field. + unsafe { (*self.as_raw()).ki_pos } + } + + /// Gets a mutable reference to the `ki_pos` field. + pub fn ki_pos_mut(&mut self) -> &mut i64 { + // SAFETY: We have exclusive access to the kiocb, so we can write to `ki_pos`. + unsafe { &mut (*self.as_raw()).ki_pos } + } +} diff --git a/rust/kernel/iov.rs b/rust/kernel/iov.rs new file mode 100644 index 000000000000..43bae8923c46 --- /dev/null +++ b/rust/kernel/iov.rs @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2025 Google LLC. + +//! IO vectors. +//! +//! C headers: [`include/linux/iov_iter.h`](srctree/include/linux/iov_iter.h), +//! [`include/linux/uio.h`](srctree/include/linux/uio.h) + +use crate::{ + alloc::{Allocator, Flags}, + bindings, + prelude::*, + types::Opaque, +}; +use core::{marker::PhantomData, mem::MaybeUninit, ptr, slice}; + +const ITER_SOURCE: bool = bindings::ITER_SOURCE != 0; +const ITER_DEST: bool = bindings::ITER_DEST != 0; + +// Compile-time assertion for the above constants. +const _: () = { + build_assert!( + ITER_SOURCE != ITER_DEST, + "ITER_DEST and ITER_SOURCE should be different." + ); +}; + +/// An IO vector that acts as a source of data. +/// +/// The data may come from many different sources. This includes both things in kernel-space and +/// reading from userspace. It's not necessarily the case that the data source is immutable, so +/// rewinding the IO vector to read the same data twice is not guaranteed to result in the same +/// bytes. 
It's also possible that the data source is mapped in a thread-local manner using e.g. +/// `kmap_local_page()`, so this type is not `Send` to ensure that the mapping is read from the +/// right context in that scenario. +/// +/// # Invariants +/// +/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_SOURCE`. For the duration +/// of `'data`, it must be safe to read from this IO vector using the standard C methods for this +/// purpose. +#[repr(transparent)] +pub struct IovIterSource<'data> { + iov: Opaque, + /// Represent to the type system that this value contains a pointer to readable data it does + /// not own. + _source: PhantomData<&'data [u8]>, +} + +impl<'data> IovIterSource<'data> { + /// Obtain an `IovIterSource` from a raw pointer. + /// + /// # Safety + /// + /// * The referenced `struct iov_iter` must be valid and must only be accessed through the + /// returned reference for the duration of `'iov`. + /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_SOURCE`. + /// * For the duration of `'data`, it must be safe to read from this IO vector using the + /// standard C methods for this purpose. + #[track_caller] + #[inline] + pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterSource<'data> { + // SAFETY: The caller ensures that `ptr` is valid. + let data_source = unsafe { (*ptr).data_source }; + assert_eq!(data_source, ITER_SOURCE); + + // SAFETY: The caller ensures the type invariants for the right durations, and + // `IovIterSource` is layout compatible with `struct iov_iter`. + unsafe { &mut *ptr.cast::>() } + } + + /// Access this as a raw `struct iov_iter`. + #[inline] + pub fn as_raw(&mut self) -> *mut bindings::iov_iter { + self.iov.get() + } + + /// Returns the number of bytes available in this IO vector. + /// + /// Note that this may overestimate the number of bytes. 
For example, reading from userspace + /// memory could fail with `EFAULT`, which will be treated as the end of the IO vector. + #[inline] + pub fn len(&self) -> usize { + // SAFETY: We have shared access to this IO vector, so we can read its `count` field. + unsafe { + (*self.iov.get()) + .__bindgen_anon_1 + .__bindgen_anon_1 + .as_ref() + .count + } + } + + /// Returns whether this IO vector has no bytes left. + /// + /// This may return `false` even if there are no more bytes available. For example, reading from + /// userspace memory could fail with `EFAULT`, which will be treated as the end of the IO vector. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Advance this IO vector by `bytes` bytes. + /// + /// If `bytes` is larger than the size of this IO vector, it is advanced to the end. + #[inline] + pub fn advance(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector. + unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) }; + } + + /// Advance this IO vector backwards by `bytes` bytes. + /// + /// # Safety + /// + /// The IO vector must not be reverted to before its beginning. + #[inline] + pub unsafe fn revert(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller + // ensures that `bytes` is in bounds. + unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) }; + } + + /// Read data from this IO vector. + /// + /// Returns the number of bytes that have been copied. + #[inline] + pub fn copy_from_iter(&mut self, out: &mut [u8]) -> usize { + // SAFETY: `Self::copy_from_iter_raw` guarantees that it will not write any uninitialized + // bytes in the provided buffer, so `out` is still a valid `u8` slice after this call. + let out = unsafe { &mut *(ptr::from_mut(out) as *mut [MaybeUninit]) }; + + self.copy_from_iter_raw(out).len() + } + + /// Read data from this IO vector and append it to a vector. 
+ /// + /// Returns the number of bytes that have been copied. + #[inline] + pub fn copy_from_iter_vec( + &mut self, + out: &mut Vec, + flags: Flags, + ) -> Result { + out.reserve(self.len(), flags)?; + let len = self.copy_from_iter_raw(out.spare_capacity_mut()).len(); + // SAFETY: + // - `len` is the length of a subslice of the spare capacity, so `len` is at most the + // length of the spare capacity. + // - `Self::copy_from_iter_raw` guarantees that the first `len` bytes of the spare capacity + // have been initialized. + unsafe { out.inc_len(len) }; + Ok(len) + } + + /// Read data from this IO vector into potentially uninitialized memory. + /// + /// Returns the sub-slice of the output that has been initialized. If the returned slice is + /// shorter than the input buffer, then the entire IO vector has been read. + /// + /// This will never write uninitialized bytes to the provided buffer. + #[inline] + pub fn copy_from_iter_raw(&mut self, out: &mut [MaybeUninit]) -> &mut [u8] { + let capacity = out.len(); + let out = out.as_mut_ptr().cast::(); + + // GUARANTEES: The C API guarantees that it does not write uninitialized bytes to the + // provided buffer. + // SAFETY: + // * By the type invariants, it is still valid to read from this IO vector. + // * `out` is valid for writing for `capacity` bytes because it comes from a slice of + // that length. + let len = unsafe { bindings::_copy_from_iter(out.cast(), capacity, self.as_raw()) }; + + // SAFETY: The underlying C API guarantees that initialized bytes have been written to the + // first `len` bytes of `out`. + unsafe { slice::from_raw_parts_mut(out, len) } + } +} + +/// An IO vector that acts as a destination for data. +/// +/// IO vectors support many different types of destinations. This includes both buffers in +/// kernel-space and writing to userspace. It's possible that the destination buffer is mapped in a +/// thread-local manner using e.g. 
`kmap_local_page()`, so this type is not `Send` to ensure that +/// the mapping is written to the right context in that scenario. +/// +/// # Invariants +/// +/// Must hold a valid `struct iov_iter` with `data_source` set to `ITER_DEST`. For the duration of +/// `'data`, it must be safe to write to this IO vector using the standard C methods for this +/// purpose. +#[repr(transparent)] +pub struct IovIterDest<'data> { + iov: Opaque, + /// Represent to the type system that this value contains a pointer to writable data it does + /// not own. + _source: PhantomData<&'data mut [u8]>, +} + +impl<'data> IovIterDest<'data> { + /// Obtain an `IovIterDest` from a raw pointer. + /// + /// # Safety + /// + /// * The referenced `struct iov_iter` must be valid and must only be accessed through the + /// returned reference for the duration of `'iov`. + /// * The referenced `struct iov_iter` must have `data_source` set to `ITER_DEST`. + /// * For the duration of `'data`, it must be safe to write to this IO vector using the + /// standard C methods for this purpose. + #[track_caller] + #[inline] + pub unsafe fn from_raw<'iov>(ptr: *mut bindings::iov_iter) -> &'iov mut IovIterDest<'data> { + // SAFETY: The caller ensures that `ptr` is valid. + let data_source = unsafe { (*ptr).data_source }; + assert_eq!(data_source, ITER_DEST); + + // SAFETY: The caller ensures the type invariants for the right durations, and + // `IovIterDest` is layout compatible with `struct iov_iter`. + unsafe { &mut *ptr.cast::>() } + } + + /// Access this as a raw `struct iov_iter`. + #[inline] + pub fn as_raw(&mut self) -> *mut bindings::iov_iter { + self.iov.get() + } + + /// Returns the number of bytes available in this IO vector. + /// + /// Note that this may overestimate the number of bytes. For example, writing to userspace + /// memory could fail with EFAULT, which will be treated as the end of the IO vector. 
+ #[inline] + pub fn len(&self) -> usize { + // SAFETY: We have shared access to this IO vector, so we can read its `count` field. + unsafe { + (*self.iov.get()) + .__bindgen_anon_1 + .__bindgen_anon_1 + .as_ref() + .count + } + } + + /// Returns whether this IO vector has no bytes left. + /// + /// This may return `false` even if there are no more bytes available. For example, writing to + /// userspace memory could fail with EFAULT, which will be treated as the end of the IO vector. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Advance this IO vector by `bytes` bytes. + /// + /// If `bytes` is larger than the size of this IO vector, it is advanced to the end. + #[inline] + pub fn advance(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector. + unsafe { bindings::iov_iter_advance(self.as_raw(), bytes) }; + } + + /// Advance this IO vector backwards by `bytes` bytes. + /// + /// # Safety + /// + /// The IO vector must not be reverted to before its beginning. + #[inline] + pub unsafe fn revert(&mut self, bytes: usize) { + // SAFETY: By the type invariants, `self.iov` is a valid IO vector, and the caller + // ensures that `bytes` is in bounds. + unsafe { bindings::iov_iter_revert(self.as_raw(), bytes) }; + } + + /// Write data to this IO vector. + /// + /// Returns the number of bytes that were written. If this is shorter than the provided slice, + /// then no more bytes can be written. + #[inline] + pub fn copy_to_iter(&mut self, input: &[u8]) -> usize { + // SAFETY: + // * By the type invariants, it is still valid to write to this IO vector. + // * `input` is valid for `input.len()` bytes. + unsafe { bindings::_copy_to_iter(input.as_ptr().cast(), input.len(), self.as_raw()) } + } + + /// Utility for implementing `read_iter` given the full contents of the file. + /// + /// The full contents of the file being read from is represented by `contents`. 
This call will + /// write the appropriate sub-slice of `contents` and update the file position in `ppos` so + /// that the file will appear to contain `contents` even if it takes multiple reads to read the + /// entire file. + #[inline] + pub fn simple_read_from_buffer(&mut self, ppos: &mut i64, contents: &[u8]) -> Result { + if *ppos < 0 { + return Err(EINVAL); + } + let Ok(pos) = usize::try_from(*ppos) else { + return Ok(0); + }; + if pos >= contents.len() { + return Ok(0); + } + + // BOUNDS: We just checked that `pos < contents.len()` above. + let num_written = self.copy_to_iter(&contents[pos..]); + + // OVERFLOW: `pos+num_written <= contents.len() <= isize::MAX <= i64::MAX`. + *ppos = (pos + num_written) as i64; + + Ok(num_written) + } +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index ed53169e795c..99dbb7b2812e 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -92,6 +92,7 @@ pub mod init; pub mod io; pub mod ioctl; +pub mod iov; pub mod jump_label; #[cfg(CONFIG_KUNIT)] pub mod kunit; diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index 6373fe183b27..35630fc63875 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -13,7 +13,8 @@ device::Device, error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR}, ffi::{c_int, c_long, c_uint, c_ulong}, - fs::File, + fs::{File, Kiocb}, + iov::{IovIterDest, IovIterSource}, mm::virt::VmaNew, prelude::*, seq_file::SeqFile, @@ -141,6 +142,16 @@ fn mmap( build_error!(VTABLE_DEFAULT_ERROR) } + /// Read from this miscdevice. + fn read_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterDest<'_>) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + + /// Write to this miscdevice. + fn write_iter(_kiocb: Kiocb<'_, Self::Ptr>, _iov: &mut IovIterSource<'_>) -> Result { + build_error!(VTABLE_DEFAULT_ERROR) + } + /// Handler for ioctls. /// /// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`]. 
@@ -245,6 +256,46 @@ impl MiscdeviceVTable { 0 } + /// # Safety + /// + /// `kiocb` must correspond to a valid file that is associated with a + /// `MiscDeviceRegistration`. `iter` must be a valid `struct iov_iter` for writing. + unsafe extern "C" fn read_iter( + kiocb: *mut bindings::kiocb, + iter: *mut bindings::iov_iter, + ) -> isize { + // SAFETY: The caller provides a valid `struct kiocb` associated with a + // `MiscDeviceRegistration` file. + let kiocb = unsafe { Kiocb::from_raw(kiocb) }; + // SAFETY: This is a valid `struct iov_iter` for writing. + let iov = unsafe { IovIterDest::from_raw(iter) }; + + match T::read_iter(kiocb, iov) { + Ok(res) => res as isize, + Err(err) => err.to_errno() as isize, + } + } + + /// # Safety + /// + /// `kiocb` must correspond to a valid file that is associated with a + /// `MiscDeviceRegistration`. `iter` must be a valid `struct iov_iter` for reading. + unsafe extern "C" fn write_iter( + kiocb: *mut bindings::kiocb, + iter: *mut bindings::iov_iter, + ) -> isize { + // SAFETY: The caller provides a valid `struct kiocb` associated with a + // `MiscDeviceRegistration` file. + let kiocb = unsafe { Kiocb::from_raw(kiocb) }; + // SAFETY: This is a valid `struct iov_iter` for reading. + let iov = unsafe { IovIterSource::from_raw(iter) }; + + match T::write_iter(kiocb, iov) { + Ok(res) => res as isize, + Err(err) => err.to_errno() as isize, + } + } + /// # Safety /// /// `file` must be a valid file that is associated with a `MiscDeviceRegistration`. 
@@ -341,6 +392,16 @@ impl MiscdeviceVTable { open: Some(Self::open), release: Some(Self::release), mmap: if T::HAS_MMAP { Some(Self::mmap) } else { None }, + read_iter: if T::HAS_READ_ITER { + Some(Self::read_iter) + } else { + None + }, + write_iter: if T::HAS_WRITE_ITER { + Some(Self::write_iter) + } else { + None + }, unlocked_ioctl: if T::HAS_IOCTL { Some(Self::ioctl) } else { diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs index 911b9320d3c0..d69bc33dbd99 100644 --- a/samples/rust/rust_misc_device.rs +++ b/samples/rust/rust_misc_device.rs @@ -100,8 +100,9 @@ use kernel::{ c_str, device::Device, - fs::File, + fs::{File, Kiocb}, ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, + iov::{IovIterDest, IovIterSource}, miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, new_mutex, prelude::*, @@ -143,6 +144,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit { struct Inner { value: i32, + buffer: KVVec, } #[pin_data(PinnedDrop)] @@ -164,7 +166,10 @@ fn open(_file: &File, misc: &MiscDeviceRegistration) -> Result) -> Result, iov: &mut IovIterDest<'_>) -> Result { + let me = kiocb.file(); + dev_info!(me.dev, "Reading from Rust Misc Device Sample\n"); + + let inner = me.inner.lock(); + // Read the buffer contents, taking the file position into account. + let read = iov.simple_read_from_buffer(kiocb.ki_pos_mut(), &inner.buffer)?; + + Ok(read) + } + + fn write_iter(mut kiocb: Kiocb<'_, Self::Ptr>, iov: &mut IovIterSource<'_>) -> Result { + let me = kiocb.file(); + dev_info!(me.dev, "Writing to Rust Misc Device Sample\n"); + + let mut inner = me.inner.lock(); + + // Replace buffer contents. + inner.buffer.clear(); + let len = iov.copy_from_iter_vec(&mut inner.buffer, GFP_KERNEL)?; + + // Set position to zero so that future `read` calls will see the new contents. 
+ *kiocb.ki_pos_mut() = 0; + + Ok(len) + } + fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result { dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n");