From 0892507f4a0b76eb897afc2bacca85e172512379 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Oct 2025 12:29:09 +0300 Subject: [PATCH 001/305] mfd: ls2kbmc: Fix an IS_ERR() vs NULL check in probe() The devm_kzalloc() function returns NULL on error so check for that instead of error pointers. Fixes: d952bba3fbb5 ("mfd: ls2kbmc: Add Loongson-2K BMC reset function support") Signed-off-by: Dan Carpenter Message-ID: Signed-off-by: Corey Minyard --- drivers/mfd/ls2k-bmc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ls2k-bmc-core.c b/drivers/mfd/ls2k-bmc-core.c index e162b3c7c9f8..5f38514fa89e 100644 --- a/drivers/mfd/ls2k-bmc-core.c +++ b/drivers/mfd/ls2k-bmc-core.c @@ -469,7 +469,7 @@ static int ls2k_bmc_probe(struct pci_dev *dev, const struct pci_device_id *id) return ret; ddata = devm_kzalloc(&dev->dev, sizeof(*ddata), GFP_KERNEL); - if (IS_ERR(ddata)) { + if (!ddata) { ret = -ENOMEM; goto disable_pci; } From 4af66c2bcab06e6e515b23139122e745d7619680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Oct 2025 12:29:18 +0300 Subject: [PATCH 002/305] mfd: ls2kbmc: check for devm_mfd_add_devices() failure Call pci_disable_device() if devm_mfd_add_devices() fails. 
Fixes: 0d64f6d1ffe9 ("mfd: ls2kbmc: Introduce Loongson-2K BMC core driver") Signed-off-by: Dan Carpenter Message-ID: Signed-off-by: Corey Minyard --- drivers/mfd/ls2k-bmc-core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/ls2k-bmc-core.c b/drivers/mfd/ls2k-bmc-core.c index 5f38514fa89e..69387dad6661 100644 --- a/drivers/mfd/ls2k-bmc-core.c +++ b/drivers/mfd/ls2k-bmc-core.c @@ -495,9 +495,13 @@ static int ls2k_bmc_probe(struct pci_dev *dev, const struct pci_device_id *id) goto disable_pci; } - return devm_mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, - ls2k_bmc_cells, ARRAY_SIZE(ls2k_bmc_cells), - &dev->resource[0], 0, NULL); + ret = devm_mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO, + ls2k_bmc_cells, ARRAY_SIZE(ls2k_bmc_cells), + &dev->resource[0], 0, NULL); + if (ret) + goto disable_pci; + + return 0; disable_pci: pci_disable_device(dev); From 15623c860c93aac71d22e7bedb7661ff2d3418de Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Mon, 29 Sep 2025 11:02:05 +0200 Subject: [PATCH 003/305] nsfs: handle inode number mismatches gracefully in file handles Replace VFS_WARN_ON_ONCE() with graceful error handling when file handles contain inode numbers that don't match the actual namespace inode. This prevents userspace from triggering kernel warnings by providing malformed file handles to open_by_handle_at(). The issue occurs when userspace provides a file handle with valid namespace type and ID that successfully locates a namespace, but specifies an incorrect inode number. Previously, this would trigger VFS_WARN_ON_ONCE() when comparing the real inode number against the provided value. Since file handle data is user-controllable, inode number mismatches should be treated as invalid input rather than kernel consistency errors. Handle this case by returning NULL to indicate the file handle is invalid, rather than warning about what is essentially user input validation. 
Reported-by: syzbot+9eefe09bedd093f156c2@syzkaller.appspotmail.com Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Deepanshu Kartikey Signed-off-by: Christian Brauner --- fs/nsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 648dc59bef7f..79b026a36fb6 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -490,7 +490,9 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id); VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type); - VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum); + + if (ns->inum != fid->ns_inum) + return NULL; if (!__ns_ref_get(ns)) return NULL; From deafd21efdd106f9744e2339e0c70c0f4ba565c3 Mon Sep 17 00:00:00 2001 From: Zhou Yuhang Date: Wed, 24 Sep 2025 20:21:39 +0800 Subject: [PATCH 004/305] fs: update comment in init_file() The f_count member in struct file has been replaced by f_ref, so update f_count to f_ref in the comment. Signed-off-by: Zhou Yuhang Signed-off-by: Christian Brauner --- fs/file_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index b223d873e48b..cd4a3db4659a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -192,7 +192,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred) f->f_sb_err = 0; /* - * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While + * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While * fget-rcu pattern users need to be able to handle spurious * refcount bumps we should reinitialize the reused file first. 
*/ From d68a29a6a229f8b4f3b19dbcd0bb02881316d642 Mon Sep 17 00:00:00 2001 From: Tong Li Date: Tue, 30 Sep 2025 19:02:58 +0800 Subject: [PATCH 005/305] rust: file: add intra-doc link for 'EBADF' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `BadFdError` doc comment mentions the `EBADF` constant but does not currently provide a navigation target for readers of the generated docs. Turning the references into intra-doc links matches the rest of the module and makes the documentation easier to explore. Suggested-by: Onur Özkan Link: https://github.com/Rust-for-Linux/linux/issues/1186 Signed-off-by: Tong Li Reviewed-by: Onur Özkan Signed-off-by: Christian Brauner --- rust/kernel/fs/file.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index cf06e73a6da0..cd6987850332 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -448,9 +448,9 @@ fn drop(&mut self) { } } -/// Represents the `EBADF` error code. +/// Represents the [`EBADF`] error code. /// -/// Used for methods that can only fail with `EBADF`. +/// Used for methods that can only fail with [`EBADF`]. #[derive(Copy, Clone, Eq, PartialEq)] pub struct BadFdError; From 154d1e7ad9e5ce4b2aaefd3862b3dba545ad978d Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Tue, 30 Sep 2025 13:42:57 +0800 Subject: [PATCH 006/305] dax: skip read lock assertion for read-only filesystems The commit 168316db3583("dax: assert that i_rwsem is held exclusive for writes") added lock assertions to ensure proper locking in DAX operations. However, these assertions trigger false-positive lockdep warnings since read lock is unnecessary on read-only filesystems(e.g., erofs). This patch skips the read lock assertion for read-only filesystems, eliminating the spurious warnings while maintaining the integrity checks for writable filesystems. 
Fixes: 168316db3583 ("dax: assert that i_rwsem is held exclusive for writes") Signed-off-by: Yuezhang Mo Reviewed-by: Friendy Su Reviewed-by: Daniel Palmer Reviewed-by: Gao Xiang Signed-off-by: Christian Brauner --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 89f071ba7b10..516f995a988c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1725,7 +1725,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { lockdep_assert_held_write(&iomi.inode->i_rwsem); iomi.flags |= IOMAP_WRITE; - } else { + } else if (!sb_rdonly(iomi.inode->i_sb)) { lockdep_assert_held(&iomi.inode->i_rwsem); } From 56094ad3eaa21e6621396cc33811d8f72847a834 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Oct 2025 17:55:07 +0200 Subject: [PATCH 007/305] vfs: Don't leak disconnected dentries on umount When user calls open_by_handle_at() on some inode that is not cached, we will create disconnected dentry for it. If such dentry is a directory, exportfs_decode_fh_raw() will then try to connect this dentry to the dentry tree through reconnect_path(). It may happen for various reasons (such as corrupted fs or race with rename) that the call to lookup_one_unlocked() in reconnect_one() will fail to find the dentry we are trying to reconnect and instead create a new dentry under the parent. Now this dentry will not be marked as disconnected although the parent still may well be disconnected (at least in case this inconsistency happened because the fs is corrupted and .. doesn't point to the real parent directory). This creates inconsistency in disconnected flags but AFAICS it was mostly harmless. At least until commit f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") which removed adding of most disconnected dentries to sb->s_anon list. Thus after this commit cleanup of disconnected dentries implicitly relies on the fact that dput() will immediately reclaim such dentries. 
However when some leaf dentry isn't marked as disconnected, as in the scenario described above, the reclaim doesn't happen and the dentries are "leaked". Memory reclaim can eventually reclaim them but otherwise they stay in memory and if umount comes first, we hit the infamous "Busy inodes after unmount" bug. Make sure all dentries created under a disconnected parent are marked as disconnected as well. Reported-by: syzbot+1d79ebe5383fc016cf07@syzkaller.appspotmail.com Fixes: f1ee616214cb ("VFS: don't keep disconnected dentries on d_anon") CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Christian Brauner --- fs/dcache.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index a067fa0a965a..035cccbc9276 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2557,6 +2557,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent, spin_lock(&parent->d_lock); new->d_parent = dget_dlock(parent); hlist_add_head(&new->d_sib, &parent->d_children); + if (parent->d_flags & DCACHE_DISCONNECTED) + new->d_flags |= DCACHE_DISCONNECTED; spin_unlock(&parent->d_lock); retry: From a779e27f24aeb679969ddd1fdd7f636e22ddbc1e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 7 Oct 2025 11:32:42 +0200 Subject: [PATCH 008/305] coredump: fix core_pattern input validation In be1e0283021e ("coredump: don't pointlessly check and spew warnings") we tried to fix input validation so it only happens during a write to core_pattern. This would avoid needlessly logging a lot of warnings during a read operation. However the logic accidentally got inverted in this commit. Fix it so the input validation only happens on write and is skipped on read. 
Fixes: be1e0283021e ("coredump: don't pointlessly check and spew warnings") Fixes: 16195d2c7dd2 ("coredump: validate socket name as it is written") Reviewed-by: Jan Kara Reported-by: Yu Watanabe Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- fs/exec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index b5fc06a092a4..5c1c381ee380 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1468,7 +1468,7 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write, ssize_t retval; char old_core_pattern[CORENAME_MAX_SIZE]; - if (write) + if (!write) return proc_dostring(table, write, buffer, lenp, ppos); retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE); diff --git a/fs/exec.c b/fs/exec.c index 6b70c6726d31..4298e7e08d5d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!error && !write) + if (!error && write) validate_coredump_safety(); return error; } From e2c69490dda5d4c9f1bfbb2898989c8f3530e354 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 6 Oct 2025 13:18:57 -0700 Subject: [PATCH 009/305] ipmi: Fix handling of messages with provided receive message pointer Prior to commit b52da4054ee0 ("ipmi: Rework user message limit handling"), i_ipmi_request() used to increase the user reference counter if the receive message is provided by the caller of IPMI API functions. This is no longer the case. However, ipmi_free_recv_msg() is still called and decreases the reference counter. This results in the reference counter reaching zero, the user data pointer is released, and all kinds of interesting crashes are seen. Fix the problem by increasing user reference counter if the receive message has been provided by the caller. 
Fixes: b52da4054ee0 ("ipmi: Rework user message limit handling") Reported-by: Eric Dumazet Cc: Eric Dumazet Cc: Greg Thelen Signed-off-by: Guenter Roeck Message-ID: <20251006201857.3433837-1-linux@roeck-us.net> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a0b67a35a5f0..3700ab4eba3e 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2301,8 +2301,11 @@ static int i_ipmi_request(struct ipmi_user *user, if (supplied_recv) { recv_msg = supplied_recv; recv_msg->user = user; - if (user) + if (user) { atomic_inc(&user->nr_msgs); + /* The put happens when the message is freed. */ + kref_get(&user->refcount); + } } else { recv_msg = ipmi_alloc_recv_msg(user); if (IS_ERR(recv_msg)) From 1bcc3f87912779f66cdf1789a066046536ca6ccc Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Wed, 24 Sep 2025 10:42:55 -0700 Subject: [PATCH 010/305] KVM: selftests: Test prefault memory during concurrent memslot removal Expand the prefault memory selftest to add a regression test for a KVM bug where KVM's retry logic would result in (breakable) deadlock due to the memslot deletion waiting on prefaulting to release SRCU, and prefaulting waiting on the memslot to fully disappear (KVM uses a two-step process to delete memslots, and KVM x86 retries page faults if a to-be-deleted, a.k.a. INVALID, memslot is encountered). To exercise concurrent memslot remove, spawn a second thread to initiate memslot removal at roughly the same time as prefaulting. Test memslot removal for all testcases, i.e. don't limit concurrent removal to only the success case. There are essentially three prefault scenarios (so far) that are of interest: 1. Success 2. ENOENT due to no memslot 3. 
EAGAIN due to INVALID memslot For all intents and purposes, #1 and #2 are mutually exclusive, or rather, easier to test via separate testcases since writing to non-existent memory is trivial. But for #3, making it mutually exclusive with #1 _or_ #2 is actually more complex than testing memslot removal for all scenarios. The only requirement to let memslot removal coexist with other scenarios is a way to guarantee a stable result, e.g. that the "no memslot" test observes ENOENT, not EAGAIN, for the final checks. So, rather than make memslot removal mutually exclusive with the ENOENT scenario, simply restore the memslot and retry prefaulting. For the "no memslot" case, KVM_PRE_FAULT_MEMORY should be idempotent, i.e. should always fail with ENOENT regardless of how many times userspace attempts prefaulting. Pass in both the base GPA and the offset (instead of the "full" GPA) so that the worker can recreate the memslot. Signed-off-by: Yan Zhao Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20250924174255.2141847-1-seanjc@google.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/pre_fault_memory_test.c | 131 +++++++++++++++--- 1 file changed, 114 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index 0350a8896a2f..f04768c1d2e4 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -10,6 +10,7 @@ #include #include #include +#include /* Arbitrarily chosen values */ #define TEST_SIZE (SZ_2M + PAGE_SIZE) @@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa) GUEST_DONE(); } -static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, - u64 left) +struct slot_worker_data { + struct kvm_vm *vm; + u64 gpa; + uint32_t flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct 
slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) { struct kvm_pre_fault_memory range = { - .gpa = gpa, + .gpa = base_gpa + offset, .size = size, .flags = 0, }; - u64 prev; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; int ret, save_errno; + u64 prev; - do { + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. + */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); + + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); + + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { prev = range.size; ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); save_errno = errno; @@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, "%sexpecting range.size to change on %s", ret < 0 ? "not " : "", ret < 0 ? "failure" : "success"); - } while (ret >= 0 ? range.size : save_errno == EINTR); - TEST_ASSERT(range.size == left, - "Completed with %lld bytes left, expected %" PRId64, - range.size, left); + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. 
+ */ + if (ret < 0 && save_errno == EINTR) + continue; - if (left == 0) - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. + */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } + + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); else - /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. 
*/ - __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, - "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); } static void __test_pre_fault_memory(unsigned long vm_type, bool private) @@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private) if (private) vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); - pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + + pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); vcpu_args_set(vcpu, 1, guest_test_virt_mem); vcpu_run(vcpu); From 2a27f6a8fb5722223d526843040f747e9b0e8060 Mon Sep 17 00:00:00 2001 From: Celeste Liu Date: Tue, 30 Sep 2025 19:34:28 +0800 Subject: [PATCH 011/305] can: gs_usb: increase max interface to U8_MAX This issue was found by Runcheng Lu when develop HSCanT USB to CAN FD converter[1]. The original developers may have only 3 interfaces device to test so they write 3 here and wait for future change. During the HSCanT development, we actually used 4 interfaces, so the limitation of 3 is not enough now. But just increase one is not future-proofed. Since the channel index type in gs_host_frame is u8, just make canch[] become a flexible array with a u8 index, so it naturally constraint by U8_MAX and avoid statically allocate 256 pointer for every gs_usb device. 
[1]: https://github.com/cherry-embedded/HSCanT-hardware Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Reported-by: Runcheng Lu Cc: stable@vger.kernel.org Reviewed-by: Vincent Mailhol Signed-off-by: Celeste Liu Link: https://patch.msgid.link/20250930-gs-usb-max-if-v5-1-863330bf6666@coelacanthus.name Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index c9482d6e947b..9fb4cbbd6d6d 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -289,11 +289,6 @@ struct gs_host_frame { #define GS_MAX_RX_URBS 30 #define GS_NAPI_WEIGHT 32 -/* Maximum number of interfaces the driver supports per device. - * Current hardware only supports 3 interfaces. The future may vary. - */ -#define GS_MAX_INTF 3 - struct gs_tx_context { struct gs_can *dev; unsigned int echo_id; @@ -324,7 +319,6 @@ struct gs_can { /* usb interface struct */ struct gs_usb { - struct gs_can *canch[GS_MAX_INTF]; struct usb_anchor rx_submitted; struct usb_device *udev; @@ -336,9 +330,11 @@ struct gs_usb { unsigned int hf_size_rx; u8 active_channels; + u8 channel_cnt; unsigned int pipe_in; unsigned int pipe_out; + struct gs_can *canch[] __counted_by(channel_cnt); }; /* 'allocate' a tx context. 
@@ -599,7 +595,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) } /* device reports out of range channel id */ - if (hf->channel >= GS_MAX_INTF) + if (hf->channel >= parent->channel_cnt) goto device_detach; dev = parent->canch[hf->channel]; @@ -699,7 +695,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* USB failure take down all interfaces */ if (rc == -ENODEV) { device_detach: - for (rc = 0; rc < GS_MAX_INTF; rc++) { + for (rc = 0; rc < parent->channel_cnt; rc++) { if (parent->canch[rc]) netif_device_detach(parent->canch[rc]->netdev); } @@ -1460,17 +1456,19 @@ static int gs_usb_probe(struct usb_interface *intf, icount = dconf.icount + 1; dev_info(&intf->dev, "Configuring for %u interfaces\n", icount); - if (icount > GS_MAX_INTF) { + if (icount > type_max(parent->channel_cnt)) { dev_err(&intf->dev, "Driver cannot handle more that %u CAN interfaces\n", - GS_MAX_INTF); + type_max(parent->channel_cnt)); return -EINVAL; } - parent = kzalloc(sizeof(*parent), GFP_KERNEL); + parent = kzalloc(struct_size(parent, canch, icount), GFP_KERNEL); if (!parent) return -ENOMEM; + parent->channel_cnt = icount; + init_usb_anchor(&parent->rx_submitted); usb_set_intfdata(intf, parent); @@ -1531,7 +1529,7 @@ static void gs_usb_disconnect(struct usb_interface *intf) return; } - for (i = 0; i < GS_MAX_INTF; i++) + for (i = 0; i < parent->channel_cnt; i++) if (parent->canch[i]) gs_destroy_candev(parent->canch[i]); From a12f0bc764da3781da2019c60826f47a6d7ed64f Mon Sep 17 00:00:00 2001 From: Celeste Liu Date: Tue, 30 Sep 2025 14:53:39 +0800 Subject: [PATCH 012/305] can: gs_usb: gs_make_candev(): populate net_device->dev_port The gs_usb driver supports USB devices with more than 1 CAN channel. In old kernel before 3.15, it uses net_device->dev_id to distinguish different channel in userspace, which was done in commit acff76fa45b4 ("can: gs_usb: gs_make_candev(): set netdev->dev_id"). But since 3.15, the correct way is populating net_device->dev_port. 
And according to documentation, if network device support multiple interface, lack of net_device->dev_port SHALL be treated as a bug. Fixes: acff76fa45b4 ("can: gs_usb: gs_make_candev(): set netdev->dev_id") Cc: stable@vger.kernel.org Signed-off-by: Celeste Liu Link: https://patch.msgid.link/20250930-gs-usb-populate-net_device-dev_port-v1-1-68a065de6937@coelacanthus.name Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 9fb4cbbd6d6d..69b8d6da651b 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1245,6 +1245,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, netdev->flags |= IFF_ECHO; /* we support full roundtrip echo */ netdev->dev_id = channel; + netdev->dev_port = channel; /* dev setup */ strcpy(dev->bt_const.name, KBUILD_MODNAME); From ba569fb07a7e9e9b71e9282e27e993ba859295c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 17:46:32 +0200 Subject: [PATCH 013/305] can: m_can: m_can_plat_remove(): add missing pm_runtime_disable() Commit 227619c3ff7c ("can: m_can: move runtime PM enable/disable to m_can_platform") moved the PM runtime enable from the m_can core driver into the m_can_platform. That patch forgot to move the pm_runtime_disable() to m_can_plat_remove(), so that unloading the m_can_platform driver causes an "Unbalanced pm_runtime_enable!" error message. Add the missing pm_runtime_disable() to m_can_plat_remove() to fix the problem. 
Cc: Patrik Flykt Fixes: 227619c3ff7c ("can: m_can: move runtime PM enable/disable to m_can_platform") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-1-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index b832566efda0..057eaa7b8b4b 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -180,7 +180,7 @@ static void m_can_plat_remove(struct platform_device *pdev) struct m_can_classdev *mcan_class = &priv->cdev; m_can_class_unregister(mcan_class); - + pm_runtime_disable(mcan_class->dev); m_can_class_free_dev(mcan_class->net); } From 3d9db29b45f970d81acf61cf91a65442efbeb997 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 16:56:15 +0200 Subject: [PATCH 014/305] can: m_can: m_can_handle_state_errors(): fix CAN state transition to Error Active The CAN Error State is determined by the receive and transmit error counters. The CAN error counters decrease when reception/transmission is successful, so that a status transition back to the Error Active status is possible. This transition is not handled by m_can_handle_state_errors(). Add the missing detection of the Error Active state to m_can_handle_state_errors() and extend the handling of this state in m_can_handle_state_change(). 
Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Fixes: cd0d83eab2e0 ("can: m_can: m_can_handle_state_change(): fix state change") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-2-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e1d725979685..ac864183a536 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -812,6 +812,9 @@ static int m_can_handle_state_change(struct net_device *dev, u32 timestamp = 0; switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + cdev->can.state = CAN_STATE_ERROR_ACTIVE; + break; case CAN_STATE_ERROR_WARNING: /* error warning state */ cdev->can.can_stats.error_warning++; @@ -841,6 +844,12 @@ static int m_can_handle_state_change(struct net_device *dev, __m_can_get_berr_counter(dev, &bec); switch (new_state) { + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL | CAN_ERR_CNT; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + break; case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL | CAN_ERR_CNT; @@ -877,30 +886,33 @@ static int m_can_handle_state_change(struct net_device *dev, return 1; } -static int m_can_handle_state_errors(struct net_device *dev, u32 psr) +static enum can_state +m_can_state_get_by_psr(struct m_can_classdev *cdev) +{ + u32 reg_psr; + + reg_psr = m_can_read(cdev, M_CAN_PSR); + + if (reg_psr & PSR_BO) + return CAN_STATE_BUS_OFF; + if (reg_psr & PSR_EP) + return CAN_STATE_ERROR_PASSIVE; + if (reg_psr & PSR_EW) + return CAN_STATE_ERROR_WARNING; + + return CAN_STATE_ERROR_ACTIVE; +} + +static int m_can_handle_state_errors(struct net_device *dev) { struct m_can_classdev *cdev = netdev_priv(dev); - int work_done = 0; + enum 
can_state new_state; - if (psr & PSR_EW && cdev->can.state != CAN_STATE_ERROR_WARNING) { - netdev_dbg(dev, "entered error warning state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_ERROR_WARNING); - } + new_state = m_can_state_get_by_psr(cdev); + if (new_state == cdev->can.state) + return 0; - if (psr & PSR_EP && cdev->can.state != CAN_STATE_ERROR_PASSIVE) { - netdev_dbg(dev, "entered error passive state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_ERROR_PASSIVE); - } - - if (psr & PSR_BO && cdev->can.state != CAN_STATE_BUS_OFF) { - netdev_dbg(dev, "entered error bus off state\n"); - work_done += m_can_handle_state_change(dev, - CAN_STATE_BUS_OFF); - } - - return work_done; + return m_can_handle_state_change(dev, new_state); } static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) @@ -1031,8 +1043,7 @@ static int m_can_rx_handler(struct net_device *dev, int quota, u32 irqstatus) } if (irqstatus & IR_ERR_STATE) - work_done += m_can_handle_state_errors(dev, - m_can_read(cdev, M_CAN_PSR)); + work_done += m_can_handle_state_errors(dev); if (irqstatus & IR_ERR_BUS_30X) work_done += m_can_handle_bus_errors(dev, irqstatus, From 4942c42fe1849e6d68dfb5b36ccba344a9fac016 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 6 Aug 2025 18:24:12 +0200 Subject: [PATCH 015/305] can: m_can: m_can_chip_config(): bring up interface in correct state In some SoCs (observed on the STM32MP15) the M_CAN IP core keeps the CAN state and CAN error counters over an internal reset cycle. An external reset is not always possible, due to the shared reset with the other CAN core. This caused the core to not always be in Error Active state when bringing up the controller. Instead of always setting the CAN state to Error Active in m_can_chip_config(), fix this by reading and decoding the Protocol Status Register (PSR) and set the CAN state accordingly. 
Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-3-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ac864183a536..b6db5b57241c 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1617,7 +1617,7 @@ static int m_can_start(struct net_device *dev) netdev_queue_set_dql_min_limit(netdev_get_tx_queue(cdev->net, 0), cdev->tx_max_coalesced_frames); - cdev->can.state = CAN_STATE_ERROR_ACTIVE; + cdev->can.state = m_can_state_get_by_psr(cdev); m_can_enable_all_interrupts(cdev); From a9e30a22d6f23a2684c248871cad4c3061181639 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 12 Aug 2025 16:58:31 +0200 Subject: [PATCH 016/305] can: m_can: fix CAN state in system PM A suspend/resume cycle on a down interface results in the interface coming up in Error Active state. A suspend/resume cycle on an Up interface will always result in Error Active state, regardless of the actual CAN state. During suspend, only set running interfaces to CAN_STATE_SLEEPING. During resume only touch the CAN state of running interfaces. For wakeup sources, set the CAN state depending on the Protocol Status Register (PSR), for non wakeup source interfaces m_can_start() will do the same. 
Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Reviewed-by: Markus Schneider-Pargmann Link: https://patch.msgid.link/20250929-m_can-fix-state-handling-v4-4-682b49b49d9a@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index b6db5b57241c..f2576e577058 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2503,12 +2503,11 @@ int m_can_class_suspend(struct device *dev) } m_can_clk_stop(cdev); + cdev->can.state = CAN_STATE_SLEEPING; } pinctrl_pm_select_sleep_state(dev); - cdev->can.state = CAN_STATE_SLEEPING; - return ret; } EXPORT_SYMBOL_GPL(m_can_class_suspend); @@ -2521,8 +2520,6 @@ int m_can_class_resume(struct device *dev) pinctrl_pm_select_default_state(dev); - cdev->can.state = CAN_STATE_ERROR_ACTIVE; - if (netif_running(ndev)) { ret = m_can_clk_start(cdev); if (ret) @@ -2540,6 +2537,8 @@ int m_can_class_resume(struct device *dev) if (cdev->ops->init) ret = cdev->ops->init(cdev); + cdev->can.state = m_can_state_get_by_psr(cdev); + m_can_write(cdev, M_CAN_IE, cdev->active_interrupts); } else { ret = m_can_start(ndev); From 49836ff2f37dd6d52bfe3153c0bcbd96025a6100 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 9 Oct 2025 08:25:36 +0200 Subject: [PATCH 017/305] can: m_can: replace Dong Aisheng's old email address Dong Aisheng's old Freescale email is not valid anymore and bounces, replace it by the new NXP one. 
Reviewed-by: Dong Aisheng Link: https://patch.msgid.link/20251009-m_can-update-email-address-v1-1-30a268587f69@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .mailmap | 1 + drivers/net/can/m_can/m_can.c | 4 ++-- drivers/net/can/m_can/m_can_platform.c | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.mailmap b/.mailmap index d30f9363a4c9..8160c62f11e9 100644 --- a/.mailmap +++ b/.mailmap @@ -227,6 +227,7 @@ Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> Dmitry Safonov <0x7f454c46@gmail.com> Domen Puncer +Dong Aisheng Douglas Gilbert Drew Fustini diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index f2576e577058..ad4f577c1ef7 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // CAN bus driver for Bosch M_CAN controller // Copyright (C) 2014 Freescale Semiconductor, Inc. -// Dong Aisheng +// Dong Aisheng // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ /* Bosch M_CAN user manual can be obtained from: @@ -2556,7 +2556,7 @@ int m_can_class_resume(struct device *dev) } EXPORT_SYMBOL_GPL(m_can_class_resume); -MODULE_AUTHOR("Dong Aisheng "); +MODULE_AUTHOR("Dong Aisheng "); MODULE_AUTHOR("Dan Murphy "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("CAN bus driver for Bosch M_CAN controller"); diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c index 057eaa7b8b4b..4a412add2b8d 100644 --- a/drivers/net/can/m_can/m_can_platform.c +++ b/drivers/net/can/m_can/m_can_platform.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // IOMapped CAN bus driver for Bosch M_CAN controller // Copyright (C) 2014 Freescale Semiconductor, Inc. 
-// Dong Aisheng +// Dong Aisheng // // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/ @@ -236,7 +236,7 @@ static struct platform_driver m_can_plat_driver = { module_platform_driver(m_can_plat_driver); -MODULE_AUTHOR("Dong Aisheng "); +MODULE_AUTHOR("Dong Aisheng "); MODULE_AUTHOR("Dan Murphy "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("M_CAN driver for IO Mapped Bosch controllers"); From e07e10ae83bdf429f59c8c149173a8c4f29c481e Mon Sep 17 00:00:00 2001 From: Ketil Johnsen Date: Wed, 8 Oct 2025 12:51:11 +0200 Subject: [PATCH 018/305] drm/panthor: Ensure MCU is disabled on suspend Currently the Panthor driver needs the GPU to be powered down between suspend and resume. If this is not done, then the MCU_CONTROL register will be preserved as AUTO, which again will cause a premature FW boot on resume. The FW will go directly into fatal state in this case. This case needs to be handled as there is no guarantee that the GPU will be powered down after the suspend callback on all platforms. The fix is to call panthor_fw_stop() in "pre-reset" path to ensure the MCU_CONTROL register is cleared (set DISABLE). This matches well with the already existing call to panthor_fw_start() from the "post-reset" path. 
Signed-off-by: Ketil Johnsen Acked-by: Boris Brezillon Reviewed-by: Steven Price Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block") Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20251008105112.4077015-1-ketil.johnsen@arm.com --- drivers/gpu/drm/panthor/panthor_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index 36f1034839c2..44a995835188 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -1099,6 +1099,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) } panthor_job_irq_suspend(&ptdev->fw->irq); + panthor_fw_stop(ptdev); } /** From bb642e2d300ee27dcede65cda7ffc47a7047bd69 Mon Sep 17 00:00:00 2001 From: Amit Chaudhary Date: Fri, 26 Sep 2025 12:08:22 -0700 Subject: [PATCH 019/305] nvme-multipath: Skip nr_active increments in RETRY disposition For queue-depth I/O policy, this patch fixes unbalanced I/Os across nvme multipaths. Issue Description: The RETRY disposition incorrectly increments ns->ctrl->nr_active counter and reinitializes iostat start-time. In such cases nr_active counter never goes back to zero until that path disconnects and reconnects. Such a path is not chosen for new I/Os if multiple RETRY cases on a given a path cause its queue-depth counter to be artificially higher compared to other paths. This leads to unbalanced I/Os across paths. The patch skips incrementing nr_active if NVME_MPATH_CNT_ACTIVE is already set. And it skips restarting io stats if NVME_MPATH_IO_STATS is already set. 
base-commit: e989a3da2d371a4b6597ee8dee5c72e407b4db7a Fixes: d4d957b53d91eeb ("nvme-multipath: support io stats on the mpath device") Signed-off-by: Amit Chaudhary Reviewed-by: Randy Jennings Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3da980dc60d9..543e17aead12 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -182,12 +182,14 @@ void nvme_mpath_start_request(struct request *rq) struct nvme_ns *ns = rq->q->queuedata; struct gendisk *disk = ns->head->disk; - if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) { + if ((READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) && + !(nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)) { atomic_inc(&ns->ctrl->nr_active); nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE; } - if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq)) + if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq) || + (nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) return; nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; From 812258ff4166bcd41c7d44707e0591f9ae32ac8c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 8 Sep 2025 14:12:35 +0100 Subject: [PATCH 020/305] rust: cfi: only 64-bit arm and x86 support CFI_CLANG The kernel uses the standard rustc targets for non-x86 targets, and out of those only 64-bit arm's target has kcfi support enabled. For x86, the custom 64-bit target enables kcfi. The HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC config option that allows CFI_CLANG to be used in combination with RUST does not check whether the rustc target supports kcfi. This breaks the build on riscv (and presumably 32-bit arm) when CFI_CLANG and RUST are enabled at the same time. 
Ordinarily, a rustc-option check would be used to detect target support but unfortunately rustc-option filters out the target for reasons given in commit 46e24a545cdb4 ("rust: kasan/kbuild: fix missing flags on first build"). As a result, if the host supports kcfi but the target does not, e.g. when building for riscv on x86_64, the build would remain broken. Instead, make HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC depend on the only two architectures where the target used supports it to fix the build. CC: stable@vger.kernel.org Fixes: ca627e636551e ("rust: cfi: add support for CFI_CLANG with Rust") Signed-off-by: Conor Dooley Acked-by: Miguel Ojeda Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250908-distill-lint-1ae78bcf777c@spud Signed-off-by: Paul Walmsley --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index ebe08b9186ad..74ff01133532 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -965,6 +965,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 + depends on ARM64 || X86_64 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ (!GCOV_KERNEL && !KASAN_GENERIC && !KASAN_SW_TAGS) From 781380d2cdef34559a0125ca6464b90bfc01594f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Mon, 15 Sep 2025 16:32:52 +0200 Subject: [PATCH 021/305] riscv: kgdb: Ensure that BUFMAX > NUMREGBYTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current value of BUFMAX is similar as in other architectures, but as per documentation on KGDB (see 'Documentation/process/debugging/kgdb.rst'), BUFMAX has to be larger than NUMREGBYTES. Some NUMREGBYTES architectures (e.g. 
powerpc or hexagon) actually define BUFMAX in relation to NUMREGBYTES, and thus this condition is always guaranteed. Since 2048 is a value that is generally accepted on all architectures, and that is larger than the current value of NUMREGBYTES, we can keep this value in arch/riscv, but we can at least add a 'static_assert' as an extra measure just in case NUMREGBYTES changes in the future for some unforeseen reason. Signed-off-by: Miquel Sabaté Solà Link: https://lore.kernel.org/r/20250915143252.154955-1-mikisabate@gmail.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/kgdb.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 7559d728c5ff..78b18e2fd771 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -3,14 +3,18 @@ #ifndef __ASM_KGDB_H_ #define __ASM_KGDB_H_ +#include + #ifdef __KERNEL__ #define GDB_SIZEOF_REG sizeof(unsigned long) -#define DBG_MAX_REG_NUM (36) -#define NUMREGBYTES ((DBG_MAX_REG_NUM) * GDB_SIZEOF_REG) +#define DBG_MAX_REG_NUM 36 +#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG) #define CACHE_FLUSH_IS_SAFE 1 #define BUFMAX 2048 +static_assert(BUFMAX > NUMREGBYTES, + "As per KGDB documentation, BUFMAX must be larger than NUMREGBYTES"); #ifdef CONFIG_RISCV_ISA_C #define BREAK_INSTR_SIZE 2 #else @@ -97,6 +101,7 @@ extern unsigned long kgdb_compiled_break; #define DBG_REG_STATUS_OFF 33 #define DBG_REG_BADADDR_OFF 34 #define DBG_REG_CAUSE_OFF 35 +/* NOTE: increase DBG_MAX_REG_NUM if you add more values here. */ extern const char riscv_gdb_stub_feature[64]; From ae9e9f3d67dcef7582a4524047b01e33c5185ddb Mon Sep 17 00:00:00 2001 From: Danil Skrebenkov Date: Fri, 19 Sep 2025 16:28:46 +0300 Subject: [PATCH 022/305] RISC-V: clear hot-unplugged cores from all task mm_cpumasks to avoid rfence errors openSBI v1.7 adds harts checks for ipi operations.
Especially it adds comparison between hmask passed as an argument from linux and mask of online harts (from openSBI side). If they don't fit each other the error occurs. When cpu is offline, cpu_online_mask is explicitly cleared in __cpu_disable. However, there is no explicit clearing of mm_cpumask. mm_cpumask is used for rfence operations that call openSBI RFENCE extension which uses ipi to remote harts. If hart is offline there may be error if mask of linux is not as mask of online harts in openSBI. this patch adds explicit clearing of mm_cpumask for offline hart. Signed-off-by: Danil Skrebenkov Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250919132849.31676-1-danil.skrebenkov@cloudbear.ru [pjw@kernel.org: rewrote subject line for clarity] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpu-hotplug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a1e38ecfc8be..3f50d3dd76c6 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -54,6 +54,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) pr_notice("CPU%u: off\n", cpu); + clear_tasks_mm_cpumask(cpu); /* Verify from the firmware if the cpu is really stopped*/ if (cpu_ops->cpu_is_stopped) ret = cpu_ops->cpu_is_stopped(cpu); From c199745d3ac3f836515a5734a6ca5c6f55a8809b Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Mon, 6 Oct 2025 11:37:42 +0200 Subject: [PATCH 023/305] riscv: entry: fix typo in comment 'instruciton' -> 'instruction' Fix a typo in a comment in the RISC-V entry.S. 
Signed-off-by: Florian Schmaus Link: https://lore.kernel.org/r/20251006093742.53925-1-flo@geekplace.eu [pjw@kernel.org: wrote a basic patch description] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d3d92a4becc7..9b9dec6893b8 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -455,7 +455,7 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - /* instruciton page fault */ + /* instruction page fault */ ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown From 9e68bd803fac49274fde914466fd3b07c4d602c8 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Wed, 10 Sep 2025 17:25:13 +0200 Subject: [PATCH 024/305] riscv: kprobes: Fix probe address validation When adding a kprobe such as "p:probe/tcp_sendmsg _text+15392192", arch_check_kprobe would start iterating all instructions starting from _text until the probed address. Not only is this very inefficient, but literal values in there (e.g. left by function patching) are misinterpreted in a way that causes a desync. Fix this by doing it like x86: start the iteration at the closest preceding symbol instead of the given starting point. 
Fixes: 87f48c7ccc73 ("riscv: kprobe: Fixup kernel panic when probing an illegal position") Signed-off-by: Fabian Vogt Signed-off-by: Marvin Friedrich Acked-by: Guo Ren Link: https://lore.kernel.org/r/6191817.lOV4Wx5bFT@fvogt-thinkpad Signed-off-by: Paul Walmsley --- arch/riscv/kernel/probes/kprobes.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index c0738d6c6498..8723390c7cad 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -49,10 +49,15 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) post_kprobe_handler(p, kcb, regs); } -static bool __kprobes arch_check_kprobe(struct kprobe *p) +static bool __kprobes arch_check_kprobe(unsigned long addr) { - unsigned long tmp = (unsigned long)p->addr - p->offset; - unsigned long addr = (unsigned long)p->addr; + unsigned long tmp, offset; + + /* start iterating at the closest preceding symbol */ + if (!kallsyms_lookup_size_offset(addr, NULL, &offset)) + return false; + + tmp = addr - offset; while (tmp <= addr) { if (tmp == addr) @@ -71,7 +76,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long)insn & 0x1) return -EILSEQ; - if (!arch_check_kprobe(p)) + if (!arch_check_kprobe((unsigned long)p->addr)) return -EILSEQ; /* copy instruction */ From 69a8b62a7aa1e54ff7623064f6507fa29c1d0d4e Mon Sep 17 00:00:00 2001 From: Han Gao Date: Wed, 10 Sep 2025 19:24:01 +0800 Subject: [PATCH 025/305] riscv: acpi: avoid errors caused by probing DT devices when ACPI is used Similar to the ARM64 commit 3505f30fb6a9s ("ARM64 / ACPI: If we chose to boot from acpi then disable FDT"), let's not do DT hardware probing if ACPI is enabled in early boot. This avoids errors caused by repeated driver probing. 
Signed-off-by: Han Gao Link: https://lore.kernel.org/r/20250910112401.552987-1-rabenda.cn@gmail.com [pjw@kernel.org: cleaned up patch description and subject] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/setup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 14235e58c539..b5bc5fc65cea 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -331,11 +331,14 @@ void __init setup_arch(char **cmdline_p) /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); + if (acpi_disabled) { #if IS_ENABLED(CONFIG_BUILTIN_DTB) - unflatten_and_copy_device_tree(); + unflatten_and_copy_device_tree(); #else - unflatten_device_tree(); + unflatten_device_tree(); #endif + } + misc_mem_init(); init_resources(); From 7882d2c45ccba538cddb0615a893a008dd2efcde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 9 Oct 2025 11:18:48 +0200 Subject: [PATCH 026/305] riscv: Respect dependencies of ARCH_HAS_ELF_CORE_EFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This kconfig symbol has dependencies and is only selectable if those dependencies are also enabled. Respect the dependencies. 
Fixes the following warning when configuring an 'allnoconfig': WARNING: unmet direct dependencies detected for ARCH_HAS_ELF_CORE_EFLAGS Depends on [n]: BINFMT_ELF [=n] && ELF_CORE [=y] Selected by [y]: - RISCV [=y] Fixes: 8c94db0ae97c ("binfmt_elf: preserve original ELF e_flags for core dumps") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20251009-riscv-elf-core-eflags-v1-1-e9b45ab6b36d@linutronix.de Signed-off-by: Paul Walmsley --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0c6038dc5dfd..22cda9c452d2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,7 +29,7 @@ config RISCV select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX - select ARCH_HAS_ELF_CORE_EFLAGS + select ARCH_HAS_ELF_CORE_EFLAGS if BINFMT_ELF && ELF_CORE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL From f3426ac54c42c3260096ddc50b5470eb179fb06a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Oct 2025 16:14:18 +0200 Subject: [PATCH 027/305] dpll: zl3073x: Increase maximum size of flash utility Newer firmware bundles contain a flash utility whose size exceeds the currently allowed limit. Increase the maximum allowed size to accommodate the newer utility version. 
Without this patch: # devlink dev flash i2c/1-0070 file fw_nosplit_v3.hex Failed to load firmware Flashing failed Error: zl3073x: FW load failed: [utility] component is too big (11000 bytes) Fixes: ca017409da694 ("dpll: zl3073x: Add firmware loading functionality") Suggested-by: Prathosh Satish Signed-off-by: Ivan Vecera Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251008141418.841053-1-ivecera@redhat.com Signed-off-by: Paolo Abeni --- drivers/dpll/zl3073x/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/fw.c b/drivers/dpll/zl3073x/fw.c index d5418ff74886..def37fe8d9b0 100644 --- a/drivers/dpll/zl3073x/fw.c +++ b/drivers/dpll/zl3073x/fw.c @@ -37,7 +37,7 @@ struct zl3073x_fw_component_info { static const struct zl3073x_fw_component_info component_info[] = { [ZL_FW_COMPONENT_UTIL] = { .name = "utility", - .max_size = 0x2300, + .max_size = 0x4000, .load_addr = 0x20000000, .flash_type = ZL3073X_FLASH_TYPE_NONE, }, From 4dd5b5ac089bb6ea719b7ffb748707ac9cbce4e4 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:17 +0200 Subject: [PATCH 028/305] Revert "fs: make vfs_fileattr_[get|set] return -EOPNOTSUPP" This reverts commit 474b155adf3927d2c944423045757b54aa1ca4de. This patch caused a regression in ioctl_setflags(). Underlying filesystems use EOPNOTSUPP to indicate that a flag is not supported. This error also gets converted in ioctl_setflags(). Therefore, for unsupported flags the error changed from EOPNOTSUPP to ENOIOCTLCMD.
Link: https://lore.kernel.org/linux-xfs/a622643f-1585-40b0-9441-cf7ece176e83@kernel.org/ Signed-off-by: Andrey Albershteyn Signed-off-by: Christian Brauner --- fs/file_attr.c | 12 ++---------- fs/fuse/ioctl.c | 4 ---- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/inode.c | 5 ++++- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/fs/file_attr.c b/fs/file_attr.c index 12424d4945d0..460b2dd21a85 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -84,7 +84,7 @@ int vfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) int error; if (!inode->i_op->fileattr_get) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; error = security_inode_file_getattr(dentry, fa); if (error) @@ -270,7 +270,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, int err; if (!inode->i_op->fileattr_set) - return -EOPNOTSUPP; + return -ENOIOCTLCMD; if (!inode_owner_or_capable(idmap, inode)) return -EPERM; @@ -312,8 +312,6 @@ int ioctl_getflags(struct file *file, unsigned int __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = put_user(fa.flags, argp); return err; @@ -335,8 +333,6 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp) fileattr_fill_flags(&fa, flags); err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; @@ -349,8 +345,6 @@ int ioctl_fsgetxattr(struct file *file, void __user *argp) int err; err = vfs_fileattr_get(file->f_path.dentry, &fa); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; if (!err) err = copy_fsxattr_to_user(&fa, argp); @@ -371,8 +365,6 @@ int ioctl_fssetxattr(struct file *file, void __user *argp) if (!err) { err = vfs_fileattr_set(idmap, dentry, &fa); mnt_drop_write_file(file); - if (err == -EOPNOTSUPP) - err = -ENOIOCTLCMD; } } return err; diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 57032eadca6c..fdc175e93f74 100644 --- a/fs/fuse/ioctl.c +++ 
b/fs/fuse/ioctl.c @@ -536,8 +536,6 @@ int fuse_fileattr_get(struct dentry *dentry, struct file_kattr *fa) cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } @@ -574,7 +572,5 @@ int fuse_fileattr_set(struct mnt_idmap *idmap, cleanup: fuse_priv_ioctl_cleanup(inode, ff); - if (err == -ENOTTY) - err = -EOPNOTSUPP; return err; } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aac7e34f56c1..604a82acd164 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -178,7 +178,7 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old, err = ovl_real_fileattr_get(old, &oldfa); if (err) { /* Ntfs-3g returns -EINVAL for "no fileattr support" */ - if (err == -EOPNOTSUPP || err == -EINVAL) + if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", old->dentry, err); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index aaa4cf579561..e11f310ce092 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -720,7 +720,10 @@ int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa) if (err) return err; - return vfs_fileattr_get(realpath->dentry, fa); + err = vfs_fileattr_get(realpath->dentry, fa); + if (err == -ENOIOCTLCMD) + err = -ENOTTY; + return err; } int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa) From d90ad28e8aa482e397150e22f3762173d918a724 Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 8 Oct 2025 14:44:18 +0200 Subject: [PATCH 029/305] fs: return EOPNOTSUPP from file_setattr/file_getattr syscalls These syscalls call to vfs_fileattr_get/set functions which return ENOIOCTLCMD if filesystem doesn't support setting file attribute on an inode. For syscalls EOPNOTSUPP would be more appropriate return error. 
Signed-off-by: Andrey Albershteyn Reviewed-by: Jan Kara Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- fs/file_attr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/file_attr.c b/fs/file_attr.c index 460b2dd21a85..1dcec88c0680 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -416,6 +416,8 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename, } error = vfs_fileattr_get(filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; if (error) return error; @@ -483,6 +485,8 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename, if (!error) { error = vfs_fileattr_set(mnt_idmap(filepath.mnt), filepath.dentry, &fa); + if (error == -ENOIOCTLCMD || error == -ENOTTY) + error = -EOPNOTSUPP; mnt_drop_write(filepath.mnt); } From 7933a585d70ee496fa341b50b8b0a95b131867ff Mon Sep 17 00:00:00 2001 From: Seong-Gwang Heo Date: Thu, 9 Oct 2025 13:41:48 +0800 Subject: [PATCH 030/305] ovl: remove redundant IOCB_DIO_CALLER_COMP clearing The backing_file_write_iter() function, which is called immediately after this code, already contains identical logic to clear the IOCB_DIO_CALLER_COMP flag along with the same explanatory comment. There is no need to duplicate this operation in the overlayfs code. Signed-off-by: Seong-Gwang Heo Fixes: a6293b3e285c ("fs: factor out backing_file_{read,write}_iter() helpers") Acked-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/overlayfs/file.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index fc52c796061d..7ab2c9daffd0 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -369,11 +369,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); - /* - * Overlayfs doesn't support deferred completions, don't copy - * this property in case it is set by the issuer. 
- */ - ifl &= ~IOCB_DIO_CALLER_COMP; ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx); out_unlock: From 4b47a8601b71ad98833b447d465592d847b4dc77 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 Sep 2025 11:12:06 -0400 Subject: [PATCH 031/305] NFSD: Define a proc_layoutcommit for the FlexFiles layout type Avoid a crash if a pNFS client should happen to send a LAYOUTCOMMIT operation on a FlexFiles layout. Reported-by: Robert Morris Closes: https://lore.kernel.org/linux-nfs/152f99b2-ba35-4dec-93a9-4690e625dccd@oracle.com/T/#t Cc: Thomas Haynes Cc: stable@vger.kernel.org Fixes: 9b9960a0ca47 ("nfsd: Add a super simple flex file server") Signed-off-by: Chuck Lever --- fs/nfsd/flexfilelayout.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index c318cf74e388..0f1a35400cd5 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp, return 0; } +static __be32 +nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, + struct nfsd4_layoutcommit *lcp) +{ + return nfs_ok; +} + const struct nfsd4_layout_ops ff_layout_ops = { .notify_types = NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, @@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ops = { .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, .proc_layoutget = nfsd4_ff_proc_layoutget, .encode_layoutget = nfsd4_ff_encode_layoutget, + .proc_layoutcommit = nfsd4_ff_proc_layoutcommit, }; From 3c652c3a71de1d30d72dc82c3bead8deb48eb749 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 16 Sep 2025 17:33:36 +0800 Subject: [PATCH 032/305] jbd2: ensure that all ongoing I/O complete before freeing blocks When releasing file system metadata blocks in jbd2_journal_forget(), if this buffer has not yet been checkpointed, it may have already been written back, currently be in the process of being written back, or has not 
yet written back. jbd2_journal_forget() calls jbd2_journal_try_remove_checkpoint() to check the buffer's status and add it to the current transaction if it has not been written back. This buffer can only be reallocated after the transaction is committed. jbd2_journal_try_remove_checkpoint() attempts to lock the buffer and check its dirty status while holding the buffer lock. If the buffer has already been written back, everything proceeds normally. However, there are two issues. First, the function returns immediately if the buffer is locked by the write-back process. It does not wait for the write-back to complete. Consequently, until the current transaction is committed and the block is reallocated, there is no guarantee that the I/O will complete. This means that ongoing I/O could write stale metadata to the newly allocated block, potentially corrupting data. Second, the function unlocks the buffer as soon as it detects that the buffer is still dirty. If a concurrent write-back occurs immediately after this unlocking and before clear_buffer_dirty() is called in jbd2_journal_forget(), data corruption can theoretically still occur. Although these two issues are unlikely to occur in practice since the undergoing metadata writeback I/O does not take this long to complete, it's better to explicitly ensure that all ongoing I/O operations are completed. 
Fixes: 597599268e3b ("jbd2: discard dirty data when forgetting an un-journalled buffer") Cc: stable@kernel.org Suggested-by: Jan Kara Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Message-ID: <20250916093337.3161016-2-yi.zhang@huaweicloud.com> Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c7867139af69..3e510564de6e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1659,6 +1659,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) int drop_reserve = 0; int err = 0; int was_modified = 0; + int wait_for_writeback = 0; if (is_handle_aborted(handle)) return -EROFS; @@ -1782,18 +1783,22 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) } /* - * The buffer is still not written to disk, we should - * attach this buffer to current transaction so that the - * buffer can be checkpointed only after the current - * transaction commits. + * The buffer has not yet been written to disk. We should + * either clear the buffer or ensure that the ongoing I/O + * is completed, and attach this buffer to current + * transaction so that the buffer can be checkpointed only + * after the current transaction commits. 
*/ clear_buffer_dirty(bh); + wait_for_writeback = 1; __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); spin_unlock(&journal->j_list_lock); } drop: __brelse(bh); spin_unlock(&jh->b_state_lock); + if (wait_for_writeback) + wait_on_buffer(bh); jbd2_journal_put_journal_head(jh); if (drop_reserve) { /* no need to reserve log space for this block -bzzz */ From 328a782cb138029182e521c08f50eb1587db955d Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Tue, 16 Sep 2025 17:33:37 +0800 Subject: [PATCH 033/305] ext4: wait for ongoing I/O to complete before freeing blocks When freeing metadata blocks in nojournal mode, ext4_forget() calls bforget() to clear the dirty flag on the buffer_head and remove associated mappings. This is acceptable if the metadata has not yet begun to be written back. However, if the write-back has already started but is not yet completed, ext4_forget() will have no effect. Subsequently, ext4_mb_clear_bb() will immediately return the block to the mb allocator. This block can then be reallocated immediately, potentially causing a data corruption issue. Fix this by clearing the buffer's dirty flag and waiting for the ongoing I/O to complete, ensuring that no further writes to stale data will occur.
Fixes: 16e08b14a455 ("ext4: cleanup clean_bdev_aliases() calls") Cc: stable@kernel.org Reported-by: Gao Xiang Closes: https://lore.kernel.org/linux-ext4/a9417096-9549-4441-9878-b1955b899b4e@huaweicloud.com/ Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Message-ID: <20250916093337.3161016-3-yi.zhang@huaweicloud.com> Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b3e9b7bd7978..a0e66bc10093 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -280,9 +280,16 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle, bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); - /* In the no journal case, we can just do a bforget and return */ + /* + * In the no journal case, we should wait for the ongoing buffer + * to complete and do a forget. + */ if (!ext4_handle_valid(handle)) { - bforget(bh); + if (bh) { + clear_buffer_dirty(bh); + wait_on_buffer(bh); + __bforget(bh); + } return 0; } From 4b471b736ea1ce08113a12bd7dcdaea621b0f65f Mon Sep 17 00:00:00 2001 From: Zeno Endemann Date: Thu, 25 Sep 2025 17:24:33 +0200 Subject: [PATCH 034/305] ext4, doc: fix and improve directory hash tree description Some of the details about how directory hash trees work were confusing or outright wrong, this patch should fix those. A note on dx_tail's dt_reserved member, as far as I can tell the kernel never sets this explicitly, so its content is apparently left-overs from what was there before (for the dx_root I've seen remnants of a ext4_dir_entry_tail struct from when the dir was not yet a hash dir). 
Signed-off-by: Zeno Endemann Message-ID: <20250925152435.22749-1-zeno.endemann@mailbox.org> Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/directory.rst | 61 ++++++++++---------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/Documentation/filesystems/ext4/directory.rst b/Documentation/filesystems/ext4/directory.rst index 6eece8e31df8..9b003a4d453f 100644 --- a/Documentation/filesystems/ext4/directory.rst +++ b/Documentation/filesystems/ext4/directory.rst @@ -183,10 +183,10 @@ in the place where the name normally goes. The structure is - det_checksum - Directory leaf block checksum. -The leaf directory block checksum is calculated against the FS UUID, the -directory's inode number, the directory's inode generation number, and -the entire directory entry block up to (but not including) the fake -directory entry. +The leaf directory block checksum is calculated against the FS UUID (or +the checksum seed, if that feature is enabled for the fs), the directory's +inode number, the directory's inode generation number, and the entire +directory entry block up to (but not including) the fake directory entry. Hash Tree Directories ~~~~~~~~~~~~~~~~~~~~~ @@ -196,12 +196,12 @@ new feature was added to ext3 to provide a faster (but peculiar) balanced tree keyed off a hash of the directory entry name. If the EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a hashed btree (htree) to organize and find directory entries. For -backwards read-only compatibility with ext2, this tree is actually -hidden inside the directory file, masquerading as “empty” directory data -blocks! It was stated previously that the end of the linear directory -entry table was signified with an entry pointing to inode 0; this is -(ab)used to fool the old linear-scan algorithm into thinking that the -rest of the directory block is empty so that it moves on. 
+backwards read-only compatibility with ext2, interior tree nodes are actually +hidden inside the directory file, masquerading as “empty” directory entries +spanning the whole block. It was stated previously that directory entries +with the inode set to 0 are treated as unused entries; this is (ab)used to +fool the old linear-scan algorithm into skipping over those blocks containing +the interior tree node data. The root of the tree always lives in the first data block of the directory. By ext2 custom, the '.' and '..' entries must appear at the @@ -209,24 +209,24 @@ beginning of this first block, so they are put here as two ``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of the root node contains metadata about the tree and finally a hash->block map to find nodes that are lower in the htree. If -``dx_root.info.indirect_levels`` is non-zero then the htree has two -levels; the data block pointed to by the root node's map is an interior -node, which is indexed by a minor hash. Interior nodes in this tree -contains a zeroed out ``struct ext4_dir_entry_2`` followed by a -minor_hash->block map to find leafe nodes. Leaf nodes contain a linear -array of all ``struct ext4_dir_entry_2``; all of these entries -(presumably) hash to the same value. If there is an overflow, the -entries simply overflow into the next leaf node, and the -least-significant bit of the hash (in the interior node map) that gets -us to this next leaf node is set. +``dx_root.info.indirect_levels`` is non-zero then the htree has that many +levels and the blocks pointed to by the root node's map are interior nodes. +These interior nodes have a zeroed out ``struct ext4_dir_entry_2`` followed by +a hash->block map to find nodes of the next level. Leaf nodes look like +classic linear directory blocks, but all of its entries have a hash value +equal or greater than the indicated hash of the parent node. 
-To traverse the directory as a htree, the code calculates the hash of -the desired file name and uses it to find the corresponding block -number. If the tree is flat, the block is a linear array of directory -entries that can be searched; otherwise, the minor hash of the file name -is computed and used against this second block to find the corresponding -third block number. That third block number will be a linear array of -directory entries. +The actual hash value for an entry name is only 31 bits, the least-significant +bit is set to 0. However, if there is a hash collision between directory +entries, the least-significant bit may get set to 1 on interior nodes in the +case where these two (or more) hash-colliding entries do not fit into one leaf +node and must be split across multiple nodes. + +To look up a name in such a htree, the code calculates the hash of the desired +file name and uses it to find the leaf node with the range of hash values the +calculated hash falls into (in other words, a lookup works basically the same +as it would in a B-Tree keyed by the hash value), and possibly also scanning +the leaf nodes that follow (in tree order) in case of hash collisions. To traverse the directory as a linear array (such as the old code does), the code simply reads every data block in the directory. The blocks used @@ -319,7 +319,8 @@ of a data block: * - 0x24 - __le32 - block - - The block number (within the directory file) that goes with hash=0. + - The block number (within the directory file) that leads to the left-most + leaf node, i.e. the leaf containing entries with the lowest hash values. * - 0x28 - struct dx_entry - entries[0] @@ -442,7 +443,7 @@ The dx_tail structure is 8 bytes long and looks like this: * - 0x0 - u32 - dt_reserved - - Zero. + - Unused (but still part of the checksum curiously). 
* - 0x4 - __le32 - dt_checksum @@ -450,4 +451,4 @@ The dx_tail structure is 8 bytes long and looks like this: The checksum is calculated against the FS UUID, the htree index header (dx_root or dx_node), all of the htree indices (dx_entry) that are in -use, and the tail block (dx_tail). +use, and the tail block (dx_tail) with the dt_checksum initially set to 0. From 1d3ad183943b38eec2acf72a0ae98e635dc8456b Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Tue, 30 Sep 2025 16:58:10 +0530 Subject: [PATCH 035/305] ext4: detect invalid INLINE_DATA + EXTENTS flag combination syzbot reported a BUG_ON in ext4_es_cache_extent() when opening a verity file on a corrupted ext4 filesystem mounted without a journal. The issue is that the filesystem has an inode with both the INLINE_DATA and EXTENTS flags set: EXT4-fs error (device loop0): ext4_cache_extents:545: inode #15: comm syz.0.17: corrupted extent tree: lblk 0 < prev 66 Investigation revealed that the inode has both flags set: DEBUG: inode 15 - flag=1, i_inline_off=164, has_inline=1, extents_flag=1 This is an invalid combination since an inode should have either: - INLINE_DATA: data stored directly in the inode - EXTENTS: data stored in extent-mapped blocks Having both flags causes ext4_has_inline_data() to return true, skipping extent tree validation in __ext4_iget(). The unvalidated out-of-order extents then trigger a BUG_ON in ext4_es_cache_extent() due to integer underflow when calculating hole sizes. Fix this by detecting this invalid flag combination early in ext4_iget() and rejecting the corrupted inode. 
Cc: stable@kernel.org Reported-and-tested-by: syzbot+038b7bf43423e132b308@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=038b7bf43423e132b308 Suggested-by: Zhang Yi Signed-off-by: Deepanshu Kartikey Reviewed-by: Zhang Yi Message-ID: <20250930112810.315095-1-kartikey406@gmail.com> Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9e4ac87211e..e99306a8f47c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5319,6 +5319,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, } ei->i_flags = le32_to_cpu(raw_inode->i_flags); ext4_set_inode_flags(inode, true); + /* Detect invalid flag combination - can't have both inline data and extents */ + if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) && + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + ext4_error_inode(inode, function, line, 0, + "inode has both inline data and extents flags"); + ret = -EFSCORRUPTED; + goto bad_inode; + } inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); if (ext4_has_feature_64bit(sb)) From 971843c511c3c2f6eda96c6b03442913bfee6148 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 7 Oct 2025 15:49:37 +0200 Subject: [PATCH 036/305] ext4: free orphan info with kvfree Orphan info is now getting allocated with kvmalloc_array(). Free it with kvfree() instead of kfree() to avoid complaints from mm. 
Reported-by: Chris Mason Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big") Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Message-ID: <20251007134936.7291-2-jack@suse.cz> Signed-off-by: Theodore Ts'o --- fs/ext4/orphan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 33c3a89396b1..82d5e7501455 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -513,7 +513,7 @@ void ext4_release_orphan_info(struct super_block *sb) return; for (i = 0; i < oi->of_blocks; i++) brelse(oi->of_binfo[i].ob_bh); - kfree(oi->of_binfo); + kvfree(oi->of_binfo); } static struct ext4_orphan_block_tail *ext4_orphan_block_tail( @@ -637,7 +637,7 @@ int ext4_init_orphan_info(struct super_block *sb) out_free: for (i--; i >= 0; i--) brelse(oi->of_binfo[i].ob_bh); - kfree(oi->of_binfo); + kvfree(oi->of_binfo); out_put: iput(inode); return ret; From 034417c1439a533a315562a57bd340d963eaac6b Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Fri, 10 Oct 2025 08:52:39 +0800 Subject: [PATCH 037/305] KVM: x86/pmu: Don't try to get perf capabilities for hybrid CPUs Explicitly zero kvm_host_pmu instead of attempting to get the perf PMU capabilities when running on a hybrid CPU to avoid running afoul of perf's sanity check. ------------[ cut here ]------------ WARNING: arch/x86/events/core.c:3089 at perf_get_x86_pmu_capability+0xd/0xc0, Call Trace: kvm_x86_vendor_init+0x1b0/0x1a40 [kvm] vmx_init+0xdb/0x260 [kvm_intel] vt_init+0x12/0x9d0 [kvm_intel] do_one_initcall+0x60/0x3f0 do_init_module+0x97/0x2b0 load_module+0x2d08/0x2e30 init_module_from_file+0x96/0xe0 idempotent_init_module+0x117/0x330 __x64_sys_finit_module+0x73/0xe0 Always read the capabilities for non-hybrid CPUs, i.e. don't entirely revert to reading capabilities if and only if KVM wants to use a PMU, as it may be useful to have the host PMU capabilities available, e.g. if only for debug. 
Reported-by: Chaitanya Kumar Borah Closes: https://lore.kernel.org/all/70b64347-2aca-4511-af78-a767d5fa8226@intel.com/ Fixes: 51f34b1e650f ("KVM: x86/pmu: Snapshot host (i.e. perf's) reported PMU capabilities") Suggested-by: Sean Christopherson Signed-off-by: Dapeng Mi Link: https://lore.kernel.org/r/20251010005239.146953-1-dapeng1.mi@linux.intel.com [sean: rework changelog, call out hybrid CPUs in shortlog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 40ac4cb44ed2..487ad19a236e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -108,16 +108,18 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS; - perf_get_x86_pmu_capability(&kvm_host_pmu); - /* * Hybrid PMUs don't play nice with virtualization without careful * configuration by userspace, and KVM's APIs for reporting supported * vPMU features do not account for hybrid PMUs. Disable vPMU support * for hybrid PMUs until KVM gains a way to let userspace opt-in. */ - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { enable_pmu = false; + memset(&kvm_host_pmu, 0, sizeof(kvm_host_pmu)); + } else { + perf_get_x86_pmu_capability(&kvm_host_pmu); + } if (enable_pmu) { /* From d2042d8f96ddefdeee823737f813efe3ab4b4e8d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:54 -0700 Subject: [PATCH 038/305] KVM: Rework KVM_CAP_GUEST_MEMFD_MMAP into KVM_CAP_GUEST_MEMFD_FLAGS Rework the not-yet-released KVM_CAP_GUEST_MEMFD_MMAP into a more generic KVM_CAP_GUEST_MEMFD_FLAGS capability so that adding new flags doesn't require a new capability, and so that developers aren't tempted to bundle multiple flags into a single capability. 
Note, kvm_vm_ioctl_check_extension_generic() can only return a 32-bit value, but that limitation can be easily circumvented by adding e.g. KVM_CAP_GUEST_MEMFD_FLAGS2 in the unlikely event guest_memfd supports more than 32 flags. Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-2-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 10 +++++++--- include/uapi/linux/kvm.h | 2 +- tools/testing/selftests/kvm/guest_memfd_test.c | 13 ++++++------- virt/kvm/kvm_main.c | 7 +++++-- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6ae24c5ca559..7ba92f2ced38 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6432,9 +6432,13 @@ most one mapping per page, i.e. binding multiple memory regions to a single guest_memfd range is not allowed (any number of memory regions can be bound to a single guest_memfd file, but the bound ranges must not overlap). -When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field -supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation -enables mmap() and faulting of guest_memfd memory to host userspace. +The capability KVM_CAP_GUEST_MEMFD_FLAGS enumerates the `flags` that can be +specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags: + + ============================ ================================================ + GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file + descriptor. 
+ ============================ ================================================ When the KVM MMU performs a PFN lookup to service a guest fault and the backing guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6efa98a57ec1..b1d52d0c56ec 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -962,7 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 -#define KVM_CAP_GUEST_MEMFD_MMAP 244 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index b3ca6737f304..3e58bd496104 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -262,19 +262,17 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) static void test_guest_memfd(unsigned long vm_type) { - uint64_t flags = 0; struct kvm_vm *vm; size_t total_size; size_t page_size; + uint64_t flags; int fd; page_size = getpagesize(); total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); - - if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP)) - flags |= GUEST_MEMFD_FLAG_MMAP; + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags, page_size); @@ -328,13 +326,14 @@ static void test_guest_memfd_guest(void) size_t size; int fd, i; - if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP)) + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) return; vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); - TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP), - "Default VM type should always support guest_memfd mmap()"); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & 
GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); size = vm->page_size; fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 226faeaa8e56..e3a268757621 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4928,8 +4928,11 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_KVM_GUEST_MEMFD case KVM_CAP_GUEST_MEMFD: return 1; - case KVM_CAP_GUEST_MEMFD_MMAP: - return !kvm || kvm_arch_supports_gmem_mmap(kvm); + case KVM_CAP_GUEST_MEMFD_FLAGS: + if (!kvm || kvm_arch_supports_gmem_mmap(kvm)) + return GUEST_MEMFD_FLAG_MMAP; + + return 0; #endif default: break; From fe2bf6234e947bf5544db6d386af1df2a8db80f3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:55 -0700 Subject: [PATCH 039/305] KVM: guest_memfd: Add INIT_SHARED flag, reject user page faults if not set Add a guest_memfd flag to allow userspace to state that the underlying memory should be configured to be initialized as shared, and reject user page faults if the guest_memfd instance's memory isn't shared. Because KVM doesn't yet support in-place private<=>shared conversions, all guest_memfd memory effectively follows the initial state. Alternatively, KVM could deduce the initial state based on MMAP, which for all intents and purposes is what KVM currently does. However, implicitly deriving the default state based on MMAP will result in a messy ABI when support for in-place conversions is added. For x86 CoCo VMs, which don't yet support MMAP, memory is currently private by default (otherwise the memory would be unusable). If MMAP implies memory is shared by default, then the default state for CoCo VMs will vary based on MMAP, and from userspace's perspective, will change when in-place conversion support is added. I.e. 
to maintain guest<=>host ABI, userspace would need to immediately convert all memory from shared=>private, which is both ugly and inefficient. The inefficiency could be avoided by adding a flag to state that memory is _private_ by default, irrespective of MMAP, but that would lead to an equally messy and hard to document ABI. Bite the bullet and immediately add a flag to control the default state so that the effective behavior is explicit and straightforward. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Cc: David Hildenbrand Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-3-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 5 +++++ include/uapi/linux/kvm.h | 3 ++- tools/testing/selftests/kvm/guest_memfd_test.c | 15 ++++++++++++--- virt/kvm/guest_memfd.c | 6 +++++- virt/kvm/kvm_main.c | 3 ++- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 7ba92f2ced38..754b662a453c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6438,6 +6438,11 @@ specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags: ============================ ================================================ GUEST_MEMFD_FLAG_MMAP Enable using mmap() on the guest_memfd file descriptor. + GUEST_MEMFD_FLAG_INIT_SHARED Make all memory in the file shared during + KVM_CREATE_GUEST_MEMFD (memory files created + without INIT_SHARED will be marked private). + Shared memory can be faulted into host userspace + page tables. Private memory cannot. 
============================ ================================================ When the KVM MMU performs a PFN lookup to service a guest fault and the backing diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b1d52d0c56ec..52f6000ab020 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1599,7 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) -#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 3e58bd496104..0de56ce3c4e2 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -239,8 +239,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) close(fd1); } -static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) +static void test_guest_memfd_flags(struct kvm_vm *vm) { + uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); size_t page_size = getpagesize(); uint64_t flag; int fd; @@ -274,6 +275,10 @@ static void test_guest_memfd(unsigned long vm_type) vm = vm_create_barebones_type(vm_type); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + /* This test doesn't yet support testing mmap() on private memory. 
*/ + if (!(flags & GUEST_MEMFD_FLAG_INIT_SHARED)) + flags &= ~GUEST_MEMFD_FLAG_MMAP; + test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags, page_size); @@ -292,7 +297,7 @@ static void test_guest_memfd(unsigned long vm_type) test_fallocate(fd, page_size, total_size); test_invalid_punch_hole(fd, page_size, total_size); - test_guest_memfd_flags(vm, flags); + test_guest_memfd_flags(vm); close(fd); kvm_vm_free(vm); @@ -334,9 +339,13 @@ static void test_guest_memfd_guest(void) TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, "Default VM type should support MMAP, supported flags = 0x%x", vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); size = vm->page_size; - fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 94bafd6c558c..cf3afba23a6b 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -328,6 +328,9 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf) if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) return VM_FAULT_SIGBUS; + if (!((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED)) + return VM_FAULT_SIGBUS; + folio = kvm_gmem_get_folio(inode, vmf->pgoff); if (IS_ERR(folio)) { int err = PTR_ERR(folio); @@ -525,7 +528,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) u64 valid_flags = 0; if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP; + valid_flags |= GUEST_MEMFD_FLAG_MMAP | + 
GUEST_MEMFD_FLAG_INIT_SHARED; if (flags & ~valid_flags) return -EINVAL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e3a268757621..5f644ca54af3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4930,7 +4930,8 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return 1; case KVM_CAP_GUEST_MEMFD_FLAGS: if (!kvm || kvm_arch_supports_gmem_mmap(kvm)) - return GUEST_MEMFD_FLAG_MMAP; + return GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED; return 0; #endif From 5d3341d684be80892d8f6f9812f90f9274b81177 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:56 -0700 Subject: [PATCH 040/305] KVM: guest_memfd: Invalidate SHARED GPAs if gmem supports INIT_SHARED When invalidating gmem ranges, e.g. in response to PUNCH_HOLE, process all possible range types (PRIVATE vs. SHARED) for the gmem instance. Since guest_memfd doesn't yet support in-place conversions, simply pivot on INIT_SHARED as a gmem instance can currently only have private or shared memory, not both. Failure to mark shared GPAs for invalidation is benign in the current code base, as only x86's TDX consumes KVM_FILTER_{PRIVATE,SHARED}, and TDX doesn't yet support INIT_SHARED with guest_memfd. However, invalidating only private GPAs is conceptually wrong and a lurking bug, e.g. could result in missed invalidations if ARM starts filtering invalidations based on attributes. 
Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-4-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/guest_memfd.c | 64 +++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index cf3afba23a6b..e10d2c71e78c 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -102,8 +102,17 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) return filemap_grab_folio(inode->i_mapping, index); } -static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode) +{ + if ((u64)inode->i_private & GUEST_MEMFD_FLAG_INIT_SHARED) + return KVM_FILTER_SHARED; + + return KVM_FILTER_PRIVATE; +} + +static void __kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end, + enum kvm_gfn_range_filter attr_filter) { bool flush = false, found_memslot = false; struct kvm_memory_slot *slot; @@ -118,8 +127,7 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, .end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff, .slot = slot, .may_block = true, - /* guest memfd is relevant to only private mappings. 
*/ - .attr_filter = KVM_FILTER_PRIVATE, + .attr_filter = attr_filter, }; if (!found_memslot) { @@ -139,8 +147,21 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, KVM_MMU_UNLOCK(kvm); } -static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, - pgoff_t end) +static void kvm_gmem_invalidate_begin(struct inode *inode, pgoff_t start, + pgoff_t end) +{ + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + enum kvm_gfn_range_filter attr_filter; + struct kvm_gmem *gmem; + + attr_filter = kvm_gmem_get_invalidate_filter(inode); + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_begin(gmem, start, end, attr_filter); +} + +static void __kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, + pgoff_t end) { struct kvm *kvm = gmem->kvm; @@ -151,12 +172,20 @@ static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start, } } -static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start, + pgoff_t end) { struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) + __kvm_gmem_invalidate_end(gmem, start, end); +} + +static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) +{ pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; - struct kvm_gmem *gmem; /* * Bindings must be stable across invalidation to ensure the start+end @@ -164,13 +193,11 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len) */ filemap_invalidate_lock(inode->i_mapping); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(inode, start, end); truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, 
end); + kvm_gmem_invalidate_end(inode, start, end); filemap_invalidate_unlock(inode->i_mapping); @@ -280,8 +307,9 @@ static int kvm_gmem_release(struct inode *inode, struct file *file) * Zap all SPTEs pointed at by this file. Do not free the backing * memory, as its lifetime is associated with the inode, not the file. */ - kvm_gmem_invalidate_begin(gmem, 0, -1ul); - kvm_gmem_invalidate_end(gmem, 0, -1ul); + __kvm_gmem_invalidate_begin(gmem, 0, -1ul, + kvm_gmem_get_invalidate_filter(inode)); + __kvm_gmem_invalidate_end(gmem, 0, -1ul); list_del(&gmem->entry); @@ -403,8 +431,6 @@ static int kvm_gmem_migrate_folio(struct address_space *mapping, static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *folio) { - struct list_head *gmem_list = &mapping->i_private_list; - struct kvm_gmem *gmem; pgoff_t start, end; filemap_invalidate_lock_shared(mapping); @@ -412,8 +438,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol start = folio->index; end = start + folio_nr_pages(folio); - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_begin(gmem, start, end); + kvm_gmem_invalidate_begin(mapping->host, start, end); /* * Do not truncate the range, what action is taken in response to the @@ -424,8 +449,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol * error to userspace. 
*/ - list_for_each_entry(gmem, gmem_list, entry) - kvm_gmem_invalidate_end(gmem, start, end); + kvm_gmem_invalidate_end(mapping->host, start, end); filemap_invalidate_unlock_shared(mapping); From 9aef71c892a55e004419923ba7129abe3e58d9f1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:57 -0700 Subject: [PATCH 041/305] KVM: Explicitly mark KVM_GUEST_MEMFD as depending on KVM_GENERIC_MMU_NOTIFIER Add KVM_GENERIC_MMU_NOTIFIER as a dependency for selecting KVM_GUEST_MEMFD, as guest_memfd relies on kvm_mmu_invalidate_{begin,end}(), which are defined if and only if the generic mmu_notifier implementation is enabled. The missing dependency is currently benign as s390 is the only KVM arch that doesn't utilize the generic mmu_notifier infrastructure, and s390 doesn't currently support guest_memfd. Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-5-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 1b7d5be0b6c4..a01cc5743137 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,6 +113,7 @@ config KVM_GENERIC_MEMORY_ATTRIBUTES bool config KVM_GUEST_MEMFD + depends on KVM_GENERIC_MMU_NOTIFIER select XARRAY_MULTI bool From 44c6cb9fe9888b371e31165b2854bd0f4e2787d4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:58 -0700 Subject: [PATCH 042/305] KVM: guest_memfd: Allow mmap() on guest_memfd for x86 VMs with private memory Allow mmap() on guest_memfd instances for x86 VMs with private memory as the need to track private vs. shared state in the guest_memfd instance is only pertinent to INIT_SHARED. Doing mmap() on private memory isn't terribly useful (yet!), but it's now possible, and will be desirable when guest_memfd gains support for other VMA-based syscalls, e.g. 
mbind() to set NUMA policy. Lift the restriction now, before MMAP support is officially released, so that KVM doesn't need to add another capability to enumerate support for mmap() on private memory. Fixes: 3d3a04fad25a ("KVM: Allow and advertise support for host mmap() on guest_memfd files") Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-6-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 7 ++++--- include/linux/kvm_host.h | 12 +++++++++++- virt/kvm/guest_memfd.c | 9 ++------- virt/kvm/kvm_main.c | 6 +----- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4b8138bd4857..fe3dc3eb4331 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13942,10 +13942,11 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_GUEST_MEMFD /* - * KVM doesn't yet support mmap() on guest_memfd for VMs with private memory - * (the private vs. shared tracking needs to be moved into guest_memfd). + * KVM doesn't yet support initializing guest_memfd memory as shared for VMs + * with private memory (the private vs. shared tracking needs to be moved into + * guest_memfd). 
*/ -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return !kvm_arch_has_private_mem(kvm); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 19b8c4bebb9c..680ca838f018 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,7 +729,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) #endif #ifdef CONFIG_KVM_GUEST_MEMFD -bool kvm_arch_supports_gmem_mmap(struct kvm *kvm); +bool kvm_arch_supports_gmem_init_shared(struct kvm *kvm); + +static inline u64 kvm_gmem_get_supported_flags(struct kvm *kvm) +{ + u64 flags = GUEST_MEMFD_FLAG_MMAP; + + if (!kvm || kvm_arch_supports_gmem_init_shared(kvm)) + flags |= GUEST_MEMFD_FLAG_INIT_SHARED; + + return flags; +} #endif #ifndef kvm_arch_has_readonly_mem diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index e10d2c71e78c..fbca8c0972da 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -485,7 +485,7 @@ static const struct inode_operations kvm_gmem_iops = { .setattr = kvm_gmem_setattr, }; -bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm) +bool __weak kvm_arch_supports_gmem_init_shared(struct kvm *kvm) { return true; } @@ -549,13 +549,8 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) { loff_t size = args->size; u64 flags = args->flags; - u64 valid_flags = 0; - if (kvm_arch_supports_gmem_mmap(kvm)) - valid_flags |= GUEST_MEMFD_FLAG_MMAP | - GUEST_MEMFD_FLAG_INIT_SHARED; - - if (flags & ~valid_flags) + if (flags & ~kvm_gmem_get_supported_flags(kvm)) return -EINVAL; if (size <= 0 || !PAGE_ALIGNED(size)) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f644ca54af3..b7a0ae2a7b20 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4929,11 +4929,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_GUEST_MEMFD: return 1; case KVM_CAP_GUEST_MEMFD_FLAGS: - if (!kvm || 
kvm_arch_supports_gmem_mmap(kvm)) - return GUEST_MEMFD_FLAG_MMAP | - GUEST_MEMFD_FLAG_INIT_SHARED; - - return 0; + return kvm_gmem_get_supported_flags(kvm); #endif default: break; From 3a6c08538c742624c60cb6a53dd61eb025e0d1e1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:25:59 -0700 Subject: [PATCH 043/305] KVM: selftests: Stash the host page size in a global in the guest_memfd test Use a global variable to track the host page size in the guest_memfd test so that the information doesn't need to be constantly passed around. The state is purely a reflection of the underlying system, i.e. can't be set by the test and is constant for a given invocation of the test, and thus explicitly passing the host page size to individual testcases adds no value, e.g. doesn't allow testing different combinations. Making page_size a global will simplify an upcoming change to create a new guest_memfd instance per testcase. No functional change intended. Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-7-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 0de56ce3c4e2..a7c9601bd31e 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -24,6 +24,8 @@ #include "test_util.h" #include "ucall_common.h" +static size_t page_size; + static void test_file_read_write(int fd) { char buf[64]; @@ -38,7 +40,7 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_supported(int fd, size_t total_size) { const char 
val = 0xaa; char *mem; @@ -78,7 +80,7 @@ void fault_sigbus_handler(int signum) siglongjmp(jmpbuf, 1); } -static void test_fault_overflow(int fd, size_t page_size, size_t total_size) +static void test_fault_overflow(int fd, size_t total_size) { struct sigaction sa_old, sa_new = { .sa_handler = fault_sigbus_handler, @@ -106,7 +108,7 @@ static void test_fault_overflow(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "munmap() should succeed."); } -static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) +static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; @@ -117,7 +119,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(mem, MAP_FAILED); } -static void test_file_size(int fd, size_t page_size, size_t total_size) +static void test_file_size(int fd, size_t total_size) { struct stat sb; int ret; @@ -128,7 +130,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(sb.st_blksize, page_size); } -static void test_fallocate(int fd, size_t page_size, size_t total_size) +static void test_fallocate(int fd, size_t total_size) { int ret; @@ -165,7 +167,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } -static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +static void test_invalid_punch_hole(int fd, size_t total_size) { struct { off_t offset; @@ -196,8 +198,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, - uint64_t guest_memfd_flags, - size_t page_size) + uint64_t guest_memfd_flags) { size_t size; int fd; @@ -214,7 +215,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) { int fd1, fd2, ret; struct stat st1, st2; - size_t page_size = getpagesize(); fd1 = __vm_create_guest_memfd(vm, 
page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); @@ -242,7 +242,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) static void test_guest_memfd_flags(struct kvm_vm *vm) { uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); - size_t page_size = getpagesize(); uint64_t flag; int fd; @@ -265,11 +264,9 @@ static void test_guest_memfd(unsigned long vm_type) { struct kvm_vm *vm; size_t total_size; - size_t page_size; uint64_t flags; int fd; - page_size = getpagesize(); total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); @@ -280,22 +277,22 @@ static void test_guest_memfd(unsigned long vm_type) flags &= ~GUEST_MEMFD_FLAG_MMAP; test_create_guest_memfd_multiple(vm); - test_create_guest_memfd_invalid_sizes(vm, flags, page_size); + test_create_guest_memfd_invalid_sizes(vm, flags); fd = vm_create_guest_memfd(vm, total_size, flags); test_file_read_write(fd); if (flags & GUEST_MEMFD_FLAG_MMAP) { - test_mmap_supported(fd, page_size, total_size); - test_fault_overflow(fd, page_size, total_size); + test_mmap_supported(fd, total_size); + test_fault_overflow(fd, total_size); } else { - test_mmap_not_supported(fd, page_size, total_size); + test_mmap_not_supported(fd, total_size); } - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); + test_file_size(fd, total_size); + test_fallocate(fd, total_size); + test_invalid_punch_hole(fd, total_size); test_guest_memfd_flags(vm); @@ -374,6 +371,8 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + page_size = getpagesize(); + /* * Not all architectures support KVM_CAP_VM_TYPES. However, those that * support guest_memfd have that support for the default VM type. 
From 21d602ed616aebae4de568be55db65a1f5a3be10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:00 -0700 Subject: [PATCH 044/305] KVM: selftests: Create a new guest_memfd for each testcase Refactor the guest_memfd selftest to improve test isolation by creating a new guest_memfd for each testcase. Currently, the test reuses a single guest_memfd instance for all testcases, and thus creates dependencies between tests, e.g. not truncating folios from the guest_memfd instance at the end of a test could lead to unexpected results (see the PUNCH_HOLE purging that needs to be done by the in-flight NUMA testcases[1]). Invoke each test via a macro wrapper to create and close a guest_memfd to cut down on the boilerplate copy+paste needed to create a test. Link: https://lore.kernel.org/all/20250827175247.83322-10-shivankg@amd.com Reported-by: Ackerley Tng Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-8-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index a7c9601bd31e..afdc4d3a956d 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -26,7 +26,7 @@ static size_t page_size; -static void test_file_read_write(int fd) +static void test_file_read_write(int fd, size_t total_size) { char buf[64]; @@ -260,14 +260,18 @@ static void test_guest_memfd_flags(struct kvm_vm *vm) } } +#define gmem_test(__test, __vm, __flags) \ +do { \ + int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \ + \ + test_##__test(fd, page_size * 4); \ + close(fd); \ +} while (0) + static void test_guest_memfd(unsigned long vm_type) { struct kvm_vm 
*vm; - size_t total_size; uint64_t flags; - int fd; - - total_size = page_size * 4; vm = vm_create_barebones_type(vm_type); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); @@ -279,24 +283,21 @@ static void test_guest_memfd(unsigned long vm_type) test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags); - fd = vm_create_guest_memfd(vm, total_size, flags); - - test_file_read_write(fd); + gmem_test(file_read_write, vm, flags); if (flags & GUEST_MEMFD_FLAG_MMAP) { - test_mmap_supported(fd, total_size); - test_fault_overflow(fd, total_size); + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); } else { - test_mmap_not_supported(fd, total_size); + gmem_test(mmap_not_supported, vm, flags); } - test_file_size(fd, total_size); - test_fallocate(fd, total_size); - test_invalid_punch_hole(fd, total_size); + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); test_guest_memfd_flags(vm); - close(fd); kvm_vm_free(vm); } From df0d9923f7055169cb01b050236e5a9a2b36db02 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Fri, 3 Oct 2025 16:26:01 -0700 Subject: [PATCH 045/305] KVM: selftests: Add test coverage for guest_memfd without GUEST_MEMFD_FLAG_MMAP If a VM type supports KVM_CAP_GUEST_MEMFD_MMAP, the guest_memfd test will run all test cases with GUEST_MEMFD_FLAG_MMAP set. This leaves the code path for creating a non-mmap()-able guest_memfd on a VM that supports mappable guest memfds untested. Refactor the test to run the main test suite with a given set of flags. Then, for VM types that support the mappable capability, invoke the test suite twice: once with no flags, and once with GUEST_MEMFD_FLAG_MMAP set. This ensures both creation paths are properly exercised on capable VMs. Run test_guest_memfd_flags() only once per VM type since it depends only on the set of valid/supported flags, i.e. iterating over an arbitrary set of flags is both unnecessary and wrong. 
Signed-off-by: Ackerley Tng [sean: use double-underscores for the inner helper] Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20251003232606.4070510-9-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index afdc4d3a956d..9f98a067ab51 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -268,18 +268,8 @@ do { \ close(fd); \ } while (0) -static void test_guest_memfd(unsigned long vm_type) +static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) { - struct kvm_vm *vm; - uint64_t flags; - - vm = vm_create_barebones_type(vm_type); - flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); - - /* This test doesn't yet support testing mmap() on private memory. */ - if (!(flags & GUEST_MEMFD_FLAG_INIT_SHARED)) - flags &= ~GUEST_MEMFD_FLAG_MMAP; - test_create_guest_memfd_multiple(vm); test_create_guest_memfd_invalid_sizes(vm, flags); @@ -295,9 +285,24 @@ static void test_guest_memfd(unsigned long vm_type) gmem_test(file_size, vm, flags); gmem_test(fallocate, vm, flags); gmem_test(invalid_punch_hole, vm, flags); +} + +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + uint64_t flags; test_guest_memfd_flags(vm); + __test_guest_memfd(vm, 0); + + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + + /* MMAP should always be supported if INIT_SHARED is supported. 
*/ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + kvm_vm_free(vm); } From 61cee97f40180312dcca9580a5be0b0aa2217f6e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:02 -0700 Subject: [PATCH 046/305] KVM: selftests: Add wrappers for mmap() and munmap() to assert success Add and use wrappers for mmap() and munmap() that assert success to reduce a significant amount of boilerplate code, to ensure all tests assert on failure, and to provide consistent error messages on failure. No functional change intended. Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-10-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 21 +++------ .../testing/selftests/kvm/include/kvm_util.h | 25 +++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 44 +++++++------------ tools/testing/selftests/kvm/mmu_stress_test.c | 5 +-- .../selftests/kvm/s390/ucontrol_test.c | 16 +++---- .../selftests/kvm/set_memory_region_test.c | 17 ++++--- 6 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 9f98a067ab51..319fda4f5d53 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -50,8 +50,7 @@ static void test_mmap_supported(int fd, size_t total_size) mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, val, total_size); for (i = 0; i < 
total_size; i++) @@ -70,8 +69,7 @@ static void test_mmap_supported(int fd, size_t total_size) for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, total_size); - TEST_ASSERT(!ret, "munmap() should succeed."); + kvm_munmap(mem, total_size); } static sigjmp_buf jmpbuf; @@ -89,10 +87,8 @@ static void test_fault_overflow(int fd, size_t total_size) const char val = 0xaa; char *mem; size_t i; - int ret; - mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); sigaction(SIGBUS, &sa_new, &sa_old); if (sigsetjmp(jmpbuf, 1) == 0) { @@ -104,8 +100,7 @@ static void test_fault_overflow(int fd, size_t total_size) for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); - ret = munmap(mem, map_size); - TEST_ASSERT(!ret, "munmap() should succeed."); + kvm_munmap(mem, map_size); } static void test_mmap_not_supported(int fd, size_t total_size) @@ -351,10 +346,9 @@ static void test_guest_memfd_guest(void) GUEST_MEMFD_FLAG_INIT_SHARED); vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, 0xaa, size); - munmap(mem, size); + kvm_munmap(mem, size); virt_pg_map(vm, gpa, gpa); vcpu_args_set(vcpu, 2, gpa, size); @@ -362,8 +356,7 @@ static void test_guest_memfd_guest(void) TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); for (i = 0; i < size; i++) TEST_ASSERT_EQ(mem[i], 0xff); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h 
b/tools/testing/selftests/kvm/include/kvm_util.h index 26cc30290e76..ee60dbf5208a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap) #define __KVM_SYSCALL_ERROR(_name, _ret) \ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline void kvm_munmap(void *mem, size_t size) +{ + int ret; + + ret = munmap(mem, size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); +} + /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6743fbd9bd67..83a721be7ec5 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int ret; if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); ret = close(vcpu->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); @@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp) static void __vm_mem_region_delete(struct kvm_vm *vm, struct 
userspace_mem_region *region) { - int ret; - rb_erase(&region->gpa_node, &vm->regions.gpa_tree); rb_erase(&region->hva_node, &vm->regions.hva_tree); hash_del(&region->slot_node); sparsebit_free(&region->unused_phy_pages); sparsebit_free(&region->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. 
*/ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) vcpu->stats.fd = vcpu_get_stats_fd(vcpu); @@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 6a437d2be9fa..37b7e6524533 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -339,8 +339,7 @@ int main(int argc, 
char *argv[]) TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); @@ -413,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. */ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index d265b34c54be..50bc1c38225a 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm) self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); - self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, - PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - ASSERT_NE(self->run, MAP_FAILED); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); /** * For virtual cpus that have been created with S390 user controlled * virtual machines, the resulting vcpu fd can be memory mapped at page * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of * the virtual cpu's hardware control block. 
*/ - self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, - self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); - ASSERT_NE(self->sie_block, MAP_FAILED); + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); TH_LOG("VM created %p %p", self->run, self->sie_block); @@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm) FIXTURE_TEARDOWN(uc_kvm) { - munmap(self->sie_block, PAGE_SIZE); - munmap(self->run, self->kvm_run_size); + kvm_munmap(self->sie_block, PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); close(self->vcpu_fd); close(self->vm_fd); close(self->kvm_fd); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index ce3ac0fd6dfb..7fe427ff9b38 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) @@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void) mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); + 
mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, (uint64_t)max_mem_slots * MEM_REGION_SIZE, @@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } From 505c953009ec8260870d41ef8109bb4c7e208e6f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:03 -0700 Subject: [PATCH 047/305] KVM: selftests: Isolate the guest_memfd Copy-on-Write negative testcase Move the guest_memfd Copy-on-Write (CoW) testcase to its own function to better separate positive testcases from negative testcases. No functional change intended. Suggested-by: Ackerley Tng Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-11-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 319fda4f5d53..640636c76eb9 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -40,6 +40,14 @@ static void test_file_read_write(int fd, size_t total_size) "pwrite on a guest_mem fd should fail"); } +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + static void test_mmap_supported(int fd, size_t total_size) { const char val = 0xaa; @@ -47,9 +55,6 @@ 
static void test_mmap_supported(int fd, size_t total_size) size_t i; int ret; - mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); - mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); memset(mem, val, total_size); @@ -272,6 +277,7 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) if (flags & GUEST_MEMFD_FLAG_MMAP) { gmem_test(mmap_supported, vm, flags); + gmem_test(mmap_cow, vm, flags); gmem_test(fault_overflow, vm, flags); } else { gmem_test(mmap_not_supported, vm, flags); From f91187c0ecc6358ccecf533c5fcc6b7dbb4735cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:04 -0700 Subject: [PATCH 048/305] KVM: selftests: Add wrapper macro to handle and assert on expected SIGBUS Extract the guest_memfd test's SIGBUS handling functionality into a common TEST_EXPECT_SIGBUS() macro in anticipation of adding more SIGBUS testcases. Eating a SIGBUS isn't terribly difficult, but it requires a non-trivial amount of boilerplate code, and using a macro allows selftests to print out the exact action that failed to generate a SIGBUS without the developer needing to remember to add a useful error message. Explicitly mark the SIGBUS handler as "used", as gcc-14 at least likes to discard the function before linking. Opportunistically use TEST_FAIL(...) instead of TEST_ASSERT(false, ...), and fix the write path of the guest_memfd test to use the local "val" instead of hardcoding the literal value a second time. 
Suggested-by: Ackerley Tng Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: Lisa Wang Tested-by: Lisa Wang Link: https://lore.kernel.org/r/20251003232606.4070510-12-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 18 +----------------- .../testing/selftests/kvm/include/test_util.h | 19 +++++++++++++++++++ tools/testing/selftests/kvm/lib/test_util.c | 7 +++++++ 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 640636c76eb9..73c2e54e7297 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -14,8 +14,6 @@ #include #include #include -#include -#include #include #include #include @@ -77,17 +75,8 @@ static void test_mmap_supported(int fd, size_t total_size) kvm_munmap(mem, total_size); } -static sigjmp_buf jmpbuf; -void fault_sigbus_handler(int signum) -{ - siglongjmp(jmpbuf, 1); -} - static void test_fault_overflow(int fd, size_t total_size) { - struct sigaction sa_old, sa_new = { - .sa_handler = fault_sigbus_handler, - }; size_t map_size = total_size * 4; const char val = 0xaa; char *mem; @@ -95,12 +84,7 @@ static void test_fault_overflow(int fd, size_t total_size) mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); - sigaction(SIGBUS, &sa_new, &sa_old); - if (sigsetjmp(jmpbuf, 1) == 0) { - memset(mem, 0xaa, map_size); - TEST_ASSERT(false, "memset() should have triggered SIGBUS."); - } - sigaction(SIGBUS, &sa_old, NULL); + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); for (i = 0; i < total_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index c6ef895fbd9a..b4872ba8ed12 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ 
#ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include +#include #include #include #include @@ -78,6 +80,23 @@ do { \ __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); int64_t timespec_to_ns(struct timespec ts); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 03eb99af9b8d..8a1848586a85 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,6 +18,13 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. From 19942d4fd9cf022b28a9afb432a6338dcf96fc2f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:05 -0700 Subject: [PATCH 049/305] KVM: selftests: Verify that faulting in private guest_memfd memory fails Add a guest_memfd testcase to verify that faulting in private memory gets a SIGBUS. For now, test only the case where memory is private by default since KVM doesn't yet support in-place conversion. Deliberately run the CoW test with and without INIT_SHARED set as KVM should disallow MAP_PRIVATE regardless of whether the memory itself is private from a CoCo perspective. 
Cc: Ackerley Tng Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Reviewed-by: David Hildenbrand Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Link: https://lore.kernel.org/r/20251003232606.4070510-13-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/guest_memfd_test.c | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 73c2e54e7297..f5372fdf096d 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -75,9 +75,8 @@ static void test_mmap_supported(int fd, size_t total_size) kvm_munmap(mem, total_size); } -static void test_fault_overflow(int fd, size_t total_size) +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) { - size_t map_size = total_size * 4; const char val = 0xaa; char *mem; size_t i; @@ -86,12 +85,22 @@ static void test_fault_overflow(int fd, size_t total_size) TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); - for (i = 0; i < total_size; i++) + for (i = 0; i < accessible_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); kvm_munmap(mem, map_size); } +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); +} + static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; @@ -260,9 +269,14 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags) gmem_test(file_read_write, vm, flags); if (flags & GUEST_MEMFD_FLAG_MMAP) { - gmem_test(mmap_supported, vm, flags); + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + } else { + gmem_test(fault_private, vm, flags); + } + gmem_test(mmap_cow, vm, flags); - gmem_test(fault_overflow, vm, flags); } 
else { gmem_test(mmap_not_supported, vm, flags); } @@ -282,6 +296,8 @@ static void test_guest_memfd(unsigned long vm_type) __test_guest_memfd(vm, 0); flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); /* MMAP should always be supported if INIT_SHARED is supported. */ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) From 505f5224b197b77169c977e747cbc18b222f85f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 3 Oct 2025 16:26:06 -0700 Subject: [PATCH 050/305] KVM: selftests: Verify that reads to inaccessible guest_memfd VMAs SIGBUS Expand the guest_memfd negative testcases for overflow and MAP_PRIVATE to verify that reads to inaccessible memory also get a SIGBUS. Reviewed-by: Ackerley Tng Tested-by: Ackerley Tng Reviewed-by: Lisa Wang Tested-by: Lisa Wang Link: https://lore.kernel.org/r/20251003232606.4070510-14-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/guest_memfd_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index f5372fdf096d..e7d9aeb418d3 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -84,6 +84,7 @@ static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); for (i = 0; i < accessible_size; i++) TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); From 852947be66b826c3d0ba328e19a3559fdf7ac726 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 10 Oct 2025 15:50:24 -0600 Subject: [PATCH 051/305] riscv: kprobes: convert one final __ASSEMBLY__ to __ASSEMBLER__ Per the reasoning in commit f811f58597ac ("riscv: Replace __ASSEMBLY__ with __ASSEMBLER__ in non-uapi headers"), 
convert one last remaining instance of __ASSEMBLY__ in the arch/riscv kprobes code. This entered the tree from patches that were sent before Thomas' changes; and when I reviewed the kprobes patches before queuing them, I missed this instance. Cc: Nam Cao Cc: Thomas Huth Link: https://lore.kernel.org/linux-riscv/16b74b63-f223-4f0b-b6e5-31cea5e620b4@redhat.com/ Link: https://lore.kernel.org/linux-riscv/20250606070952.498274-1-thuth@redhat.com/ Signed-off-by: Paul Walmsley --- arch/riscv/kernel/tests/kprobes/test-kprobes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.h b/arch/riscv/kernel/tests/kprobes/test-kprobes.h index 3886ab491ecb..537f44aa9d3f 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.h +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.h @@ -11,7 +11,7 @@ #define KPROBE_TEST_MAGIC_LOWER 0x0000babe #define KPROBE_TEST_MAGIC_UPPER 0xcafe0000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* array of addresses to install kprobes */ extern void *test_kprobes_addresses[]; @@ -19,6 +19,6 @@ extern void *test_kprobes_addresses[]; /* array of functions that return KPROBE_TEST_MAGIC */ extern long (*test_kprobes_functions[])(void); -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* TEST_KPROBES_H */ From db74b04edce1bc86b9a5acc724c7ca06f427ab60 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 11 Oct 2025 12:59:53 +0200 Subject: [PATCH 052/305] drm/bridge: lt9211: Drop check for last nibble of version register There is now a new LT9211 rev. U5, which reports chip ID 0x18 0x01 0xe4 . The previous LT9211 reported chip ID 0x18 0x01 0xe3 , which is what the driver checks for right now. Since there is a possibility there will be yet another revision of the LT9211 in the future, drop the last version nibble check to allow all future revisions of the chip to work with this driver. This fix makes LT9211 rev. U5 work with this driver. 
Fixes: 8ce4129e3de4 ("drm/bridge: lt9211: Add Lontium LT9211 bridge driver") Signed-off-by: Marek Vasut Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20251011110017.12521-1-marek.vasut@mailbox.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/lontium-lt9211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 399fa7eebd49..03fc8fd10f20 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -121,8 +121,7 @@ static int lt9211_read_chipid(struct lt9211 *ctx) } /* Test for known Chip ID. */ - if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE || - chipid[2] != REG_CHIPID2_VALUE) { + if (chipid[0] != REG_CHIPID0_VALUE || chipid[1] != REG_CHIPID1_VALUE) { dev_err(ctx->dev, "Unknown Chip ID: 0x%02x 0x%02x 0x%02x\n", chipid[0], chipid[1], chipid[2]); return -EINVAL; From d5d790ba1558dbb8d179054f514476e2ee970b8e Mon Sep 17 00:00:00 2001 From: Bhanu Seshu Kumar Valluri Date: Thu, 9 Oct 2025 11:00:09 +0530 Subject: [PATCH 053/305] net: usb: lan78xx: Fix lost EEPROM write timeout error(-ETIMEDOUT) in lan78xx_write_raw_eeprom The function lan78xx_write_raw_eeprom failed to properly propagate EEPROM write timeout errors (-ETIMEDOUT). In the timeout fallthrough path, it first attempted to restore the pin configuration for LED outputs and then returned only the status of that restore operation, discarding the original timeout error saved in ret. As a result, callers could mistakenly treat EEPROM write operation as successful even though the EEPROM write had actually timed out with no or partial data write. To fix this, handle errors in restoring the LED pin configuration separately. If the restore succeeds, return any prior EEPROM write timeout error saved in ret to the caller. 
Suggested-by: Oleksij Rempel Fixes: 8b1b2ca83b20 ("net: usb: lan78xx: Improve error handling in EEPROM and OTP operations") cc: stable@vger.kernel.org Signed-off-by: Bhanu Seshu Kumar Valluri Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 42d35cc6b421..28195d9a8d6b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1175,10 +1175,13 @@ static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset, } write_raw_eeprom_done: - if (dev->chipid == ID_REV_CHIP_ID_7800_) - return lan78xx_write_reg(dev, HW_CFG, saved); - - return 0; + if (dev->chipid == ID_REV_CHIP_ID_7800_) { + int rc = lan78xx_write_reg(dev, HW_CFG, saved); + /* If USB fails, there is nothing to do */ + if (rc < 0) + return rc; + } + return ret; } static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, From 3abc0e55ea1fa2250e52bc860e8f24b2b9a2093a Mon Sep 17 00:00:00 2001 From: Rex Lu Date: Thu, 9 Oct 2025 08:29:34 +0200 Subject: [PATCH 054/305] net: mtk: wed: add dma mask limitation and GFP_DMA32 for device with more than 4GB DRAM Limit tx/rx buffer address to 32-bit address space for board with more than 4GB DRAM. Fixes: 804775dfc2885 ("net: ethernet: mtk_eth_soc: add support for Wireless Ethernet Dispatch (WED)") Fixes: 6757d345dd7db ("net: ethernet: mtk_wed: introduce hw_rro support for MT7988") Tested-by: Daniel Pawlik Tested-by: Matteo Croce Signed-off-by: Rex Lu Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_wed.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 3dbb113b792c..1ed1f88dd7f8 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -677,7 +677,7 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) void *buf; int s; - page = __dev_alloc_page(GFP_KERNEL); + page = __dev_alloc_page(GFP_KERNEL | GFP_DMA32); if (!page) return -ENOMEM; @@ -800,7 +800,7 @@ mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) struct page *page; int s; - page = __dev_alloc_page(GFP_KERNEL); + page = __dev_alloc_page(GFP_KERNEL | GFP_DMA32); if (!page) return -ENOMEM; @@ -2426,6 +2426,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) dev->version = hw->version; dev->hw->pcie_base = mtk_wed_get_pcie_base(dev); + ret = dma_set_mask_and_coherent(hw->dev, DMA_BIT_MASK(32)); + if (ret) + goto out; + if (hw->eth->dma_dev == hw->eth->dev && of_dma_is_coherent(hw->eth->dev->of_node)) mtk_eth_set_dma_device(hw->eth, hw->dev); From 65946eac6d888d50ae527c4e5c237dbe5cc3a2f2 Mon Sep 17 00:00:00 2001 From: Yeounsu Moon Date: Fri, 10 Oct 2025 00:57:16 +0900 Subject: [PATCH 055/305] net: dlink: handle dma_map_single() failure properly There is no error handling for `dma_map_single()` failures. Add error handling by checking `dma_mapping_error()` and freeing the `skb` using `dev_kfree_skb()` (process context) when it fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yeounsu Moon Tested-on: D-Link DGE-550T Rev-A3 Suggested-by: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/dlink/dl2k.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 1996d2e4e3e2..7077d705e471 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -508,25 +508,34 @@ static int alloc_list(struct net_device *dev) for (i = 0; i < RX_RING_SIZE; i++) { /* Allocated fixed size of skbuff */ struct sk_buff *skb; + dma_addr_t addr; skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); np->rx_skbuff[i] = skb; - if (!skb) { - free_list(dev); - return -ENOMEM; - } + if (!skb) + goto err_free_list; + + addr = dma_map_single(&np->pdev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pdev->dev, addr)) + goto err_kfree_skb; np->rx_ring[i].next_desc = cpu_to_le64(np->rx_ring_dma + ((i + 1) % RX_RING_SIZE) * sizeof(struct netdev_desc)); /* Rubicon now supports 40 bits of addressing space. */ - np->rx_ring[i].fraginfo = - cpu_to_le64(dma_map_single(&np->pdev->dev, skb->data, - np->rx_buf_sz, DMA_FROM_DEVICE)); + np->rx_ring[i].fraginfo = cpu_to_le64(addr); np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); } return 0; + +err_kfree_skb: + dev_kfree_skb(np->rx_skbuff[i]); + np->rx_skbuff[i] = NULL; +err_free_list: + free_list(dev); + return -ENOMEM; } static void rio_hw_init(struct net_device *dev) From 68a052239fc4b351e961f698b824f7654a346091 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 9 Oct 2025 13:56:29 -0700 Subject: [PATCH 056/305] selftests: drv-net: update remaining Python init files Convert remaining __init__ files similar to what we did in commit b615879dbfea ("selftests: drv-net: make linters happy with our imports") Signed-off-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../drivers/net/hw/lib/py/__init__.py | 40 ++++++++++++++----- .../selftests/drivers/net/lib/py/__init__.py | 4 +- .../testing/selftests/net/lib/py/__init__.py | 29 ++++++++++++-- 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 0ceb297e7757..fb010a48a5a1 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -1,5 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Driver test environment (hardware-only tests). +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + import sys from pathlib import Path @@ -8,26 +16,36 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) - from net.lib.py import * - from drivers.net.lib.py import * - # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure - from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ - rand_port, tool, wait_port_listen - from net.lib.py import fd_read_timeout + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none - from net.lib.py import NetNSEnter - from drivers.net.lib.py 
import GenerateTraffic + from drivers.net.lib.py import GenerateTraffic, Remote from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none", + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"] except ModuleNotFoundError as e: - ksft_pr("Failed importing `net` library from kernel sources") - ksft_pr(str(e)) - ktap_result(True, comment="SKIP") + print("Failed importing `net` library from kernel sources") + print(str(e)) sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index e6c070f32f51..b0c6300150fb 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -22,7 +22,7 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup @@ -34,7 +34,7 @@ try: "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", - "fd_read_timeout", "ip", "rand_port", "tool", + "fd_read_timeout", "ip", 
"rand_port", "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 997b85cc216a..97b7cf2b20eb 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -1,9 +1,32 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Python selftest helpers for netdev. +""" + from .consts import KSRC -from .ksft import * +from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ + ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ + ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit from .netns import NetNS, NetNSEnter -from .nsim import * -from .utils import * +from .nsim import NetdevSim, NetdevSimDev +from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ + bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily + +__all__ = ["KSRC", + "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq", + "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", + "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", + "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", + "ksft_run", "ksft_exit", + "NetNS", "NetNSEnter", + "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", + "bpftool", "ip", "ethtool", "bpftrace", "rand_port", + "wait_port_listen", "wait_file", + "NetdevSim", "NetdevSimDev", + "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", + "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", + "RtnlAddrFamily"] From 0b4b77eff5f8cd9be062783a1c1e198d46d0a753 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel 
Date: Fri, 10 Oct 2025 16:18:59 +0200 Subject: [PATCH 057/305] doc: fix seg6_flowlabel path This sysctl is not per interface; it's global per netns. Fixes: 292ecd9f5a94 ("doc: move seg6_flowlabel to seg6-sysctl.rst") Reported-by: Philippe Guibert Signed-off-by: Nicolas Dichtel Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/seg6-sysctl.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/networking/seg6-sysctl.rst b/Documentation/networking/seg6-sysctl.rst index 07c20e470baf..1b6af4779be1 100644 --- a/Documentation/networking/seg6-sysctl.rst +++ b/Documentation/networking/seg6-sysctl.rst @@ -25,6 +25,9 @@ seg6_require_hmac - INTEGER Default is 0. +/proc/sys/net/ipv6/seg6_* variables: +==================================== + seg6_flowlabel - INTEGER Controls the behaviour of computing the flowlabel of outer IPv6 header in case of SR T.encaps From 39dec6cd888bde4171bd4b8fcf45f73ab684404d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 1 Oct 2025 21:09:00 +0200 Subject: [PATCH 058/305] smb: server: Use common error handling code in smb_direct_rdma_xmit() Add two jump targets so that a bit of exception handling can be better reused at the end of this function implementation. 
Signed-off-by: Markus Elfring Reviewed-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index b3077766d6ec..a201c5871a77 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1574,18 +1574,14 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, get_buf_page_count(desc_buf, desc_buf_len), msg->sg_list, SG_CHUNK_SIZE); if (ret) { - kfree(msg); ret = -ENOMEM; - goto out; + goto free_msg; } ret = get_sg_list(desc_buf, desc_buf_len, msg->sgt.sgl, msg->sgt.orig_nents); - if (ret < 0) { - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); - goto out; - } + if (ret < 0) + goto free_table; ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port, msg->sgt.sgl, @@ -1596,9 +1592,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); if (ret < 0) { pr_err("failed to init rdma_rw_ctx: %d\n", ret); - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); - goto out; + goto free_table; } list_add_tail(&msg->list, &msg_list); @@ -1630,6 +1624,12 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, atomic_add(credits_needed, &sc->rw_io.credits.count); wake_up(&sc->rw_io.credits.wait_queue); return ret; + +free_table: + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); +free_msg: + kfree(msg); + goto out; } static int smb_direct_rdma_write(struct ksmbd_transport *t, From ef3e73a917ec7d080e0fb0e4015098a4fb0f1cff Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 10 Oct 2025 12:03:07 +0000 Subject: [PATCH 059/305] powerpc/pseries/msi: Fix NULL pointer dereference at irq domain teardown pseries_msi_ops_teardown() reads pci_dev* from msi_alloc_info_t. 
However, pseries_msi_ops_prepare() does not populate this structure, thus it is all zeros. Consequently, pseries_msi_ops_teardown() triggers a NULL pointer dereference crash. struct pci_dev is available in struct irq_domain. Read it there instead. Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/linuxppc-dev/878d7651-433a-46fe-a28b-1b7e893fcbe0@linux.ibm.com/ Tested-by: Venkat Rao Bagalkote Signed-off-by: Nam Cao Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251010120307.3281720-1-namcao@linutronix.de --- arch/powerpc/platforms/pseries/msi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 825f9432e03d..a82aaa786e9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -443,8 +443,7 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev */ static void pseries_msi_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg) { - struct msi_desc *desc = arg->desc; - struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + struct pci_dev *pdev = to_pci_dev(domain->dev); rtas_disable_msi(pdev); } From 2743cf75f7c92d2a0a4acabd7aef1c17d98fe123 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 8 Oct 2025 08:13:59 +0000 Subject: [PATCH 060/305] powerpc, ocxl: Fix extraction of struct xive_irq_data Commit cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") changed xive_irq_data to be stashed to chip_data instead of handler_data. However, multiple places are still attempting to read xive_irq_data from handler_data and get a NULL pointer dereference bug. Update them to read xive_irq_data from chip_data. Non-XIVE files which touch xive_irq_data seem quite strange to me, especially the ocxl driver. I think there ought to be an alternative platform-independent solution, instead of touching XIVE's data directly.
Therefore, I think this whole thing should be cleaned up. But perhaps I just misunderstand something. In any case, this cleanup would not be trivial; for now, just get things working again. Fixes: cc0cc23babc9 ("powerpc/xive: Untangle xive from child interrupt controller drivers") Reported-by: Ritesh Harjani (IBM) Closes: https://lore.kernel.org/linuxppc-dev/68e48df8.170a0220.4b4b0.217d@mx.google.com/ Signed-off-by: Nam Cao Reviewed-by: Ganesh Goudar Acked-by: Andrew Donnellan # ocxl Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251008081359.1382699-1-namcao@linutronix.de --- arch/powerpc/kvm/book3s_xive.c | 12 ++++-------- arch/powerpc/platforms/powernv/vas.c | 2 +- arch/powerpc/sysdev/xive/common.c | 2 +- drivers/misc/ocxl/afu_irq.c | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 1302b5ac5672..89a1b8c21ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -916,8 +916,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, * it fires once. 
*/ if (single_escalation) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[prio]); xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); vcpu->arch.xive_esc_raddr = xd->eoi_page; @@ -1612,7 +1611,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, /* Grab info about irq */ state->pt_number = hw_irq; - state->pt_data = irq_data_get_irq_handler_data(host_data); + state->pt_data = irq_data_get_irq_chip_data(host_data); /* * Configure the IRQ to match the existing configuration of @@ -1787,8 +1786,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) */ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) { - struct irq_data *d = irq_get_irq_data(irq); - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(irq); /* * This slightly odd sequence gives the right result @@ -2827,9 +2825,7 @@ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) i0, i1); } if (xc->esc_virq[i]) { - struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); - struct xive_irq_data *xd = - irq_data_get_irq_handler_data(d); + struct xive_irq_data *xd = irq_get_chip_data(xc->esc_virq[i]); u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); seq_printf(m, " ESC %d %c%c EOI @%llx", diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index b65256a63e87..9c9650319f3b 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -121,7 +121,7 @@ static int init_vas_instance(struct platform_device *pdev) return -EINVAL; } - xd = irq_get_handler_data(vinst->virq); + xd = irq_get_chip_data(vinst->virq); if (!xd) { pr_err("Inst%d: Invalid virq %d\n", vinst->vas_id, vinst->virq); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 625361a15424..8d0123b0ae84 
100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1580,7 +1580,7 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc) cpu, irq); #endif raw_spin_lock(&desc->lock); - xd = irq_desc_get_handler_data(desc); + xd = irq_desc_get_chip_data(desc); /* * Clear saved_p to indicate that it's no longer pending diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c index 36f7379b8e2d..f6b821fc274c 100644 --- a/drivers/misc/ocxl/afu_irq.c +++ b/drivers/misc/ocxl/afu_irq.c @@ -203,7 +203,7 @@ u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id) mutex_lock(&ctx->irq_lock); irq = idr_find(&ctx->irq_idr, irq_id); if (irq) { - xd = irq_get_handler_data(irq->virq); + xd = irq_get_chip_data(irq->virq); addr = xd ? xd->trig_page : 0; } mutex_unlock(&ctx->irq_lock); From 0843ba458439f38efdc14aa359c14ad0127edb01 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 8 Oct 2025 08:59:34 +0530 Subject: [PATCH 061/305] powerpc/fadump: skip parameter area allocation when fadump is disabled Fadump allocates memory to pass additional kernel command-line argument to the fadump kernel. However, this allocation is not needed when fadump is disabled. So avoid allocating memory for the additional parameter area in such cases. 
Fixes: f4892c68ecc1 ("powerpc/fadump: allocate memory for additional parameters early") Reviewed-by: Hari Bathini Signed-off-by: Sourabh Jain Fixes: f4892c68ecc1 ("powerpc/fadump: allocate memory for additional parameters early") Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20251008032934.262683-1-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5782e743fd27..4ebc333dd786 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1747,6 +1747,9 @@ void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; + if (!fw_dump.fadump_enabled) + return; + if (!fw_dump.param_area_supported || fw_dump.dump_active) return; From 12d724f2852d094d68dccaf5101e0ef89a971cde Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 9 Oct 2025 19:46:00 +0900 Subject: [PATCH 062/305] ata: libata-core: relax checks in ata_read_log_directory() Commit 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") introduced caching of a device general purpose log directory to avoid repeated access to this log page during device scan. This change also added a check on this log page to verify that the log page version is 0x0001 as mandated by the ACS specifications. And it turns out that some devices do not bother reporting this version, instead reporting a version 0, resulting in error messages such as: ata6.00: Invalid log directory version 0x0000 and to the device being marked as not supporting the general purpose log directory log page. Since before commit 6d4405b16d37 the log page version check did not exist and things were still working correctly for these devices, relax ata_read_log_directory() version check and only warn about the invalid log page version number without disabling access to the log directory page. 
Fixes: 6d4405b16d37 ("ata: libata-core: Cache the general purpose log directory") Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220635 Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 11 ++++------- include/linux/libata.h | 6 ++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ff53f5f029b4..2a210719c4ce 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2174,13 +2174,10 @@ static int ata_read_log_directory(struct ata_device *dev) } version = get_unaligned_le16(&dev->gp_log_dir[0]); - if (version != 0x0001) { - ata_dev_err(dev, "Invalid log directory version 0x%04x\n", - version); - ata_clear_log_directory(dev); - dev->quirks |= ATA_QUIRK_NO_LOG_DIR; - return -EINVAL; - } + if (version != 0x0001) + ata_dev_warn_once(dev, + "Invalid log directory version 0x%04x\n", + version); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 21de0935775d..7a98de1cc995 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1594,6 +1594,12 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) +#define ata_dev_warn_once(dev, fmt, ...) \ + pr_warn_once("ata%u.%02u: " fmt, \ + (dev)->link->ap->print_id, \ + (dev)->link->pmp + (dev)->devno, \ + ##__VA_ARGS__) + static inline void ata_print_version_once(const struct device *dev, const char *version) { From 7e8242405b94ceac6db820de7d4fd9318cbc1219 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Wed, 1 Oct 2025 08:08:03 +0200 Subject: [PATCH 063/305] rpmb: move rpmb_frame struct and constants to common header Move struct rpmb_frame and RPMB operation constants from MMC block driver to include/linux/rpmb.h for reuse across different RPMB implementations (UFS, NVMe, etc.). 
Signed-off-by: Bean Huo Reviewed-by: Avri Altman Acked-by: Jens Wiklander Reviewed-by: Bart Van Assche Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 42 -------------------------------------- include/linux/rpmb.h | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9399bf6c766a..c0ffe0817fd4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -79,48 +79,6 @@ MODULE_ALIAS("mmc:block"); #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16) #define MMC_EXTRACT_VALUE_FROM_ARG(x) ((x & 0x0000FF00) >> 8) -/** - * struct rpmb_frame - rpmb frame as defined by eMMC 5.1 (JESD84-B51) - * - * @stuff : stuff bytes - * @key_mac : The authentication key or the message authentication - * code (MAC) depending on the request/response type. - * The MAC will be delivered in the last (or the only) - * block of data. - * @data : Data to be written or read by signed access. - * @nonce : Random number generated by the host for the requests - * and copied to the response by the RPMB engine. - * @write_counter: Counter value for the total amount of the successful - * authenticated data write requests made by the host. - * @addr : Address of the data to be programmed to or read - * from the RPMB. Address is the serial number of - * the accessed block (half sector 256B). - * @block_count : Number of blocks (half sectors, 256B) requested to be - * read/programmed. - * @result : Includes information about the status of the write counter - * (valid, expired) and result of the access made to the RPMB. - * @req_resp : Defines the type of request and response to/from the memory. - * - * The stuff bytes and big-endian properties are modeled to fit to the spec. 
- */ -struct rpmb_frame { - u8 stuff[196]; - u8 key_mac[32]; - u8 data[256]; - u8 nonce[16]; - __be32 write_counter; - __be16 addr; - __be16 block_count; - __be16 result; - __be16 req_resp; -} __packed; - -#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ -#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ -#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ -#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ -#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ - #define RPMB_FRAME_SIZE sizeof(struct rpmb_frame) #define CHECK_SIZE_NEQ(val) ((val) != sizeof(struct rpmb_frame)) #define CHECK_SIZE_ALIGNED(val) IS_ALIGNED((val), sizeof(struct rpmb_frame)) diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h index cccda73eea4d..ed3f8e431eff 100644 --- a/include/linux/rpmb.h +++ b/include/linux/rpmb.h @@ -61,6 +61,50 @@ struct rpmb_dev { #define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) +/** + * struct rpmb_frame - RPMB frame structure for authenticated access + * + * @stuff : stuff bytes, a padding/reserved area of 196 bytes at the + * beginning of the RPMB frame. They don’t carry meaningful + * data but are required to make the frame exactly 512 bytes. + * @key_mac : The authentication key or the message authentication + * code (MAC) depending on the request/response type. + * The MAC will be delivered in the last (or the only) + * block of data. + * @data : Data to be written or read by signed access. + * @nonce : Random number generated by the host for the requests + * and copied to the response by the RPMB engine. + * @write_counter: Counter value for the total amount of the successful + * authenticated data write requests made by the host. + * @addr : Address of the data to be programmed to or read + * from the RPMB. Address is the serial number of + * the accessed block (half sector 256B). 
+ * @block_count : Number of blocks (half sectors, 256B) requested to be + * read/programmed. + * @result : Includes information about the status of the write counter + * (valid, expired) and result of the access made to the RPMB. + * @req_resp : Defines the type of request and response to/from the memory. + * + * The stuff bytes and big-endian properties are modeled to fit to the spec. + */ +struct rpmb_frame { + u8 stuff[196]; + u8 key_mac[32]; + u8 data[256]; + u8 nonce[16]; + __be32 write_counter; + __be16 addr; + __be16 block_count; + __be16 result; + __be16 req_resp; +} __packed; + +#define RPMB_PROGRAM_KEY 0x1 /* Program RPMB Authentication Key */ +#define RPMB_GET_WRITE_COUNTER 0x2 /* Read RPMB write counter */ +#define RPMB_WRITE_DATA 0x3 /* Write data to RPMB partition */ +#define RPMB_READ_DATA 0x4 /* Read data from RPMB partition */ +#define RPMB_RESULT_READ 0x5 /* Read result request (Internal) */ + #if IS_ENABLED(CONFIG_RPMB) struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); void rpmb_dev_put(struct rpmb_dev *rdev); From ed25dcfbc4327570b28f0328a8e17d121434c0ea Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 12:41:07 -0700 Subject: [PATCH 064/305] KVM: arm64: nv: Don't treat ZCR_EL2 as a 'mapped' register Unlike the other mapped EL2 sysregs ZCR_EL2 isn't guaranteed to be resident when a vCPU is loaded as it actually follows the SVE context. As such, the contents of ZCR_EL1 may belong to another guest if the vCPU has been preempted before reaching sysreg emulation. Unconditionally use the in-memory value of ZCR_EL2 and switch to the memory-only accessors. The in-memory value is guaranteed to be valid as fpsimd_lazy_switch_to_{guest,host}() will restore/save the register appropriately. 
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 91053aa832d0..4a75e5f0c259 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -203,7 +203,6 @@ static void locate_register(const struct kvm_vcpu *vcpu, enum vcpu_sysreg reg, MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL ); MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL ); MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL ); - MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL ); MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL ); MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1, NULL ); case CNTHCTL_EL2: @@ -2709,14 +2708,13 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, } if (!p->is_write) { - p->regval = vcpu_read_sys_reg(vcpu, ZCR_EL2); + p->regval = __vcpu_sys_reg(vcpu, ZCR_EL2); return true; } vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1; vq = min(vq, vcpu_sve_max_vq(vcpu)); - vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2); - + __vcpu_assign_sys_reg(vcpu, ZCR_EL2, vq - 1); return true; } From 9a1950f97741a23fc68a7b2cfd487e059d389be5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 26 Sep 2025 12:41:08 -0700 Subject: [PATCH 065/305] KVM: arm64: nv: Don't advance PC when pending an SVE exception Jan reports that running a nested guest on Neoverse-V2 leads to a WARN in the host due to simultaneously pending an exception and PC increment after an access to ZCR_EL2. Returning true from a sysreg accessor is an indication that the sysreg instruction has been retired. Of course this isn't the case when we've pended a synchronous SVE exception for the guest. Fix the return value and let the exception propagate to the guest as usual. 
Reported-by: Jan Kotas Closes: https://lore.kernel.org/kvmarm/865xd61tt5.wl-maz@kernel.org/ Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4a75e5f0c259..ee8a7033c85b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2704,7 +2704,7 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu, if (guest_hyp_sve_traps_enabled(vcpu)) { kvm_inject_nested_sve_trap(vcpu); - return true; + return false; } if (!p->is_write) { From a46c09b382eea3f9e3d16576096b987a2171fcca Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 15:42:46 -0700 Subject: [PATCH 066/305] KVM: arm64: Use the in-context stage-1 in __kvm_find_s1_desc_level() Running the external_aborts selftest at EL2 leads to an ugly splat due to the stage-1 MMU being disabled for the walked context, owing to the fact that __kvm_find_s1_desc_level() is hardcoded to the EL1&0 regime. Select the appropriate translation regime for the stage-1 walk based on the current vCPU context. Fixes: b8e625167a32 ("KVM: arm64: Add S1 IPA to page table level walker") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 20bb9af125b1..e2e06ec8a67b 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -1602,13 +1602,17 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level) .fn = match_s1_desc, .priv = &dm, }, - .regime = TR_EL10, .as_el0 = false, .pan = false, }; struct s1_walk_result wr = {}; int ret; + if (is_hyp_ctxt(vcpu)) + wi.regime = vcpu_el2_e2h_is_set(vcpu) ? 
TR_EL20 : TR_EL2; + else + wi.regime = TR_EL10; + ret = setup_s1_walk(vcpu, &wi, &wr, va); if (ret) return ret; From 890c608b4d5e6a616693da92a2d4e7de4ab9e6c5 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 26 Sep 2025 15:44:54 -0700 Subject: [PATCH 067/305] KVM: arm64: selftests: Test effective value of HCR_EL2.AMO A defect against the architecture now allows an implementation to treat AMO as 1 when HCR_EL2.{E2H, TGE} = {1, 0}. KVM now takes advantage of this interpretation to address a quality of emulation issue w.r.t. SError injection. Add a corresponding test case and expect a pending SError to be taken. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- .../selftests/kvm/arm64/external_aborts.c | 43 +++++++++++++++++++ .../selftests/kvm/include/arm64/processor.h | 12 +++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 592b26ded779..d8fe17a6cc59 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -359,6 +359,44 @@ static void test_mmio_ease(void) kvm_vm_free(vm); } +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. 
+ */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + int main(void) { test_mmio_abort(); @@ -369,4 +407,9 @@ int main(void) test_serror_emulated(); test_mmio_ease(); test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); } diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 6f481475c135..ff928716574d 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -305,7 +305,17 @@ void test_wants_mte(void); void test_disable_default_vgic(void); bool vm_supports_el2(struct kvm_vm *vm); -static bool vcpu_has_el2(struct kvm_vcpu *vcpu) + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} + +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) { return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); } From cb49b7b8622e914171e9eb7197c006320889e0fc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 26 Sep 2025 08:58:38 -0700 Subject: [PATCH 068/305] KVM: arm64: selftests: Track width of timer counter as "int", not "uint64_t" Store the width of arm64's timer counter as an "int", not a "uint64_t". ilog2() returns an "int", and more importantly using what is an "unsigned long" under the hood makes clang unhappy due to a type mismatch when clamping the width to a sane value. 
arm64/arch_timer_edge_cases.c:1032:10: error: comparison of distinct pointer types ('typeof (width) *' (aka 'unsigned long *') and 'typeof (56) *' (aka 'int *')) [-Werror,-Wcompare-distinct-pointer-types] 1032 | width = clamp(width, 56, 64); | ^~~~~~~~~~~~~~~~~~~~ tools/include/linux/kernel.h:47:45: note: expanded from macro 'clamp' 47 | #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) | ^~~~~~~~~~~~ tools/include/linux/kernel.h:33:17: note: expanded from macro 'max' 33 | (void) (&_max1 == &_max2); \ | ~~~~~~ ^ ~~~~~~ tools/include/linux/kernel.h:39:9: note: expanded from macro 'min' 39 | typeof(x) _min1 = (x); \ | ^ Fixes: fad4cf944839 ("KVM: arm64: selftests: Determine effective counter width in arch_timer_edge_cases") Cc: Sebastian Ott Signed-off-by: Sean Christopherson Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index 91906414a474..993c9e38e729 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -1020,7 +1020,7 @@ static void set_counter_defaults(void) { const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; uint64_t freq = read_sysreg(CNTFRQ_EL0); - uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + int width = ilog2(MIN_ROLLOVER_SECS * freq); width = clamp(width, 56, 64); CVAL_MAX = GENMASK_ULL(width - 1, 0); From 0aa1b76fe1429629215a7c79820e4b96233ac4a3 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 01:52:37 -0700 Subject: [PATCH 069/305] KVM: arm64: Prevent access to vCPU events before init Another day, another syzkaller bug. 
KVM erroneously allows userspace to pend vCPU events for a vCPU that hasn't been initialized yet, leading to KVM interpreting a bunch of uninitialized garbage for routing / injecting the exception. In one case the injection code and the hyp disagree on whether the vCPU has a 32bit EL1 and put the vCPU into an illegal mode for AArch64, tripping the BUG() in exception_target_el() during the next injection: kernel BUG at arch/arm64/kvm/inject_fault.c:40! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP CPU: 3 UID: 0 PID: 318 Comm: repro Not tainted 6.17.0-rc4-00104-g10fd0285305d #6 PREEMPT Hardware name: linux,dummy-virt (DT) pstate: 21402009 (nzCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : exception_target_el+0x88/0x8c lr : pend_serror_exception+0x18/0x13c sp : ffff800082f03a10 x29: ffff800082f03a10 x28: ffff0000cb132280 x27: 0000000000000000 x26: 0000000000000000 x25: ffff0000c2a99c20 x24: 0000000000000000 x23: 0000000000008000 x22: 0000000000000002 x21: 0000000000000004 x20: 0000000000008000 x19: ffff0000c2a99c20 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 00000000200000c0 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff800082f03af8 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffff800080f621f0 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 000000000040009b x1 : 0000000000000003 x0 : ffff0000c2a99c20 Call trace: exception_target_el+0x88/0x8c (P) kvm_inject_serror_esr+0x40/0x3b4 __kvm_arm_vcpu_set_events+0xf0/0x100 kvm_arch_vcpu_ioctl+0x180/0x9d4 kvm_vcpu_ioctl+0x60c/0x9f4 __arm64_sys_ioctl+0xac/0x104 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xf0 el0t_64_sync_handler+0xa0/0xe4 el0t_64_sync+0x198/0x19c Code: f946bc01 b4fffe61 9101e020 17fffff2 (d4210000) Reject the ioctls outright as no sane VMM would call these before KVM_ARM_VCPU_INIT anyway. 
Even if it did the exception would've been thrown away by the eventual reset of the vCPU's state. Cc: stable@vger.kernel.org # 6.17 Fixes: b7b27facc7b5 ("arm/arm64: KVM: Add KVM_GET/SET_VCPU_EVENTS") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f21d1b7f20f8..f01cacb669cf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1794,6 +1794,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (kvm_arm_vcpu_get_events(vcpu, &events)) return -EINVAL; @@ -1805,6 +1808,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_SET_VCPU_EVENTS: { struct kvm_vcpu_events events; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + if (copy_from_user(&events, argp, sizeof(events))) return -EFAULT; From cc4309324dc695f62d25d56c0b29805e9724170c Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 16:36:20 -0700 Subject: [PATCH 070/305] KVM: arm64: Document vCPU event ioctls as requiring init'ed vCPU KVM rejects calls to KVM_{GET,SET}_VCPU_EVENTS for an uninitialized vCPU as of commit cc96679f3c03 ("KVM: arm64: Prevent access to vCPU events before init"). Update the corresponding API documentation. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 6ae24c5ca559..4973c74db5c6 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered directly to the virtual CPU). +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. 
+ :: struct kvm_vcpu_events { @@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API. See KVM_GET_VCPU_EVENTS for the data structure. +Calling this ioctl on a vCPU that hasn't been initialized will return +-ENOEXEC. 4.33 KVM_GET_DEBUGREGS ---------------------- From a133052666bed0dc0b169952e9d3f9e6b2125f9a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 Sep 2025 12:33:02 -0700 Subject: [PATCH 071/305] KVM: selftests: Fix irqfd_test for non-x86 architectures The KVM_IRQFD ioctl fails if no irqchip is present in-kernel, which isn't too surprising as there's not much KVM can do for an IRQ if it cannot resolve a destination. As written the irqfd_test assumes that a 'default' VM created in selftests has an in-kernel irqchip created implicitly. That may be the case on x86 but it isn't necessarily true on other architectures. Add an arch predicate indicating if 'default' VMs get an irqchip and make the irqfd_test depend on it. Work around arm64 VGIC initialization requirements by using vm_create_with_one_vcpu(), ignoring the created vCPU as it isn't used for the test. 
Reported-by: Sebastian Ott Reported-by: Naresh Kamboju Acked-by: Sean Christopherson Fixes: 7e9b231c402a ("KVM: selftests: Add a KVM_IRQFD test to verify uniqueness requirements") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/include/kvm_util.h | 2 ++ tools/testing/selftests/kvm/irqfd_test.c | 14 +++++++++++--- tools/testing/selftests/kvm/lib/arm64/processor.c | 5 +++++ tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++++ tools/testing/selftests/kvm/lib/s390/processor.c | 5 +++++ tools/testing/selftests/kvm/lib/x86/processor.c | 5 +++++ 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 26cc30290e76..112d3f443a17 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1273,4 +1273,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); uint32_t guest_get_vcpuid(void); +bool kvm_arch_has_default_irqchip(void); + #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c index 7c301b4c7005..5d7590d01868 100644 --- a/tools/testing/selftests/kvm/irqfd_test.c +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) int main(int argc, char *argv[]) { pthread_t racing_thread; + struct kvm_vcpu *unused; int r, i; - /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ - vm1 = vm_create(1); - vm2 = vm_create(1); + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. 
+ */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); WRITE_ONCE(__eventfd, kvm_new_eventfd()); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 369a4c87dd8f..54f6d17c78f7 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm) if (vm->arch.has_gic) close(vm->arch.gic_fd); } + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6743fbd9bd67..a35adfebfa23 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2344,3 +2344,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) pg = paddr >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..8ceeb17c819a 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index c748cd9b2eef..b418502c5ecc 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} From 
05a02490faeb952f1c8d2f5c38346fa0a717a483 Mon Sep 17 00:00:00 2001 From: Osama Abdelkader Date: Tue, 30 Sep 2025 16:56:21 +0300 Subject: [PATCH 072/305] KVM: arm64: Remove unreachable break after return Remove an unnecessary 'break' statement that follows a 'return' in arch/arm64/kvm/at.c. The break is unreachable. Signed-off-by: Osama Abdelkader Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e2e06ec8a67b..be26d5aa668c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -91,7 +91,6 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o case OP_AT_S1E2W: case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; - break; default: return (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; From 9a7f87eb587da49993f47f44c4c5535d8de76750 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 12 Oct 2025 23:43:52 +0800 Subject: [PATCH 073/305] KVM: arm64: selftests: Sync ID_AA64PFR1, MPIDR, CLIDR in guest We forgot to sync several registers (ID_AA64PFR1, MPIDR, CLIDR) in guest to make sure that the guest had seen the written value. Add them to the list. 
Signed-off-by: Zenghui Yu Reviewed-By: Ben Horgan Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8ff1e853f7f8..5e24f77868b5 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -249,11 +249,14 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_REG_SYNC(SYS_MIDR_EL1); GUEST_REG_SYNC(SYS_REVIDR_EL1); From b9ce79887e270ed64ed499aa69f903cdca401c2f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 10 Oct 2025 21:04:16 +0200 Subject: [PATCH 074/305] smb: client: Return a status code only as a constant in sid_to_id() Return a status code without storing it in an intermediate variable. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Steve French --- fs/smb/client/cifsacl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 63b3b1290bed..ce2ebc213a1d 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -339,7 +339,6 @@ int sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, struct cifs_fattr *fattr, uint sidtype) { - int rc = 0; struct key *sidkey; char *sidstr; const struct cred *saved_cred; @@ -446,12 +445,12 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid, * fails then we just fall back to using the ctx->linux_uid/linux_gid. 
*/ got_valid_id: - rc = 0; if (sidtype == SIDOWNER) fattr->cf_uid = fuid; else fattr->cf_gid = fgid; - return rc; + + return 0; } int From 911063b590ce77d473e92716f05f34712f97ef95 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 10 Oct 2025 14:48:13 +0200 Subject: [PATCH 075/305] smb: client: Omit one redundant variable assignment in cifs_xattr_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local variable “rc” is assigned a value in an if branch without using it before it is reassigned there. Thus delete this assignment statement. Signed-off-by: Markus Elfring Signed-off-by: Steve French --- fs/smb/client/xattr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c index b88fa04f5792..029910d56c22 100644 --- a/fs/smb/client/xattr.c +++ b/fs/smb/client/xattr.c @@ -178,7 +178,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler, memcpy(pacl, value, size); if (pTcon->ses->server->ops->set_acl) { int aclflags = 0; - rc = 0; switch (handler->flags) { case XATTR_CIFS_NTSD_FULL: From e487f13cc94fa80c71f217d7b176e48fbb5d6c46 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:21 +0200 Subject: [PATCH 076/305] smb: smbdirect: introduce smbdirect_mr_io.{kref,mutex} and SMBDIRECT_MR_DISABLED This will be used in the next commits in order to improve the client code. A broken connection can just disable the smbdirect_mr_io while keeping the memory around for the caller. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index db22a1d0546b..361db7f9f623 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -437,13 +437,22 @@ enum smbdirect_mr_state { SMBDIRECT_MR_READY, SMBDIRECT_MR_REGISTERED, SMBDIRECT_MR_INVALIDATED, - SMBDIRECT_MR_ERROR + SMBDIRECT_MR_ERROR, + SMBDIRECT_MR_DISABLED }; struct smbdirect_mr_io { struct smbdirect_socket *socket; struct ib_cqe cqe; + /* + * We can have up to two references: + * 1. by the connection + * 2. by the registration + */ + struct kref kref; + struct mutex mutex; + struct list_head list; enum smbdirect_mr_state state; From abe5b71c391352127925bf951f3169d205c5caa7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:22 +0200 Subject: [PATCH 077/305] smb: client: change smbd_deregister_mr() to return void No caller checks the return value and this makes further changes easier. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 4 +--- fs/smb/client/smbdirect.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 316f398c70f4..a20aa2ddf57d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2612,7 +2612,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) +void smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) { struct ib_send_wr *wr; struct smbdirect_socket *sc = smbdirect_mr->socket; @@ -2662,8 +2662,6 @@ int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) done: if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); - - return rc; } static bool smb_set_sge(struct smb_extract_to_rdma *rdma, diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index d67ac5ddaff4..577d37dbeb8a 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -60,7 +60,7 @@ int smbd_send(struct TCP_Server_Info *server, struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); -int smbd_deregister_mr(struct smbdirect_mr_io *mr); +void smbd_deregister_mr(struct smbdirect_mr_io *mr); #else #define cifs_rdma_enabled(server) 0 From 19421ec198981f60373080af67e67b6a6fcf191e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:23 +0200 Subject: [PATCH 078/305] smb: client: let destroy_mr_list() call list_del(&mr->list) This makes the code clearer and will make further changes easier. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a20aa2ddf57d..b7be67dacd09 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2363,6 +2363,7 @@ static void destroy_mr_list(struct smbdirect_socket *sc) mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); + list_del(&mr->list); kfree(mr); } } From a8e128b293e2b08d4ecca7c63ed1cb5b97f30af9 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:24 +0200 Subject: [PATCH 079/305] smb: client: let destroy_mr_list() remove locked from the list This should make sure get_mr() can't see the removed entries. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b7be67dacd09..b974ca4e0b2e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2355,9 +2355,16 @@ static void smbd_mr_recovery_work(struct work_struct *work) static void destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; + LIST_HEAD(all_list); + unsigned long flags; disable_work_sync(&sc->mr_io.recovery_work); - list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) { + + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_splice_tail_init(&sc->mr_io.all.list, &all_list); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + + list_for_each_entry_safe(mr, tmp, &all_list, list) { if (mr->state == SMBDIRECT_MR_INVALIDATED) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, 
mr->dir); From 9bebb8924b27f06e7072f0b18a5f78cef561c810 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:25 +0200 Subject: [PATCH 080/305] smb: client: improve logic in allocate_mr_list() - use 'mr' as variable name - use goto labels for easier cleanup - use destroy_mr_list() - style fixes - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work) on success This will make further changes easier. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 69 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b974ca4e0b2e..658ca11cb26c 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2385,10 +2385,9 @@ static void destroy_mr_list(struct smbdirect_socket *sc) static int allocate_mr_list(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - int i; - struct smbdirect_mr_io *smbdirect_mr, *tmp; - - INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + struct smbdirect_mr_io *mr; + int ret; + u32 i; if (sp->responder_resources == 0) { log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); @@ -2397,42 +2396,48 @@ static int allocate_mr_list(struct smbdirect_socket *sc) /* Allocate more MRs (2x) than hardware responder_resources */ for (i = 0; i < sp->responder_resources * 2; i++) { - smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); - if (!smbdirect_mr) - goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, - sp->max_frmr_depth); - if (IS_ERR(smbdirect_mr->mr)) { + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + ret = -ENOMEM; + goto kzalloc_mr_failed; + } + + mr->mr = ib_alloc_mr(sc->ib.pd, + sc->mr_io.type, + sp->max_frmr_depth); + if (IS_ERR(mr->mr)) { +
ret = PTR_ERR(mr->mr); log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", sc->mr_io.type, sp->max_frmr_depth); - goto out; + goto ib_alloc_mr_failed; } - smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth, - sizeof(struct scatterlist), - GFP_KERNEL); - if (!smbdirect_mr->sgt.sgl) { - log_rdma_mr(ERR, "failed to allocate sgl\n"); - ib_dereg_mr(smbdirect_mr->mr); - goto out; - } - smbdirect_mr->state = SMBDIRECT_MR_READY; - smbdirect_mr->socket = sc; - list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list); + mr->sgt.sgl = kcalloc(sp->max_frmr_depth, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!mr->sgt.sgl) { + ret = -ENOMEM; + log_rdma_mr(ERR, "failed to allocate sgl\n"); + goto kcalloc_sgl_failed; + } + mr->state = SMBDIRECT_MR_READY; + mr->socket = sc; + + list_add_tail(&mr->list, &sc->mr_io.all.list); atomic_inc(&sc->mr_io.ready.count); } + + INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + return 0; -out: - kfree(smbdirect_mr); -cleanup_entries: - list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) { - list_del(&smbdirect_mr->list); - ib_dereg_mr(smbdirect_mr->mr); - kfree(smbdirect_mr->sgt.sgl); - kfree(smbdirect_mr); - } - return -ENOMEM; +kcalloc_sgl_failed: + ib_dereg_mr(mr->mr); +ib_alloc_mr_failed: + kfree(mr); +kzalloc_mr_failed: + destroy_mr_list(sc); + return ret; } /* From c8478502960eb8fb9847b36a66380adf421cdc62 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:26 +0200 Subject: [PATCH 081/305] smb: client: improve logic in smbd_register_mr() - use 'mr' as variable name - style fixes This will make further changes easier. 
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 52 +++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 658ca11cb26c..a863b6fff87a 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2517,9 +2517,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, { struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbdirect_mr_io *smbdirect_mr; + struct smbdirect_mr_io *mr; int rc, num_pages; - enum dma_data_direction dir; struct ib_reg_wr *reg_wr; num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); @@ -2530,49 +2529,45 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return NULL; } - smbdirect_mr = get_mr(sc); - if (!smbdirect_mr) { + mr = get_mr(sc); + if (!mr) { log_rdma_mr(ERR, "get_mr returning NULL\n"); return NULL; } - dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - smbdirect_mr->dir = dir; - smbdirect_mr->need_invalidate = need_invalidate; - smbdirect_mr->sgt.nents = 0; - smbdirect_mr->sgt.orig_nents = 0; + mr->dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; + mr->need_invalidate = need_invalidate; + mr->sgt.nents = 0; + mr->sgt.orig_nents = 0; log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", num_pages, iov_iter_count(iter), sp->max_frmr_depth); - smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth); + smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth); - rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, dir); + rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", - num_pages, dir, rc); + num_pages, mr->dir, rc); goto dma_map_error; } - rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, NULL, PAGE_SIZE); - if (rc != smbdirect_mr->sgt.nents) { + rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE); + if (rc != mr->sgt.nents) { log_rdma_mr(ERR, - "ib_map_mr_sg failed rc = %d nents = %x\n", - rc, smbdirect_mr->sgt.nents); + "ib_map_mr_sg failed rc = %d nents = %x\n", + rc, mr->sgt.nents); goto map_mr_error; } - ib_update_fast_reg_key(smbdirect_mr->mr, - ib_inc_rkey(smbdirect_mr->mr->rkey)); - reg_wr = &smbdirect_mr->wr; + ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); + reg_wr = &mr->wr; reg_wr->wr.opcode = IB_WR_REG_MR; - smbdirect_mr->cqe.done = register_mr_done; - reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = register_mr_done; + reg_wr->wr.wr_cqe = &mr->cqe; reg_wr->wr.num_sge = 0; reg_wr->wr.send_flags = IB_SEND_SIGNALED; - reg_wr->mr = smbdirect_mr->mr; - reg_wr->key = smbdirect_mr->mr->rkey; + reg_wr->mr = mr->mr; + reg_wr->key = mr->mr->rkey; reg_wr->access = writing ? 
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; @@ -2584,18 +2579,17 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, */ rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); if (!rc) - return smbdirect_mr; + return mr; log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, smbdirect_mr->dir); + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); dma_map_error: - smbdirect_mr->state = SMBDIRECT_MR_ERROR; + mr->state = SMBDIRECT_MR_ERROR; if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); From 56c817e31acedc8a9041b181a15755e5a7b55f2b Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:27 +0200 Subject: [PATCH 082/305] smb: client: improve logic in smbd_deregister_mr() - use 'mr' as variable name - style fixes This will make further changes easier.
Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index a863b6fff87a..af0642e94d7e 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2619,44 +2619,41 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -void smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) +void smbd_deregister_mr(struct smbdirect_mr_io *mr) { - struct ib_send_wr *wr; - struct smbdirect_socket *sc = smbdirect_mr->socket; - int rc = 0; + struct smbdirect_socket *sc = mr->socket; + + if (mr->need_invalidate) { + struct ib_send_wr *wr = &mr->inv_wr; + int rc; - if (smbdirect_mr->need_invalidate) { /* Need to finish local invalidation before returning */ - wr = &smbdirect_mr->inv_wr; wr->opcode = IB_WR_LOCAL_INV; - smbdirect_mr->cqe.done = local_inv_done; - wr->wr_cqe = &smbdirect_mr->cqe; + mr->cqe.done = local_inv_done; + wr->wr_cqe = &mr->cqe; wr->num_sge = 0; - wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey; + wr->ex.invalidate_rkey = mr->mr->rkey; wr->send_flags = IB_SEND_SIGNALED; - init_completion(&smbdirect_mr->invalidate_done); + init_completion(&mr->invalidate_done); rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(sc); goto done; } - wait_for_completion(&smbdirect_mr->invalidate_done); - smbdirect_mr->need_invalidate = false; + wait_for_completion(&mr->invalidate_done); + mr->need_invalidate = false; } else /* * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED * and defer to mr_recovery_work to 
recover the MR for next use */ - smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; + mr->state = SMBDIRECT_MR_INVALIDATED; - if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) { - ib_dma_unmap_sg( - sc->ib.dev, smbdirect_mr->sgt.sgl, - smbdirect_mr->sgt.nents, - smbdirect_mr->dir); - smbdirect_mr->state = SMBDIRECT_MR_READY; + if (mr->state == SMBDIRECT_MR_INVALIDATED) { + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->state = SMBDIRECT_MR_READY; if (atomic_inc_return(&sc->mr_io.ready.count) == 1) wake_up(&sc->mr_io.ready.wait_queue); } else From b9c0becc2fceb53e4a958575344e92c0e4f812bb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:28 +0200 Subject: [PATCH 083/305] smb: client: call ib_dma_unmap_sg if mr->sgt.nents is not 0 This seems to be the more reliable way to check if we need to call ib_dma_unmap_sg(). Fixes: c7398583340a ("CIFS: SMBD: Implement RDMA memory registration") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index af0642e94d7e..21dcd326af3d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2365,9 +2365,8 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { - if (mr->state == SMBDIRECT_MR_INVALIDATED) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, - mr->sgt.nents, mr->dir); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); list_del(&mr->list); @@ -2589,6 +2588,7 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, 
mr->sgt.nents, mr->dir); dma_map_error: + mr->sgt.nents = 0; mr->state = SMBDIRECT_MR_ERROR; if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); @@ -2651,8 +2651,12 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) */ mr->state = SMBDIRECT_MR_INVALIDATED; - if (mr->state == SMBDIRECT_MR_INVALIDATED) { + if (mr->sgt.nents) { ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + mr->sgt.nents = 0; + } + + if (mr->state == SMBDIRECT_MR_INVALIDATED) { mr->state = SMBDIRECT_MR_READY; if (atomic_inc_return(&sc->mr_io.ready.count) == 1) wake_up(&sc->mr_io.ready.wait_queue); From 1ef0e16c3d7ca07432987840d8eef1a9ffb67dec Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:29 +0200 Subject: [PATCH 084/305] smb: client: let destroy_mr_list() call ib_dereg_mr() before ib_dma_unmap_sg() This is more consistent as we call ib_dma_unmap_sg() only when the memory is no longer registered. This is the same pattern as calling ib_dma_unmap_sg() after IB_WR_LOCAL_INV. 
Fixes: c7398583340a ("CIFS: SMBD: Implement RDMA memory registration") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 21dcd326af3d..c3330e43488f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2365,9 +2365,10 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { + if (mr->mr) + ib_dereg_mr(mr->mr); if (mr->sgt.nents) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); list_del(&mr->list); kfree(mr); From c35dd838666d47de2848639234ec32e3ba22b49f Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Fri, 10 Oct 2025 23:17:07 +0530 Subject: [PATCH 085/305] KVM: arm64: Guard PMSCR_EL1 initialization with SPE presence check Commit efad60e46057 ("KVM: arm64: Initialize PMSCR_EL1 when in VHE") does not perform sufficient check before initializing PMSCR_EL1 to 0 when running in VHE mode. On some platforms, this causes the system to hang during boot, as EL3 has not delegated access to the Profiling Buffer to the Non-secure world, nor does it reinject an UNDEF on sysreg trap. To avoid this issue, restrict the PMSCR_EL1 initialization to CPUs that support Statistical Profiling Extension (FEAT_SPE) and have the Profiling Buffer accessible in Non-secure EL1. This is determined via a new helper `cpu_has_spe()` which checks both PMSVer and PMBIDR_EL1.P. This ensures the initialization only affects CPUs where SPE is implemented and usable, preventing boot failures on platforms where SPE is not properly configured. 
Fixes: efad60e46057 ("KVM: arm64: Initialize PMSCR_EL1 when in VHE") Signed-off-by: Mukesh Ojha Signed-off-by: Marc Zyngier --- arch/arm64/kvm/debug.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3515a273eaa2..3ad6b7c6e4ba 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -15,6 +15,12 @@ #include #include +static int cpu_has_spe(u64 dfr0) +{ + return cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && + !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P); +} + /** * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value * @@ -77,13 +83,12 @@ void kvm_init_host_debug_data(void) *host_data_ptr(debug_brps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr0); *host_data_ptr(debug_wrps) = SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr0); + if (cpu_has_spe(dfr0)) + host_data_set_flag(HAS_SPE); + if (has_vhe()) return; - if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) && - !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P)) - host_data_set_flag(HAS_SPE); - /* Check if we have BRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) host_data_set_flag(HAS_BRBE); @@ -102,7 +107,7 @@ void kvm_init_host_debug_data(void) void kvm_debug_init_vhe(void) { /* Clear PMSCR_EL1.E{0,1}SPE which reset to UNKNOWN values. */ - if (SYS_FIELD_GET(ID_AA64DFR0_EL1, PMSVer, read_sysreg(id_aa64dfr0_el1))) + if (host_data_test_flag(HAS_SPE)) write_sysreg_el1(0, SYS_PMSCR); } From 2192d348c0aa0cc2e7249dc3709f21bfe0a0170c Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 8 Oct 2025 23:45:20 +0800 Subject: [PATCH 086/305] KVM: arm64: selftests: Allocate vcpus with correct size vcpus array contains pointers to struct kvm_vcpu {}. It is way overkill to allocate the array with (nr_cpus * sizeof(struct kvm_vcpu)). Fix the allocation by using the correct size. 
Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index 87922a89b134..b134a304f0a6 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -331,7 +331,7 @@ static void setup_vm(void) { int i; - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); From d5e6310a0d996493b1af9f3eeec418350523388b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 7 Oct 2025 12:52:55 -0700 Subject: [PATCH 087/305] KVM: arm64: selftests: Actually enable IRQs in vgic_lpi_stress vgic_lpi_stress rather hilariously leaves IRQs disabled for the duration of the test. While the ITS translation of MSIs happens regardless of this, for completeness the guest should actually handle the LPIs. 
Signed-off-by: Oliver Upton Reviewed-by: Zenghui Yu Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index b134a304f0a6..687d04463983 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -123,6 +123,7 @@ static void guest_setup_gic(void) static void guest_code(size_t nr_lpis) { guest_setup_gic(); + local_irq_enable(); GUEST_SYNC(0); From 3193287ddffbce29fd1a79d812f543c0fe4861d1 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 7 Oct 2025 16:07:13 +0000 Subject: [PATCH 088/305] KVM: arm64: gic-v3: Only set ICH_HCR traps for v2-on-v3 or v3 guests The ICH_HCR_EL2 traps are used when running on GICv3 hardware, or when running a GICv3-based guest using FEAT_GCIE_LEGACY on GICv5 hardware. When running a GICv2 guest on GICv3 hardware the traps are used to ensure that the guest never sees any part of GICv3 (only GICv2 is visible to the guest), and when running a GICv3 guest they are used to trap in specific scenarios. They are not applicable for a GICv2-native guest, and won't be applicable for a(n upcoming) GICv5 guest. The traps themselves are configured in the vGIC CPU IF state, which is stored as a union. Updating the wrong aperture of the union risks corrupting state, and therefore needs to be avoided at all costs. Bail early if we're not running a compatible guest (GICv2 on GICv3 hardware, GICv3 native, GICv3 on GICv5 hardware). Trap everything unconditionally if we're running a GICv2 guest on GICv3 hardware. Otherwise, conditionally set up GICv3-native trapping. 
Signed-off-by: Sascha Bischoff Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index f1c153106c56..6fbb4b099855 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -297,8 +297,11 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + if (!vgic_is_v3(vcpu->kvm)) + return; + /* Hide GICv3 sysreg if necessary */ - if (!kvm_has_gicv3(vcpu->kvm)) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; From 164ecbf73c3ea61455e07eefdad8050a7b569558 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 7 Oct 2025 15:48:54 +0000 Subject: [PATCH 089/305] Documentation: KVM: Update GICv3 docs for GICv5 hosts GICv5 hosts optionally include FEAT_GCIE_LEGACY, which allows them to execute GICv3-based VMs on GICv5 hardware. Update the GICv3 documentation to reflect this now that GICv3 guests are supported on compatible GICv5 hosts. Signed-off-by: Sascha Bischoff Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/devices/arm-vgic-v3.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst index ff02102f7141..5395ee66fc32 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst @@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. It is not possible to create both a GICv3 and GICv2 on the same VM. -Creating a guest GICv3 device requires a host GICv3 as well. +Creating a guest GICv3 device requires a host GICv3 host, or a GICv5 host with +support for FEAT_GCIE_LEGACY.
Groups: From 4cab5c857d1f92b4b322e30349fdc5e2e38e7a2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:45 +0100 Subject: [PATCH 090/305] KVM: arm64: Hide CNTHV_*_EL2 from userspace for nVHE guests Although we correctly UNDEF any CNTHV_*_EL2 access from the guest when E2H==0, we still expose these registers to userspace, which is a bad idea. Drop the ad-hoc UNDEF injection and switch to a .visibility() callback which will also hide the register from userspace. Fixes: 0e45981028550 ("KVM: arm64: timer: Don't adjust the EL2 virtual timer offset") Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ee8a7033c85b..9f2f4e0b042e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1594,16 +1594,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -static bool access_hv_timer(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (!vcpu_el2_e2h_is_set(vcpu)) - return undef_access(vcpu, p, r); - - return access_arch_timer(vcpu, p, r); -} - static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2831,6 +2821,16 @@ static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu, return __el2_visibility(vcpu, rd, s1pie_visibility); } +static unsigned int cnthv_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + if (vcpu_has_nv(vcpu) && + !vcpu_has_feature(vcpu, KVM_ARM_VCPU_HAS_EL2_E2H0)) + return 0; + + return REG_HIDDEN; +} + static bool access_mdcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -3691,9 +3691,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), - { 
SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, - EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, + EL2_REG_FILTERED(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), + EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, From aa68975c973ed3b0bd4ff513113495588afb855c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:46 +0100 Subject: [PATCH 091/305] KVM: arm64: Introduce timer_context_to_vcpu() helper We currently have a vcpu pointer nested into each timer context. As we are about to remove this pointer, introduce a helper (aptly named timer_context_to_vcpu()) that returns this pointer, at least until we repaint the data structure. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 25 +++++++++++++------------ include/kvm/arm_arch_timer.h | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index dbd74e4885e2..e5a25e743f5b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu) u32 timer_get_ctl(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt) u64 timer_get_cval(struct arch_timer_context *ctxt) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt) static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct 
kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) { - struct kvm_vcpu *vcpu = ctxt->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt); switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: @@ -343,7 +343,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) u64 ns; ctx = container_of(hrt, struct arch_timer_context, hrtimer); - vcpu = ctx->vcpu; + vcpu = timer_context_to_vcpu(ctx); trace_kvm_timer_hrtimer_expire(ctx); @@ -436,8 +436,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) * * But hey, it's fast, right? */ - if (is_hyp_ctxt(ctx->vcpu) && - (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) { + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); + if (is_hyp_ctxt(vcpu) && + (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) { unsigned long val = timer_get_ctl(ctx); __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level); timer_set_ctl(ctx, val); @@ -470,7 +471,7 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); if (should_fire != ctx->irq.level) - kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx); kvm_timer_update_status(ctx, should_fire); @@ -498,7 +499,7 @@ static void set_cntpoff(u64 cntpoff) static void timer_save_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -609,7 +610,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) static void timer_restore_state(struct arch_timer_context *ctx) { - struct arch_timer_cpu *timer = 
vcpu_timer(ctx->vcpu); + struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx)); enum kvm_arch_timers index = arch_timer_ctx_index(ctx); unsigned long flags; @@ -668,7 +669,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { - struct kvm_vcpu *vcpu = ctx->vcpu; + struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx); bool phys_active = false; /* @@ -677,7 +678,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) * this point and the register restoration, we'll take the * interrupt anyway. */ - kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx); if (irqchip_in_kernel(vcpu->kvm)) phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx)); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 681cf0c8b9df..d188c716d03c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -128,7 +128,7 @@ void kvm_timer_init_vhe(void); #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) #define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) - +#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) #define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) From 8625a670afb05f1e1d69d50a74dbcc9d1b855efe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:47 +0100 Subject: [PATCH 092/305] KVM: arm64: Replace timer context vcpu pointer with timer_id Having to follow a pointer to a vcpu is pretty dumb, when the timers are at a fixed offset in the vcpu structure itself. Trade the vcpu pointer for a timer_id, which can then be used to compute the vcpu address as needed.
Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 4 ++-- include/kvm/arm_arch_timer.h | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e5a25e743f5b..c832c293676a 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -149,7 +149,7 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) { if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt)); + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); return; } @@ -1064,7 +1064,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid) struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid); struct kvm *kvm = vcpu->kvm; - ctxt->vcpu = vcpu; + ctxt->timer_id = timerid; if (timerid == TIMER_VTIMER) ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d188c716d03c..d8e400cb2bff 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,8 +51,6 @@ struct arch_timer_vm_data { }; struct arch_timer_context { - struct kvm_vcpu *vcpu; - /* Emulated Timer (may be unused) */ struct hrtimer hrtimer; u64 ns_frac; @@ -71,6 +69,9 @@ struct arch_timer_context { bool level; } irq; + /* Who am I? 
*/ + enum kvm_arch_timers timer_id; + /* Duplicated state from arch_timer.c for convenience */ u32 host_timer_irq; }; @@ -127,9 +128,9 @@ void kvm_timer_init_vhe(void); #define vcpu_hvtimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HVTIMER]) #define vcpu_hptimer(v) (&(v)->arch.timer_cpu.timers[TIMER_HPTIMER]) -#define arch_timer_ctx_index(ctx) ((ctx) - vcpu_timer((ctx)->vcpu)->timers) -#define timer_context_to_vcpu(ctx) ((ctx)->vcpu) -#define timer_vm_data(ctx) (&(ctx)->vcpu->kvm->arch.timer_data) +#define arch_timer_ctx_index(ctx) ((ctx)->timer_id) +#define timer_context_to_vcpu(ctx) container_of((ctx), struct kvm_vcpu, arch.timer_cpu.timers[(ctx)->timer_id]) +#define timer_vm_data(ctx) (&(timer_context_to_vcpu(ctx)->kvm->arch.timer_data)) #define timer_irq(ctx) (timer_vm_data(ctx)->ppi[arch_timer_ctx_index(ctx)]) u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, From a92d552266890f83126fdef4f777a985cc1302bd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:48 +0100 Subject: [PATCH 093/305] KVM: arm64: Make timer_set_offset() generally accessible Move the timer_set_offset() helper to arm_arch_timer.h, so that it is next to timer_get_offset(), and accessible by the rest of KVM. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 10 ---------- include/kvm/arm_arch_timer.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index c832c293676a..27662a3a3043 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) } } -static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) -{ - if (!ctxt->offset.vm_offset) { - WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); - return; - } - - WRITE_ONCE(*ctxt->offset.vm_offset, offset); -} - u64 kvm_phys_timer_read(void) { return timecounter->cc->read(timecounter->cc); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index d8e400cb2bff..5f7f2ed8817c 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -179,4 +179,14 @@ static inline u64 timer_get_offset(struct arch_timer_context *ctxt) return offset; } +static inline void timer_set_offset(struct arch_timer_context *ctxt, u64 offset) +{ + if (!ctxt->offset.vm_offset) { + WARN(offset, "timer %d\n", arch_timer_ctx_index(ctxt)); + return; + } + + WRITE_ONCE(*ctxt->offset.vm_offset, offset); +} + #endif From 77a0c42eaf03c66936429d190bb2ea1a214bd528 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:49 +0100 Subject: [PATCH 094/305] KVM: arm64: Add timer UAPI workaround to sysreg infrastructure Amongst the numerous bugs that plague the KVM/arm64 UAPI, one of the most annoying things is that the userspace view of the virtual timer has its CVAL and CNT encodings swapped. In order to reduce the amount of code that has to know about this, start by adding handling for this bug in the sys_reg code. Nothing is making use of it yet, as the code responsible for userspace interaction is catching the accesses early.
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 33 ++++++++++++++++++++++++++++++--- arch/arm64/kvm/sys_regs.h | 6 ++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9f2f4e0b042e..8e6f50f54b4b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -5231,15 +5231,28 @@ static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) } } +static u64 kvm_one_reg_to_id(const struct kvm_one_reg *reg) +{ + switch(reg->id) { + case KVM_REG_ARM_TIMER_CVAL: + return TO_ARM64_SYS_REG(CNTV_CVAL_EL0); + case KVM_REG_ARM_TIMER_CNT: + return TO_ARM64_SYS_REG(CNTVCT_EL0); + default: + return reg->id; + } +} + int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5272,13 +5285,14 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, { u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 id = kvm_one_reg_to_id(reg); u64 val; int ret; if (get_user(val, uaddr)) return -EFAULT; - r = id_to_sys_reg_desc(vcpu, reg->id, table, num); + r = id_to_sys_reg_desc(vcpu, id, table, num); if (!r || sysreg_hidden(vcpu, r)) return -ENOENT; @@ -5338,10 +5352,23 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg) static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) { + u64 idx; + if (!*uind) return true; - if (put_user(sys_reg_to_index(reg), *uind)) + switch (reg_to_encoding(reg)) { + case SYS_CNTV_CVAL_EL0: + idx = KVM_REG_ARM_TIMER_CVAL; + break; + case SYS_CNTVCT_EL0: + idx = KVM_REG_ARM_TIMER_CNT; + 
break; + default: + idx = sys_reg_to_index(reg); + } + + if (put_user(idx, *uind)) return false; (*uind)++; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 317abc490368..b3f904472fac 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -257,4 +257,10 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu); (val); \ }) +#define TO_ARM64_SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ From 09424d5d7d4e8b427ee4a737fb7765103789e08a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:50 +0100 Subject: [PATCH 095/305] KVM: arm64: Move CNT*_CTL_EL0 userspace accessors to generic infrastructure Remove the handling of CNT*_CTL_EL0 from guest.c, and move it to sys_regs.c, using a new TIMER_REG() definition to encapsulate it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 4 ---- arch/arm64/kvm/sys_regs.c | 36 +++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 16ba5e9ac86c..dea648706fd5 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -592,10 +592,8 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) } static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CTL, KVM_REG_ARM_TIMER_CNT, KVM_REG_ARM_TIMER_CVAL, - KVM_REG_ARM_PTIMER_CTL, KVM_REG_ARM_PTIMER_CNT, KVM_REG_ARM_PTIMER_CVAL, }; @@ -605,10 +603,8 @@ static const u64 timer_reg_list[] = { static bool is_timer_reg(u64 index) { switch (index) { - case KVM_REG_ARM_TIMER_CTL: case KVM_REG_ARM_TIMER_CNT: case KVM_REG_ARM_TIMER_CVAL: - case KVM_REG_ARM_PTIMER_CTL: case KVM_REG_ARM_PTIMER_CNT: case KVM_REG_ARM_PTIMER_CVAL: return true; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8e6f50f54b4b..d97aacf4c1dc 100644 --- 
a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1594,6 +1594,23 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static int arch_timer_set_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTV_CTL_EL0: + case SYS_CNTP_CTL_EL0: + case SYS_CNTHV_CTL_EL2: + case SYS_CNTHP_CTL_EL2: + val &= ~ARCH_TIMER_CTRL_IT_STAT; + break; + } + + __vcpu_assign_sys_reg(vcpu, rd->reg, val); + return 0; +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2496,15 +2513,20 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, "trap of EL2 register redirected to EL1"); } -#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \ +#define SYS_REG_USER_FILTER(name, acc, rst, v, gu, su, filter) { \ SYS_DESC(SYS_##name), \ .access = acc, \ .reset = rst, \ .reg = name, \ + .get_user = gu, \ + .set_user = su, \ .visibility = filter, \ .val = v, \ } +#define EL2_REG_FILTERED(name, acc, rst, v, filter) \ + SYS_REG_USER_FILTER(name, acc, rst, v, NULL, NULL, filter) + #define EL2_REG(name, acc, rst, v) \ EL2_REG_FILTERED(name, acc, rst, v, el2_visibility) @@ -2515,6 +2537,10 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, EL2_REG_VNCR_FILT(name, hidden_visibility) #define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v) +#define TIMER_REG(name, vis) \ + SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ + NULL, arch_timer_set_user, vis) + /* * Since reset() callback and field val are not used for idregs, they will be * used for specific purposes for idregs. 
@@ -3485,11 +3511,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CTL_EL0, NULL), { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTV_CTL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CTL_EL0, NULL), { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, /* PMEVCNTRn_EL0 */ @@ -3688,11 +3714,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0), EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CTL_EL2, el2_visibility), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, - EL2_REG_FILTERED(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), + TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, From 8af198980eff2ed2a5df3d2ee39f8c9d61f40559 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:51 +0100 Subject: [PATCH 096/305] KVM: arm64: Move CNT*_CVAL_EL0 userspace accessors to generic infrastructure As for the control registers, move the comparator registers to the common infrastructure. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 4 ---- arch/arm64/kvm/sys_regs.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dea648706fd5..c23ec9be4ce2 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -593,9 +593,7 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) static const u64 timer_reg_list[] = { KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_TIMER_CVAL, KVM_REG_ARM_PTIMER_CNT, - KVM_REG_ARM_PTIMER_CVAL, }; #define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) @@ -604,9 +602,7 @@ static bool is_timer_reg(u64 index) { switch (index) { case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_TIMER_CVAL: case KVM_REG_ARM_PTIMER_CNT: - case KVM_REG_ARM_PTIMER_CVAL: return true; } return false; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d97aacf4c1dc..68e88d5c0dfb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3512,11 +3512,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, TIMER_REG(CNTP_CTL_EL0, NULL), - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTP_CVAL_EL0, NULL), { SYS_DESC(SYS_CNTV_TVAL_EL0), access_arch_timer }, TIMER_REG(CNTV_CTL_EL0, NULL), - { SYS_DESC(SYS_CNTV_CVAL_EL0), access_arch_timer }, + TIMER_REG(CNTV_CVAL_EL0, NULL), /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -3715,11 +3715,11 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0), { SYS_DESC(SYS_CNTHP_TVAL_EL2), access_arch_timer }, TIMER_REG(CNTHP_CTL_EL2, el2_visibility), - EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), + TIMER_REG(CNTHP_CVAL_EL2, el2_visibility), { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer, .visibility = cnthv_visibility }, TIMER_REG(CNTHV_CTL_EL2, cnthv_visibility), - EL2_REG_FILTERED(CNTHV_CVAL_EL2, access_arch_timer, 
reset_val, 0, cnthv_visibility), + TIMER_REG(CNTHV_CVAL_EL2, cnthv_visibility), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, From c3be3a48fb18f9d243fac452e0be41469bb246b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:52 +0100 Subject: [PATCH 097/305] KVM: arm64: Move CNT*CT_EL0 userspace accessors to generic infrastructure Moving the counter registers is a bit more involved than for the control and comparator (there is no shadow data for the counter), but still pretty manageable. Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/guest.c | 7 ------- arch/arm64/kvm/sys_regs.c | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c23ec9be4ce2..138e5e2dc10c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -592,19 +592,12 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) } static const u64 timer_reg_list[] = { - KVM_REG_ARM_TIMER_CNT, - KVM_REG_ARM_PTIMER_CNT, }; #define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) static bool is_timer_reg(u64 index) { - switch (index) { - case KVM_REG_ARM_TIMER_CNT: - case KVM_REG_ARM_PTIMER_CNT: - return true; - } return false; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 68e88d5c0dfb..e67eb39ddc11 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1605,12 +1605,38 @@ static int arch_timer_set_user(struct kvm_vcpu *vcpu, case SYS_CNTHP_CTL_EL2: val &= ~ARCH_TIMER_CTRL_IT_STAT; break; + case SYS_CNTVCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read() - val); + return 0; + case SYS_CNTPCT_EL0: + if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) + timer_set_offset(vcpu_ptimer(vcpu), kvm_phys_timer_read() - val); + return 0; } __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } 
+static int arch_timer_get_user(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 *val) +{ + switch (reg_to_encoding(rd)) { + case SYS_CNTVCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_vtimer(vcpu)); + break; + case SYS_CNTPCT_EL0: + *val = kvm_phys_timer_read() - timer_get_offset(vcpu_ptimer(vcpu)); + break; + default: + *val = __vcpu_sys_reg(vcpu, rd->reg); + } + + return 0; +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -2539,7 +2565,7 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, #define TIMER_REG(name, vis) \ SYS_REG_USER_FILTER(name, access_arch_timer, reset_val, 0, \ - NULL, arch_timer_set_user, vis) + arch_timer_get_user, arch_timer_set_user, vis) /* * Since reset() callback and field val are not used for idregs, they will be @@ -3506,8 +3532,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { AMU_AMEVTYPER1_EL0(14), AMU_AMEVTYPER1_EL0(15), - { SYS_DESC(SYS_CNTPCT_EL0), access_arch_timer }, - { SYS_DESC(SYS_CNTVCT_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTPCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, + { SYS_DESC(SYS_CNTVCT_EL0), .access = access_arch_timer, + .get_user = arch_timer_get_user, .set_user = arch_timer_set_user }, { SYS_DESC(SYS_CNTPCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTVCTSS_EL0), access_arch_timer }, { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, From 892f7c38ba3b7de19b3dffb8e148d5fbf1228f20 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:53 +0100 Subject: [PATCH 098/305] KVM: arm64: Fix WFxT handling of nested virt The spec for WFxT indicates that the parameter to the WFxT instruction is relative to the reading of CNTVCT_EL0. This means that the implementation needs to take the execution context into account, as CNTVOFF_EL2 does not always affect readings of CNTVCT_EL0 (such as when HCR_EL2.E2H is 1 and that we're in host context). 
This also rids us of the last instance of KVM_REG_ARM_TIMER_CNT outside of the userspace interaction code. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bca8c80e11da..cc7d5d1709cb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -147,7 +147,12 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) if (esr & ESR_ELx_WFx_ISS_RV) { u64 val, now; - now = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_TIMER_CNT); + now = kvm_phys_timer_read(); + if (is_hyp_ctxt(vcpu) && vcpu_el2_e2h_is_set(vcpu)) + now -= timer_get_offset(vcpu_hvtimer(vcpu)); + else + now -= timer_get_offset(vcpu_vtimer(vcpu)); + val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu)); if (now >= val) From 386aac77da112651a5cdadc4a6b29181592f5aa0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:54 +0100 Subject: [PATCH 099/305] KVM: arm64: Kill leftovers of ad-hoc timer userspace access Now that the whole timer infrastructure is handled as system register accesses, get rid of the now unused ad-hoc infrastructure. 
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 68 ------------------------------------ arch/arm64/kvm/guest.c | 55 ----------------------------- include/kvm/arm_arch_timer.h | 3 -- 3 files changed, 126 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 27662a3a3043..3f675875abea 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1112,49 +1112,6 @@ void kvm_timer_cpu_down(void) disable_percpu_irq(host_ptimer_irq); } -int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - struct arch_timer_context *timer; - - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_TIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_vtimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_TIMER_CVAL: - timer = vcpu_vtimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - case KVM_REG_ARM_PTIMER_CTL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); - break; - case KVM_REG_ARM_PTIMER_CNT: - if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, - &vcpu->kvm->arch.flags)) { - timer = vcpu_ptimer(vcpu); - timer_set_offset(timer, kvm_phys_timer_read() - value); - } - break; - case KVM_REG_ARM_PTIMER_CVAL: - timer = vcpu_ptimer(vcpu); - kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); - break; - - default: - return -1; - } - - return 0; -} - static u64 read_timer_ctl(struct arch_timer_context *timer) { /* @@ -1171,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer) return ctl; } -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - switch (regid) { - case KVM_REG_ARM_TIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_TIMER_CNT: - return 
kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_TIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_vtimer(vcpu), TIMER_REG_CVAL); - case KVM_REG_ARM_PTIMER_CTL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CTL); - case KVM_REG_ARM_PTIMER_CNT: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CNT); - case KVM_REG_ARM_PTIMER_CVAL: - return kvm_arm_timer_read(vcpu, - vcpu_ptimer(vcpu), TIMER_REG_CVAL); - } - return (u64)-1; -} - static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, struct arch_timer_context *timer, enum kvm_arch_timer_regs treg) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 138e5e2dc10c..1c87699fd886 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -591,49 +591,6 @@ static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) return copy_core_reg_indices(vcpu, NULL); } -static const u64 timer_reg_list[] = { -}; - -#define NUM_TIMER_REGS ARRAY_SIZE(timer_reg_list) - -static bool is_timer_reg(u64 index) -{ - return false; -} - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - for (int i = 0; i < NUM_TIMER_REGS; i++) { - if (put_user(timer_reg_list[i], uindices)) - return -EFAULT; - uindices++; - } - - return 0; -} - -static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - int ret; - - ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); - if (ret != 0) - return -EFAULT; - - return kvm_arm_timer_set_reg(vcpu, reg->id, val); -} - -static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) -{ - void __user *uaddr = (void __user *)(long)reg->addr; - u64 val; - - val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? 
-EFAULT : 0; -} - static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) { const unsigned int slices = vcpu_sve_slices(vcpu); @@ -709,7 +666,6 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) res += num_sve_regs(vcpu); res += kvm_arm_num_sys_reg_descs(vcpu); res += kvm_arm_get_fw_num_regs(vcpu); - res += NUM_TIMER_REGS; return res; } @@ -740,11 +696,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return ret; uindices += kvm_arm_get_fw_num_regs(vcpu); - ret = copy_timer_indices(vcpu, uindices); - if (ret < 0) - return ret; - uindices += NUM_TIMER_REGS; - return kvm_arm_copy_sys_reg_indices(vcpu, uindices); } @@ -762,9 +713,6 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return get_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_get_reg(vcpu, reg); } @@ -782,9 +730,6 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); } - if (is_timer_reg(reg->id)) - return set_timer_reg(vcpu, reg); - return kvm_arm_sys_reg_set_reg(vcpu, reg); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 5f7f2ed8817c..7310841f4512 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -107,9 +107,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); void kvm_timer_init_vm(struct kvm *kvm); -u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); -int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); - int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); From 6418330c8478735f625398bc4e96d3ac6ce1e055 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:55 +0100 Subject: [PATCH 
100/305] KVM: arm64: selftests: Make dependencies on VHE-specific registers explicit The hyp virtual timer registers only exist when VHE is present. Similarly, VNCR_EL2 only exists when NV2 is present. Make these dependencies explicit. Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 011fad95dd02..0a4cfb368512 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -65,6 +65,9 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), }; bool filter_reg(__u64 reg) From 4da5a9af78b74fb771a4d25dc794296d10e170b1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:56 +0100 Subject: [PATCH 101/305] KVM: arm64: selftests: Add an E2H=0-specific configuration to get_reg_list Add yet another configuration, this time dealing with E2H=0.
Signed-off-by: Marc Zyngier --- .../selftests/kvm/arm64/get-reg-list.c | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 0a4cfb368512..7a238755f072 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -758,6 +758,10 @@ static __u64 el2_regs[] = { SYS_REG(VSESR_EL2), }; +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -792,6 +796,15 @@ static __u64 el2_regs[] = { .regs = el2_regs, \ .regs_n = ARRAY_SIZE(el2_regs), \ } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -900,6 +913,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + 
PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -914,5 +986,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &el2_sve_pmu_config, &el2_pauth_config, &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); From 5c7cf1e44e94a5408b1b5277810502b0f82b77fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Sep 2025 17:04:57 +0100 Subject: [PATCH 102/305] KVM: arm64: selftests: Fix misleading comment about virtual timer encoding The userspace-visible encodings for CNTV_CVAL_EL0 and CNTVCT_EL0 have been swapped for as long as userspace has had access to the registers. This is documented in arch/arm64/include/uapi/asm/kvm.h. Despite that, the get_reg_list test has unhelpful comments indicating the wrong register for the encoding. Replace this with definitions exposed in the include file, and a comment explaining again the brokenness.
Signed-off-by: Marc Zyngier --- .../testing/selftests/kvm/arm64/get-reg-list.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 7a238755f072..c9b84eeaab6b 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -348,9 +348,20 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ From fb10ddf35c1cc3b2888a944c0a3b1aa3baea585e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Sep 2025 16:51:49 -0700 Subject: [PATCH 103/305] KVM: arm64: Compute per-vCPU FGTs at vcpu_load() To date KVM has used the fine-grained traps for the sake of UNDEF enforcement (so-called FGUs), meaning the constituent parts could be computed on a per-VM basis and folded into the effective value when programmed. Prepare for traps changing based on the vCPU context by computing the whole mess of them at vcpu_load(). Aggressively inline all the helpers to preserve the build-time checks that were there before. 
Signed-off-by: Oliver Upton Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 50 ++++++++ arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/config.c | 82 +++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 148 +++--------------------- arch/arm64/kvm/hyp/nvhe/pkvm.c | 1 + 5 files changed, 151 insertions(+), 131 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b763293281c8..64302c438355 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -816,6 +816,11 @@ struct kvm_vcpu_arch { u64 hcrx_el2; u64 mdcr_el2; + struct { + u64 r; + u64 w; + } fgt[__NR_FGT_GROUP_IDS__]; + /* Exception Information */ struct kvm_vcpu_fault_info fault; @@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void) void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt); void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1); void check_feature_map(void); +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu); +static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + case HFGWTR_EL2: + return HFGRTR_GROUP; + case HFGITR_EL2: + return HFGITR_GROUP; + case HDFGRTR_EL2: + case HDFGWTR_EL2: + return HDFGRTR_GROUP; + case HAFGRTR_EL2: + return HAFGRTR_GROUP; + case HFGRTR2_EL2: + case HFGWTR2_EL2: + return HFGRTR2_GROUP; + case HFGITR2_EL2: + return HFGITR2_GROUP; + case HDFGRTR2_EL2: + case HDFGWTR2_EL2: + return HDFGRTR2_GROUP; + default: + BUILD_BUG_ON(1); + } +} + +#define vcpu_fgt(vcpu, reg) \ + ({ \ + enum fgt_group_id id = __fgt_reg_to_group_id(reg); \ + u64 *p; \ + switch (reg) { \ + case HFGWTR_EL2: \ + case HDFGWTR_EL2: \ + case HFGWTR2_EL2: \ + case HDFGWTR2_EL2: \ + p = &(vcpu)->arch.fgt[id].w; \ + break; \ + default: \ + p = &(vcpu)->arch.fgt[id].r; \ + break; \ + } \ + \ + p; \ + }) #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c 
b/arch/arm64/kvm/arm.c index f01cacb669cf..870953b4a8a7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -642,6 +642,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); + kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index fbd8944a3dea..b1cf7660efe1 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -5,6 +5,8 @@ */ #include +#include +#include #include /* @@ -1428,3 +1430,83 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r break; } } + +static __always_inline struct fgt_masks *__fgt_reg_to_masks(enum vcpu_sysreg reg) +{ + switch (reg) { + case HFGRTR_EL2: + return &hfgrtr_masks; + case HFGWTR_EL2: + return &hfgwtr_masks; + case HFGITR_EL2: + return &hfgitr_masks; + case HDFGRTR_EL2: + return &hdfgrtr_masks; + case HDFGWTR_EL2: + return &hdfgwtr_masks; + case HAFGRTR_EL2: + return &hafgrtr_masks; + case HFGRTR2_EL2: + return &hfgrtr2_masks; + case HFGWTR2_EL2: + return &hfgwtr2_masks; + case HFGITR2_EL2: + return &hfgitr2_masks; + case HDFGRTR2_EL2: + return &hdfgrtr2_masks; + case HDFGWTR2_EL2: + return &hdfgwtr2_masks; + default: + BUILD_BUG_ON(1); + } +} + +static __always_inline void __compute_fgt(struct kvm_vcpu *vcpu, enum vcpu_sysreg reg) +{ + u64 fgu = vcpu->kvm->arch.fgu[__fgt_reg_to_group_id(reg)]; + struct fgt_masks *m = __fgt_reg_to_masks(reg); + u64 clear = 0, set = 0, val = m->nmask; + + set |= fgu & m->mask; + clear |= fgu & m->nmask; + + if (is_nested_ctxt(vcpu)) { + u64 nested = __vcpu_sys_reg(vcpu, reg); + set |= nested & m->mask; + clear |= ~nested & m->nmask; + } + + val |= set; + val &= ~clear; + *vcpu_fgt(vcpu, reg) = val; +} + +static void __compute_hfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HFGWTR_EL2); + + if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) + *vcpu_fgt(vcpu, 
HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; +} + +void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_final_cap(ARM64_HAS_FGT)) + return; + + __compute_fgt(vcpu, HFGRTR_EL2); + __compute_hfgwtr(vcpu); + __compute_fgt(vcpu, HFGITR_EL2); + __compute_fgt(vcpu, HDFGRTR_EL2); + __compute_fgt(vcpu, HDFGWTR_EL2); + __compute_fgt(vcpu, HAFGRTR_EL2); + + if (!cpus_have_final_cap(ARM64_HAS_FGT2)) + return; + + __compute_fgt(vcpu, HFGRTR2_EL2); + __compute_fgt(vcpu, HFGWTR2_EL2); + __compute_fgt(vcpu, HFGITR2_EL2); + __compute_fgt(vcpu, HDFGRTR2_EL2); + __compute_fgt(vcpu, HDFGWTR2_EL2); +} diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b6682202edf3..c5d5e5b86eaf 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -195,123 +195,6 @@ static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) __deactivate_cptr_traps_nvhe(vcpu); } -#define reg_to_fgt_masks(reg) \ - ({ \ - struct fgt_masks *m; \ - switch(reg) { \ - case HFGRTR_EL2: \ - m = &hfgrtr_masks; \ - break; \ - case HFGWTR_EL2: \ - m = &hfgwtr_masks; \ - break; \ - case HFGITR_EL2: \ - m = &hfgitr_masks; \ - break; \ - case HDFGRTR_EL2: \ - m = &hdfgrtr_masks; \ - break; \ - case HDFGWTR_EL2: \ - m = &hdfgwtr_masks; \ - break; \ - case HAFGRTR_EL2: \ - m = &hafgrtr_masks; \ - break; \ - case HFGRTR2_EL2: \ - m = &hfgrtr2_masks; \ - break; \ - case HFGWTR2_EL2: \ - m = &hfgwtr2_masks; \ - break; \ - case HFGITR2_EL2: \ - m = &hfgitr2_masks; \ - break; \ - case HDFGRTR2_EL2: \ - m = &hdfgrtr2_masks; \ - break; \ - case HDFGWTR2_EL2: \ - m = &hdfgwtr2_masks; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - m; \ - }) - -#define compute_clr_set(vcpu, reg, clr, set) \ - do { \ - u64 hfg = __vcpu_sys_reg(vcpu, reg); \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= ~hfg & m->nmask; \ - } while(0) - -#define reg_to_fgt_group_id(reg) \ - ({ \ - enum fgt_group_id id; \ - 
switch(reg) { \ - case HFGRTR_EL2: \ - case HFGWTR_EL2: \ - id = HFGRTR_GROUP; \ - break; \ - case HFGITR_EL2: \ - id = HFGITR_GROUP; \ - break; \ - case HDFGRTR_EL2: \ - case HDFGWTR_EL2: \ - id = HDFGRTR_GROUP; \ - break; \ - case HAFGRTR_EL2: \ - id = HAFGRTR_GROUP; \ - break; \ - case HFGRTR2_EL2: \ - case HFGWTR2_EL2: \ - id = HFGRTR2_GROUP; \ - break; \ - case HFGITR2_EL2: \ - id = HFGITR2_GROUP; \ - break; \ - case HDFGRTR2_EL2: \ - case HDFGWTR2_EL2: \ - id = HDFGRTR2_GROUP; \ - break; \ - default: \ - BUILD_BUG_ON(1); \ - } \ - \ - id; \ - }) - -#define compute_undef_clr_set(vcpu, kvm, reg, clr, set) \ - do { \ - u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)]; \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - set |= hfg & m->mask; \ - clr |= hfg & m->nmask; \ - } while(0) - -#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set) \ - do { \ - struct fgt_masks *m = reg_to_fgt_masks(reg); \ - u64 c = clr, s = set; \ - u64 val; \ - \ - ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ - if (is_nested_ctxt(vcpu)) \ - compute_clr_set(vcpu, reg, c, s); \ - \ - compute_undef_clr_set(vcpu, kvm, reg, c, s); \ - \ - val = m->nmask; \ - val |= s; \ - val &= ~c; \ - write_sysreg_s(val, SYS_ ## reg); \ - } while(0) - -#define update_fgt_traps(hctxt, vcpu, kvm, reg) \ - update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0) - static inline bool cpu_has_amu(void) { u64 pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); @@ -320,33 +203,36 @@ static inline bool cpu_has_amu(void) ID_AA64PFR0_EL1_AMU_SHIFT); } +#define __activate_fgt(hctxt, vcpu, reg) \ + do { \ + ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg); \ + write_sysreg_s(*vcpu_fgt(vcpu, reg), SYS_ ## reg); \ + } while (0) + static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); - struct kvm *kvm = kern_hyp_va(vcpu->kvm); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2); - 
update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0, - cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ? - HFGWTR_EL2_TCR_EL1_MASK : 0); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR_EL2); + __activate_fgt(hctxt, vcpu, HFGITR_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR_EL2); if (cpu_has_amu()) - update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2); + __activate_fgt(hctxt, vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) return; - update_fgt_traps(hctxt, vcpu, kvm, HFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGWTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HFGITR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR2_EL2); - update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGWTR2_EL2); + __activate_fgt(hctxt, vcpu, HFGITR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGRTR2_EL2); + __activate_fgt(hctxt, vcpu, HDFGWTR2_EL2); } #define __deactivate_fgt(htcxt, vcpu, reg) \ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 05774aed09cb..43bde061b65d 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -172,6 +172,7 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu) /* Trust the host for non-protected vcpu features. 
*/ vcpu->arch.hcrx_el2 = host_vcpu->arch.hcrx_el2; + memcpy(vcpu->arch.fgt, host_vcpu->arch.fgt, sizeof(vcpu->arch.fgt)); return 0; } From e0b5a7967dec05144bc98125f98c47f74fd1152b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 24 Sep 2025 16:51:50 -0700 Subject: [PATCH 104/305] KVM: arm64: nv: Use FGT write trap of MDSCR_EL1 when available Marc reports that the performance of running an L3 guest has regressed by 60% as a result of setting MDCR_EL2.TDA to hide bad architecture. That's of course terrible for the single user of recursive NV ;-) While there's nothing to be done on non-FGT systems, take advantage of the precise write trap of MDSCR_EL1 and leave the rest of the debug registers untrapped. Reported-by: Marc Zyngier Signed-off-by: Oliver Upton Reviewed-by: Joey Gouly Signed-off-by: Marc Zyngier --- arch/arm64/kvm/config.c | 10 +++++++++- arch/arm64/kvm/nested.c | 9 ++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/config.c b/arch/arm64/kvm/config.c index b1cf7660efe1..24bb3f36e9d5 100644 --- a/arch/arm64/kvm/config.c +++ b/arch/arm64/kvm/config.c @@ -1489,6 +1489,14 @@ static void __compute_hfgwtr(struct kvm_vcpu *vcpu) *vcpu_fgt(vcpu, HFGWTR_EL2) |= HFGWTR_EL2_TCR_EL1; } +static void __compute_hdfgwtr(struct kvm_vcpu *vcpu) +{ + __compute_fgt(vcpu, HDFGWTR_EL2); + + if (is_hyp_ctxt(vcpu)) + *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1; +} + void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) { if (!cpus_have_final_cap(ARM64_HAS_FGT)) @@ -1498,7 +1506,7 @@ void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu) __compute_hfgwtr(vcpu); __compute_fgt(vcpu, HFGITR_EL2); __compute_fgt(vcpu, HDFGRTR_EL2); - __compute_fgt(vcpu, HDFGWTR_EL2); + __compute_hdfgwtr(vcpu); __compute_fgt(vcpu, HAFGRTR_EL2); if (!cpus_have_final_cap(ARM64_HAS_FGT2)) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 7a045cad6bdf..f04cda40545b 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1859,13 +1859,16 
@@ void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu) { u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2); + if (is_nested_ctxt(vcpu)) + vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); /* * In yet another example where FEAT_NV2 is fscking broken, accesses * to MDSCR_EL1 are redirected to the VNCR despite having an effect * at EL2. Use a big hammer to apply sanity. + * + * Unless of course we have FEAT_FGT, in which case we can precisely + * trap MDSCR_EL1. */ - if (is_hyp_ctxt(vcpu)) + else if (!cpus_have_final_cap(ARM64_HAS_FGT)) vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; - else - vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE); } From 1696b0cfcf004a3af34ffe4c57a14e837ef18144 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 29 Sep 2025 11:29:04 -0400 Subject: [PATCH 105/305] drm/i915/guc: Skip communication warning on reset in progress The GuC IRQ and tasklet handlers receive just a single G2H message, and leave the other messages to be received by the next tasklet. If a reset starts during this chained tasklet process, communication will be disabled. Skip the warning in this condition.
Fixes: 65dd4ed0f4e1 ("drm/i915/guc: Don't receive all G2H messages in irq handler") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15018 Signed-off-by: Zhanjun Dong Reviewed-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250929152904.269776-1-zhanjun.dong@intel.com (cherry picked from commit 604b5ee4a653a70979ce689dbd6a5d942eb016bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 3e7e5badcc2b..2c651ec024ef 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1325,9 +1325,16 @@ static int ct_receive(struct intel_guc_ct *ct) static void ct_try_receive_message(struct intel_guc_ct *ct) { + struct intel_guc *guc = ct_to_guc(ct); int ret; - if (GEM_WARN_ON(!ct->enabled)) + if (!ct->enabled) { + GEM_WARN_ON(!guc_to_gt(guc)->uc.reset_in_progress); + return; + } + + /* When interrupt disabled, message handling is not expected */ + if (!guc->interrupts.enabled) return; ret = ct_receive(ct); From 760039c95c78490c5c66ef584fcd536797ed6a2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Oct 2025 17:57:30 +0300 Subject: [PATCH 106/305] drm/i915/frontbuffer: Move bo refcounting intel_frontbuffer_{get,release}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently xe's intel_frontbuffer implementation forgets to hold a reference on the bo. This makes the entire thing extremely fragile as the cleanup order now depends on bo references held by other things (namely intel_fb_bo_framebuffer_fini()). Move the bo refcounting to intel_frontbuffer_{get,release}() so that both i915 and xe do this the same way. 
I first tried to fix this by having xe do the refcounting from its intel_bo_set_frontbuffer() implementation (which is what i915 does currently), but turns out xe's drm_gem_object_free() can sleep and thus drm_gem_object_put() isn't safe to call while we hold fb_tracking.lock. Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit eb4d490729a5fd8dc5a76d334f8d01fec7c14bbe) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_frontbuffer.c | 10 +++++++++- drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 43be5377ddc1..73ed28ac9573 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -270,6 +270,8 @@ static void frontbuffer_release(struct kref *ref) spin_unlock(&display->fb_tracking.lock); i915_active_fini(&front->write); + + drm_gem_object_put(obj); kfree_rcu(front, rcu); } @@ -287,6 +289,8 @@ intel_frontbuffer_get(struct drm_gem_object *obj) if (!front) return NULL; + drm_gem_object_get(obj); + front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); @@ -299,8 +303,12 @@ intel_frontbuffer_get(struct drm_gem_object *obj) spin_lock(&display->fb_tracking.lock); cur = intel_bo_set_frontbuffer(obj, front); spin_unlock(&display->fb_tracking.lock); - if (cur != front) + + if (cur != front) { + drm_gem_object_put(obj); kfree(front); + } + return cur; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h index b6dc3d1b9bb1..b682969e3a29 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h +++ 
b/drivers/gpu/drm/i915/gem/i915_gem_object_frontbuffer.h @@ -89,12 +89,10 @@ i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj, if (!front) { RCU_INIT_POINTER(obj->frontbuffer, NULL); - drm_gem_object_put(intel_bo_to_drm_bo(obj)); } else if (rcu_access_pointer(obj->frontbuffer)) { cur = rcu_dereference_protected(obj->frontbuffer, true); kref_get(&cur->ref); } else { - drm_gem_object_get(intel_bo_to_drm_bo(obj)); rcu_assign_pointer(obj->frontbuffer, front); } From 86af6b90e0556fcefbc6e98eb78bdce90327ee76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 3 Oct 2025 17:57:31 +0300 Subject: [PATCH 107/305] drm/i915/fb: Fix the set_tiling vs. addfb race, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_frontbuffer_get() is what locks out subsequent set_tiling changes to the bo. Thus the fence vs. modifier check must be done after intel_frontbuffer_get(), or else a concurrent set_tiling ioctl might sneak in and change the fence after the check has been done. Close the race again. See commit dd689287b977 ("drm/i915: Prevent concurrent tiling/framebuffer modifications") for the previous instance. 
v2: Reorder intel_user_framebuffer_destroy() to match the unwind (Jani) Cc: Jouni Högander Reviewed-by: Jani Nikula Fixes: 10690b8a49bc ("drm/i915/display: Add intel_fb_bo_framebuffer_fini") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251003145734.7634-3-ville.syrjala@linux.intel.com (cherry picked from commit 1d1e4ded216017f8febd91332ee337f0e0e79285) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fb.c | 38 +++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 22a4a1575d22..b817ff44c043 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2113,10 +2113,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); - intel_frontbuffer_put(intel_fb->frontbuffer); - intel_fb_bo_framebuffer_fini(intel_fb_bo(fb)); + intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb); } @@ -2218,15 +2218,17 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, int ret = -EINVAL; int i; + /* + * intel_frontbuffer_get() must be done before + * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. 
+ */ + intel_fb->frontbuffer = intel_frontbuffer_get(obj); + if (!intel_fb->frontbuffer) + return -ENOMEM; + ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); if (ret) - return ret; - - intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) { - ret = -ENOMEM; - goto err; - } + goto err_frontbuffer_put; ret = -EINVAL; if (!drm_any_plane_has_format(display->drm, @@ -2235,7 +2237,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "unsupported pixel format %p4cc / modifier 0x%llx\n", &mode_cmd->pixel_format, mode_cmd->modifier[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } max_stride = intel_fb_max_stride(display, mode_cmd->pixel_format, @@ -2246,7 +2248,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", mode_cmd->pitches[0], max_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. 
*/ @@ -2254,7 +2256,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane 0 offset (0x%08x) must be 0\n", mode_cmd->offsets[0]); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); @@ -2264,7 +2266,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { drm_dbg_kms(display->drm, "bad plane %d handle\n", i); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } stride_alignment = intel_fb_stride_alignment(fb, i); @@ -2272,7 +2274,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "plane %d pitch (%d) must be at least %u byte aligned\n", i, fb->pitches[i], stride_alignment); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } if (intel_fb_is_gen12_ccs_aux_plane(fb, i)) { @@ -2282,7 +2284,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_dbg_kms(display->drm, "ccs aux plane %d pitch (%d) must be %d\n", i, fb->pitches[i], ccs_aux_stride); - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; } } @@ -2291,7 +2293,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(display, intel_fb); if (ret) - goto err_frontbuffer_put; + goto err_bo_framebuffer_fini; if (intel_fb_uses_dpt(fb)) { struct i915_address_space *vm; @@ -2317,10 +2319,10 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, err_free_dpt: if (intel_fb_uses_dpt(fb)) intel_dpt_destroy(intel_fb->dpt_vm); +err_bo_framebuffer_fini: + intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); -err: - intel_fb_bo_framebuffer_fini(obj); return ret; } From 9858ea4c29c283f0a8a3cdbb42108d464ece90a8 Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Thu, 9 Oct 2025 14:19:00 +0200 Subject: [PATCH 108/305] Revert "drm/amd/display: Only restore backlight 
after amdgpu_dm_init or dm_resume" This fix regressed the original issue that commit 7875afafba84 ("drm/amd/display: Fix brightness level not retained over reboot") solved, so revert it until a different approach to solve the regression that it caused with AMD_PRIVATE_COLOR is found. Fixes: a490c8d77d50 ("drm/amd/display: Only restore backlight after amdgpu_dm_init or dm_resume") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4620 Cc: stable@vger.kernel.org Signed-off-by: Matthew Schwartz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++-------- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 7 ------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d03e324d5b9..6597475e245d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2085,8 +2085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); - adev->dm.restore_backlight = true; - adev->dm.hpd_rx_offload_wq = hpd_rx_irq_create_workqueue(adev); if (!adev->dm.hpd_rx_offload_wq) { drm_err(adev_to_drm(adev), "failed to create hpd rx offload workqueue.\n"); @@ -3442,7 +3440,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); dc_resume(dm->dc); - adev->dm.restore_backlight = true; amdgpu_dm_irq_resume_early(adev); @@ -9969,6 +9966,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; + bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -10088,6 +10086,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = 
new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; + set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -10144,16 +10143,13 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, * to fix a flicker issue. * It will cause the dm->actual_brightness is not the current panel brightness * level. (the dm->brightness is the correct panel level) - * So we set the backlight level with dm->brightness value after initial - * set mode. Use restore_backlight flag to avoid setting backlight level - * for every subsequent mode set. + * So we set the backlight level with dm->brightness value after set mode */ - if (dm->restore_backlight) { + if (set_backlight_level) { for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i]) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } - dm->restore_backlight = false; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 009f206226f0..db75e991ac7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -630,13 +630,6 @@ struct amdgpu_display_manager { */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; - /** - * @restore_backlight: - * - * Flag to indicate whether to restore backlight after modeset. - */ - bool restore_backlight; - /** * @aux_hpd_discon_quirk: * From 5c05bcf6ae7732da1bd4dc1958d527b5f07f216a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 26 Sep 2025 20:26:12 +0200 Subject: [PATCH 109/305] drm/amd/pm: Disable MCLK switching on SI at high pixel clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On various SI GPUs, a flickering can be observed near the bottom edge of the screen when using a single 4K 60Hz monitor over DP. 
Disabling MCLK switching works around this problem. Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index cf9932e68055..3a9522c17fee 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -3500,6 +3500,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, * for these GPUs to calculate bandwidth requirements. */ if (high_pixelclock_count) { + /* Work around flickering lines at the bottom edge + * of the screen when using a single 4K 60Hz monitor. + */ + disable_mclk_switching = true; + /* On Oland, we observe some flickering when two 4K 60Hz * displays are connected, possibly because voltage is too low. * Raise the voltage by requiring a higher SCLK. From 7bdd91abf0cb3ea78160e2e78fb58b12f6a38d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 26 Sep 2025 20:26:13 +0200 Subject: [PATCH 110/305] drm/amd: Disable ASPM on SI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling ASPM causes random hangs on Tahiti and Oland on Zen4. It's unclear if this is a platform-specific or GPU-specific issue. Disable ASPM on SI for the time being.
Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7a899fb4de29..3d032c4e2dce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1882,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) { + /* Enabling ASPM causes randoms hangs on Tahiti and Oland on Zen4. + * It's unclear if this is a platform-specific or GPU-specific issue. + * Disable ASPM on SI for the time being. + */ + if (adev->family == AMDGPU_FAMILY_SI) + return true; + #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); From 5d55ed19d4190d2c210ac05ac7a53f800a8c6fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2025 14:47:23 +0200 Subject: [PATCH 111/305] drm/amdgpu: remove two invalid BUG_ON()s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those can be triggered trivially by userspace. 
Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 66c47c466532..d61eb9f187c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5862,8 +5862,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 710ec9c34e43..93fde0f9af87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4419,8 +4419,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; - BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); control |= ib->length_dw | (vmid << 24); From 8f74c70be57527d7b79e2ecf6de1a154d148254d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 22 Sep 2025 14:18:16 +0200 Subject: [PATCH 112/305] drm/amdgpu: block CE CS if not explicitly allowed by module option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Constant Engine found on gfx6-gfx10 HW has been a notorious source of problems. RADV never used it in the first place, radeonsi only used it for a few releases around 2017 for gfx6-gfx9 before dropping support for it as well. While investigating another problem I just recently found that submitting to the CE seems to have been completely broken on gfx9 for quite a while.
Since nobody complained about that problem it most likely means that nobody is using any of the affected radeonsi versions on current Linux kernels any more. So to potentially phase out the support for the CE and eliminate another source of problems block submitting CE IBs unless it is enabled again using a debug flag. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 +++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2a0df4cabb99..6f5b4a0e0a34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1290,6 +1290,7 @@ struct amdgpu_device { bool debug_disable_gpu_ring_reset; bool debug_vm_userptr; bool debug_disable_ce_logs; + bool debug_enable_ce_cs; /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9cd7741d2254..ba9fb08db094 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; + if (!p->adev->debug_enable_ce_cs && + chunk_ib->flags & AMDGPU_IB_FLAG_CE) { + dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n"); + return -EINVAL; + } + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bff25ef3e2d0..61268aa82df4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ 
-144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), AMDGPU_DEBUG_SMU_POOL = BIT(7), AMDGPU_DEBUG_VM_USERPTR = BIT(8), - AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9) + AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9), + AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10) }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: disable kernel logs of correctable errors\n"); adev->debug_disable_ce_logs = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) { + pr_info("debug: allowing command submission to CE engine\n"); + adev->debug_enable_ce_cs = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) From 357d90be2c7aaa526a840cddffd2b8d676fe75a6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 26 Sep 2025 17:31:32 -0400 Subject: [PATCH 113/305] drm/amdgpu: fix handling of harvesting for ip_discovery firmware Chips which use the IP discovery firmware loaded by the driver reported incorrect harvesting information in the ip discovery table in sysfs because the driver only uses the ip discovery firmware for populating sysfs and not for direct parsing for the driver itself. As such, the fields that are used to print the harvesting info in sysfs report incorrect data for some IPs. Populate the relevant fields for this case as well.
Fixes: 514678da56da ("drm/amdgpu/discovery: fix fw based ip discovery") Acked-by: Tom St Denis Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73401f0aeb34..dd7b2b796427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1033,7 +1033,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, /* Until a uniform way is figured, get mask based on hwid */ switch (hw_id) { case VCN_HWID: - harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; + /* VCN vs UVD+VCE */ + if (!amdgpu_ip_version(adev, VCE_HWIP, 0)) + harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; break; case DMU_HWID: if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) @@ -2565,7 +2567,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0); @@ -2592,7 +2596,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1); @@ -2619,8 +2625,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega10_reg_base_init(adev); adev->sdma.num_instances = 1; + adev->sdma.sdma_mask = 1; 
adev->vcn.num_vcn_inst = 1; adev->gmc.num_umc = 2; + adev->gfx.xcc_mask = 1; if (adev->apu_flags & AMD_APU_IS_RAVEN2) { adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0); @@ -2665,7 +2673,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); vega20_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0); @@ -2693,8 +2703,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); arct_reg_base_init(adev); adev->sdma.num_instances = 8; + adev->sdma.sdma_mask = 0xff; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 8; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1); @@ -2726,8 +2738,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init(adev); aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; + adev->sdma.sdma_mask = 0x1f; adev->vcn.num_vcn_inst = 2; adev->gmc.num_umc = 4; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0); @@ -2762,6 +2776,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) } else { cyan_skillfish_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->sdma.sdma_mask = 3; + adev->gfx.xcc_mask = 1; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1); From 1f22fcb88bfef26a966e9eb242c692c6bf253d47 Mon Sep 17 00:00:00 
2001 From: Alex Deucher Date: Mon, 15 Sep 2025 12:37:32 -0400 Subject: [PATCH 114/305] drm/amdgpu: handle wrap around in reemit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compare the sequence numbers directly. Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fd8cca241da6..e270df30c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -790,14 +790,19 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, struct dma_fence *unprocessed; struct dma_fence __rcu **ptr; struct amdgpu_fence *fence; - u64 wptr, i, seqno; + u64 wptr; + u32 seq, last_seq; - seqno = amdgpu_fence_read(ring); + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; wptr = ring->fence_drv.signalled_wptr; ring->ring_backup_entries_to_copy = 0; - for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { - ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; rcu_read_lock(); unprocessed = rcu_dereference(*ptr); @@ -813,7 +818,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, wptr = fence->wptr; } rcu_read_unlock(); - } + } while (last_seq != seq); } /* From ff780f4f80323148d43198f2052c14160c8428d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Sep 2025 13:48:23 -0400 Subject: [PATCH 115/305] drm/amdgpu: set an error on all fences from a bad context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit When we backup ring contents to reemit after a queue reset, we don't backup ring contents from the bad context. When we signal the fences, we should set an error on those fences as well. v2: misc cleanups v3: add locking for fence error, fix comment (Christian) v4: fix wrap around, locking (Christian) Fixes: 77cc0da39c7c ("drm/amdgpu: track ring state associated with a fence") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 39 ++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index e270df30c279..18a7829122d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -758,11 +758,42 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) * @fence: fence of the ring to signal * */ -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af) { - dma_fence_set_error(&fence->base, -ETIME); - amdgpu_fence_write(fence->ring, fence->seq); - amdgpu_fence_process(fence->ring); + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + struct amdgpu_ring *ring = af->ring; + unsigned long flags; + u32 seq, last_seq; + + last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask; + seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask; + + /* mark all fences from the guilty context with an error */ + spin_lock_irqsave(&ring->fence_drv.lock, flags); + do { + last_seq++; + last_seq &= ring->fence_drv.num_fences_mask; + + ptr = &ring->fence_drv.fences[last_seq]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && 
!dma_fence_is_signaled_locked(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + if (fence == af) + dma_fence_set_error(&fence->base, -ETIME); + else if (fence->context == af->context) + dma_fence_set_error(&fence->base, -ECANCELED); + } + rcu_read_unlock(); + } while (last_seq != seq); + spin_unlock_irqrestore(&ring->fence_drv.lock, flags); + /* signal the guilty fence */ + amdgpu_fence_write(ring, af->seq); + amdgpu_fence_process(ring); } void amdgpu_fence_save_wptr(struct dma_fence *fence) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 8f6ce948c684..5ec5c3ff22bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -811,7 +811,7 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, if (r) return r; - /* signal the fence of the bad job */ + /* signal the guilty fence and set an error on all fences from the context */ if (guilty_fence) amdgpu_fence_driver_guilty_force_completion(guilty_fence); /* Re-emit the non-guilty commands */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b6b649179776..4b46e3c26ff3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -155,7 +155,7 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); -void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af); void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); From 6df8e84aa6b5b1812cc2cacd6b3f5ccbb18cda2b Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Wed, 8 Oct 2025 
03:43:27 +0000 Subject: [PATCH 116/305] drm/amdgpu: use atomic functions with memory barriers for vm fault info The atomic variable vm_fault_info_updated is used to synchronize access to adev->gmc.vm_fault_info between the interrupt handler and get_vm_fault_info(). The default atomic functions like atomic_set() and atomic_read() do not provide memory barriers. This allows for CPU instruction reordering, meaning the memory accesses to vm_fault_info and the vm_fault_info_updated flag are not guaranteed to occur in the intended order. This creates a race condition that can lead to inconsistent or stale data being used. The previous implementation, which used an explicit mb(), was incomplete and inefficient. It failed to account for all potential CPU reorderings, such as the access of vm_fault_info being reordered before the atomic_read of the flag. This approach is also more verbose and less performant than using the proper atomic functions with acquire/release semantics. Fix this by switching to atomic_set_release() and atomic_read_acquire(). These functions provide the necessary acquire and release semantics, which act as memory barriers to ensure the correct order of operations. It is also more efficient and idiomatic than using explicit full memory barriers. 
Fixes: b97dfa27ef3a ("drm/amdgpu: save vm fault information for amdkfd") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 7 +++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 83020963dfde..a2ca9acf8c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2329,10 +2329,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *mem) { - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) { *mem = *adev->gmc.vm_fault_info; - mb(); /* make sure read happened */ - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 93d7ccb7d013..0e5e54d0a9a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1068,7 +1068,7 @@ static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1290,7 +1290,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && 
!atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1306,8 +1306,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index c5e2a2c41e06..e1509480dfc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1183,7 +1183,7 @@ static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block) GFP_KERNEL); if (!adev->gmc.vm_fault_info) return -ENOMEM; - atomic_set(&adev->gmc.vm_fault_info_updated, 0); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 0); return 0; } @@ -1478,7 +1478,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) - && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + && !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) { struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1494,8 +1494,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, info->prot_read = protections & 0x8 ? true : false; info->prot_write = protections & 0x10 ? true : false; info->prot_exec = protections & 0x20 ? 
true : false; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 1); + atomic_set_release(&adev->gmc.vm_fault_info_updated, 1); } return 0; From ef38b4eab146715bc68d45029257f5e69ea3f2cd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 10 Oct 2025 16:40:57 -0400 Subject: [PATCH 117/305] drm/amdgpu: drop unused structures in amdgpu_drm.h These were never used and are duplicated with the interface that is used. Maybe leftovers from a previous revision of the patch that added them. Fixes: 90c448fef312 ("drm/amdgpu: add new AMDGPU_INFO subquery for userq objects") Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index cd7402e36b6d..406a42be429b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1555,27 +1555,6 @@ struct drm_amdgpu_info_hw_ip { __u32 userq_num_slots; }; -/* GFX metadata BO sizes and alignment info (in bytes) */ -struct drm_amdgpu_info_uq_fw_areas_gfx { - /* shadow area size */ - __u32 shadow_size; - /* shadow area base virtual mem alignment */ - __u32 shadow_alignment; - /* context save area size */ - __u32 csa_size; - /* context save area base virtual mem alignment */ - __u32 csa_alignment; -}; - -/* IP specific fw related information used in the - * subquery AMDGPU_INFO_UQ_FW_AREAS - */ -struct drm_amdgpu_info_uq_fw_areas { - union { - struct drm_amdgpu_info_uq_fw_areas_gfx gfx; - }; -}; - struct drm_amdgpu_info_num_handles { /** Max handles as supported by firmware for UVD */ __u32 uvd_max_handles; From 6917112af2ba36c5f19075eb9f2933ffd07e55bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 13 Oct 2025 08:06:42 +0200 Subject: [PATCH 118/305] drm/amd/powerplay: Fix CIK shutdown temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove extra multiplication. 
CIK GPUs such as Hawaii appear to use PP_TABLE_V0 in which case the shutdown temperature is hardcoded in smu7_init_dpm_defaults and is already multiplied by 1000. The value was mistakenly multiplied another time by smu7_get_thermal_temperature_range. Fixes: 4ba082572a42 ("drm/amd/powerplay: export the thermal ranges of VI asics (V2)") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1676 Reviewed-by: Alex Deucher Signed-off-by: Timur Kristóf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 8da882c51856..9b28c0728269 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -5444,8 +5444,7 @@ static int smu7_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = table_info->cac_dtp_table->usSoftwareShutdownTemp * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; else if (hwmgr->pp_table_version == PP_TABLE_V0) - thermal_data->max = data->thermal_temp_setting.temperature_shutdown * - PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->max = data->thermal_temp_setting.temperature_shutdown; thermal_data->sw_ctf_threshold = thermal_data->max; From 74de0eaa00eac2e0cbad1dda6dcf8f44ab27629e Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Fri, 10 Oct 2025 23:32:40 +0530 Subject: [PATCH 119/305] drm/amdgpu: fix bit shift logic BIT_ULL(n) sets nth bit, remove explicit shift and set the position Fixes: a7a411e24626 ("drm/amdgpu: fix shift-out-of-bounds in amdgpu_debugfs_jpeg_sched_mask_set") Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 
6b7d66b6d4cc..63ee6ba6a931 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -371,7 +371,7 @@ static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; - if (val & (BIT_ULL(1) << ((i * adev->jpeg.num_jpeg_rings) + j))) + if (val & (BIT_ULL((i * adev->jpeg.num_jpeg_rings) + j))) ring->sched.ready = true; else ring->sched.ready = false; From 33cc891b56b93cad1a83263eaf2e417436f70c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Oct 2025 10:10:52 +0200 Subject: [PATCH 120/305] drm/amdgpu: hide VRAM sysfs attributes on GPUs without VRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise accessing them can cause a crash. Signed-off-by: Christian König Tested-by: Mangesh Gadre Acked-by: Alex Deucher Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a5adb2ed9b3c..9d934c07fa6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -234,6 +234,9 @@ static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, !adev->gmc.vram_vendor) return 0; + if (!ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) + return 0; + return attr->mode; } From 883f309add55060233bf11c1ea6947140372920f Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Mon, 13 Oct 2025 13:46:12 +0800 Subject: [PATCH 121/305] drm/amdgpu: Fix NULL pointer dereference in VRAM logic for APU devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, APU platforms (and other scenarios with uninitialized VRAM 
managers) triggered a NULL pointer dereference in `ttm_resource_manager_usage()`. The root cause is not that the `struct ttm_resource_manager *man` pointer itself is NULL, but that `man->bdev` (the backing device pointer within the manager) remains uninitialized (NULL) on APUs—since APUs lack dedicated VRAM and do not fully set up VRAM manager structures. When `ttm_resource_manager_usage()` attempts to acquire `man->bdev->lru_lock`, it dereferences the NULL `man->bdev`, leading to a kernel OOPS. 1. **amdgpu_cs.c**: Extend the existing bandwidth control check in `amdgpu_cs_get_threshold_for_moves()` to include a check for `ttm_resource_manager_used()`. If the manager is not used (uninitialized `bdev`), return 0 for migration thresholds immediately—skipping VRAM-specific logic that would trigger the NULL dereference. 2. **amdgpu_kms.c**: Update the `AMDGPU_INFO_VRAM_USAGE` ioctl and memory info reporting to use a conditional: if the manager is used, return the real VRAM usage; otherwise, return 0. This avoids accessing `man->bdev` when it is NULL. 3. **amdgpu_virt.c**: Modify the vf2pf (virtual function to physical function) data write path. Use `ttm_resource_manager_used()` to check validity: if the manager is usable, calculate `fb_usage` from VRAM usage; otherwise, set `fb_usage` to 0 (APUs have no discrete framebuffer to report). This approach is more robust than APU-specific checks because it: - Works for all scenarios where the VRAM manager is uninitialized (not just APUs), - Aligns with TTM's design by using its native helper function, - Preserves correct behavior for discrete GPUs (which have fully initialized `man->bdev` and pass the `ttm_resource_manager_used()` check). 
v4: use ttm_resource_manager_used(&adev->mman.vram_mgr.manager) instead of checking the adev->gmc.is_app_apu flag (Christian) Reviewed-by: Christian König Suggested-by: Lijo Lazar Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ba9fb08db094..2f6a96af7fb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -708,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) { + if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) { *max_bytes = 0; *max_vis_bytes = 0; return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a9327472c651..b3e6b3fcdf2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -758,7 +758,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); + ui64 = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) : 0; return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); @@ -804,8 +805,8 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; - mem.vram.heap_usage = - ttm_resource_manager_usage(vram_man); + mem.vram.heap_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(vram_man) : 0; mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 3328ab63376b..f96beb96c75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -598,8 +598,8 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = - ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20; + vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ? + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0; vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; From d0de79f66a80eeb849033fae34bd07a69ce72235 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 9 Oct 2025 10:45:42 -0400 Subject: [PATCH 122/305] drm/amdgpu: fix gfx12 mes packet status return check GFX12 MES uses low 32 bits of status return for success (1 or 0) and high bits for debug information if low bits are 0. GFX11 MES doesn't do this so checking full 64-bit status return for 1 or 0 is still valid. 
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index aff06f06aeee..e3149196143e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -228,7 +228,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, pipe, x_pkt->header.opcode); r = amdgpu_fence_wait_polling(ring, seq, timeout); - if (r < 1 || !*status_ptr) { + + /* + * status_ptr[31:0] == 0 (fail) or status_ptr[63:0] == 1 (success). + * If status_ptr[31:0] == 0 then status_ptr[63:32] will have debug error information. + */ + if (r < 1 || !(lower_32_bits(*status_ptr))) { if (misc_op_str) dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", From 8745ca5efb2aad0b6591d9b8cd48573ea49c929d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 9 Oct 2025 10:48:09 -0400 Subject: [PATCH 123/305] drm/amdgpu: fix initialization of doorbell array for detect and hang Initialized doorbells should be set to invalid rather than 0 to prevent driver from over counting hung doorbells since it checks against the invalid value to begin with. 
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 5bf9be073cdd..30e1fb510600 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -409,7 +409,7 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, return -EINVAL; /* Clear the doorbell array before detection */ - memset(adev->mes.hung_queue_db_array_cpu_addr, 0, + memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET, adev->mes.hung_queue_db_array_size * sizeof(u32)); input.queue_type = queue_type; input.detect_only = detect_only; From 0ef930e1faca6418316e5b9a3b4d1f6ae9e5b240 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 9 Oct 2025 11:28:19 -0400 Subject: [PATCH 124/305] drm/amdgpu: fix hung reset queue array memory allocation By design the MES will return an array result that is twice the number of hung doorbells it can report. i.e. if up to k reported doorbells are supported, then the second half of the array, also of length k, holds the HQD information (type/queue/pipe) where queue 1 corresponds to index 0 and k, queue 2 corresponds to index 1 and k + 1 etc ... The driver will use the HQD info to target queue/pipe reset for hardware scheduled user compute queues.
Signed-off-by: Jonathan Kim Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 8 +++++--- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 8 +++++--- 5 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 30e1fb510600..94973018f761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -420,12 +420,17 @@ int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, dev_err(adev->dev, "failed to detect and reset\n"); } else { *hung_db_num = 0; - for (i = 0; i < adev->mes.hung_queue_db_array_size; i++) { + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { hung_db_array[i] = db_array[i]; *hung_db_num += 1; } } + + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ } return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 6b506fc72f58..97c137c90f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -149,6 +149,7 @@ struct amdgpu_mes { void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; int hung_queue_db_array_size; + int hung_queue_hqd_info_offset; struct amdgpu_bo *hung_queue_db_array_gpu_obj; uint64_t hung_queue_db_array_gpu_addr; void *hung_queue_db_array_cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 2db9b2c63693..1cd9eaeef38f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -208,10 +208,10 @@ static int mes_userq_detect_and_reset(struct 
amdgpu_device *adev, struct amdgpu_userq_mgr *uqm, *tmp; unsigned int hung_db_num = 0; int queue_id, r, i; - u32 db_array[4]; + u32 db_array[8]; - if (db_array_size > 4) { - dev_err(adev->dev, "DB array size (%d vs 4) too small\n", + if (db_array_size > 8) { + dev_err(adev->dev, "DB array size (%d vs 8) too small\n", db_array_size); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e82188431f79..da575bb1377f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -66,7 +66,8 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define GFX_MES_DRAM_SIZE 0x80000 #define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) -#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 4 +#define MES11_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset, [4:7] = hqd info */ +#define MES11_HUNG_HQD_INFO_OFFSET 4 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -1720,8 +1721,9 @@ static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; - adev->mes.hung_queue_db_array_size = - MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_db_array_size = MES11_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES11_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index e3149196143e..7f3512d9de07 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -47,7 +47,8 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 -#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 4 +#define MES12_HUNG_DB_OFFSET_ARRAY_SIZE 8 /* [0:3] = db offset [4:7] hqd info */ +#define MES12_HUNG_HQD_INFO_OFFSET 4 static void 
mes_v12_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -1904,8 +1905,9 @@ static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe, r; - adev->mes.hung_queue_db_array_size = - MES12_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_db_array_size = MES12_HUNG_DB_OFFSET_ARRAY_SIZE; + adev->mes.hung_queue_hqd_info_offset = MES12_HUNG_HQD_INFO_OFFSET; + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { r = amdgpu_mes_init_microcode(adev, pipe); if (r) From 277bb0f83e98261018ddd82b7ab8154bb9b93237 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 5 Jun 2025 10:18:37 -0400 Subject: [PATCH 125/305] drm/amdgpu: enable suspend/resume all for gfx 12 Suspend/resume all gangs has been available for GFX12 for a while now so enable it. Signed-off-by: Jonathan Kim Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 94973018f761..4883adcfbb4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -691,14 +691,11 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) { uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; - bool is_supported = false; - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && - mes_rev >= 0x63) - is_supported = true; - - return is_supported; + return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) || + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); } /* Fix me -- node_id is used to identify the correct MES instances in the future */ From 
079ae5118e1f0dcf5b1ab68ffdb5760b06ed79a2 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 18 Jun 2025 10:31:15 -0400 Subject: [PATCH 126/305] drm/amdkfd: fix suspend/resume all calls in mes based eviction path Suspend/resume all gangs should be done with the device lock is held. Signed-off-by: Jonathan Kim Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 73 ++++++------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 6c5c7c1bf5ed..6e7bc983fc0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, pr_debug_ratelimited("Evicting process pid %d queues\n", pdd->process->lead_thread->pid); + if (dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); + retval = suspend_all_queues_mes(dqm); + if (retval) { + dev_err(dev, "Suspending all queues failed"); + goto out; + } + } + /* Mark all queues as evicted. Deactivate all active queues on * the qpd. */ @@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - int err; - - err = remove_queue_mes(dqm, q, qpd); - if (err) { + retval = remove_queue_mes(dqm, q, qpd); + if (retval) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - retval = err; + goto out; } } } - pdd->last_evict_timestamp = get_jiffies_64(); - if (!dqm->dev->kfd->shared_resources.enable_mes) + + if (!dqm->dev->kfd->shared_resources.enable_mes) { + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? 
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); + } else { + retval = resume_all_queues_mes(dqm); + if (retval) + dev_err(dev, "Resuming all queues failed"); + } out: dqm_unlock(dqm); @@ -3098,61 +3111,17 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbel return ret; } -static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct device *dev = dqm->dev->adev->dev; - int ret = 0; - - /* Check if process is already evicted */ - dqm_lock(dqm); - if (qpd->evicted) { - /* Increment the evicted count to make sure the - * process stays evicted before its terminated. - */ - qpd->evicted++; - dqm_unlock(dqm); - goto out; - } - dqm_unlock(dqm); - - ret = suspend_all_queues_mes(dqm); - if (ret) { - dev_err(dev, "Suspending all queues failed"); - goto out; - } - - ret = dqm->ops.evict_process_queues(dqm, qpd); - if (ret) { - dev_err(dev, "Evicting process queues failed"); - goto out; - } - - ret = resume_all_queues_mes(dqm); - if (ret) - dev_err(dev, "Resuming all queues failed"); - -out: - return ret; -} - int kfd_evict_process_device(struct kfd_process_device *pdd) { struct device_queue_manager *dqm; struct kfd_process *p; - int ret = 0; p = pdd->process; dqm = pdd->dev->dqm; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - if (dqm->dev->kfd->shared_resources.enable_mes) - ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); - else - ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); - - return ret; + return dqm->ops.evict_process_queues(dqm, &pdd->qpd); } int reserve_debug_trap_vmid(struct device_queue_manager *dqm, From 927069c4ac2cd1a37efa468596fb5b8f86db9df0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Oct 2025 12:05:31 -0600 Subject: [PATCH 127/305] Revert "io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common()" This reverts commit 90bfb28d5fa8127a113a140c9791ea0b40ab156a. 
Kevin reports that this commit causes an issue for him with LVM snapshots, most likely because of turning off NOWAIT support while a snapshot is being created. This makes -EOPNOTSUPP bubble back through the completion handler, where io_uring read/write handling should just retry it. Reinstate the previous check removed by the referenced commit. Cc: stable@vger.kernel.org Fixes: 90bfb28d5fa8 ("io_uring/rw: drop -EOPNOTSUPP check in __io_complete_rw_common()") Reported-by: Salvatore Bonaccorso Reported-by: Kevin Lumik Link: https://lore.kernel.org/io-uring/cceb723c-051b-4de2-9a4c-4aa82e1619ee@kernel.dk/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 08882648d569..a0f9d2021e3f 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -542,7 +542,7 @@ static void __io_complete_rw_common(struct io_kiocb *req, long res) { if (res == req->cqe.res) return; - if (res == -EAGAIN && io_rw_should_reissue(req)) { + if ((res == -EOPNOTSUPP || res == -EAGAIN) && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE | REQ_F_BL_NO_RECYCLE; } else { req_set_fail(req); From 8db4a1d146f83c6bdb0f5b98c50c509ae8549827 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 7 Oct 2025 13:39:05 -0400 Subject: [PATCH 128/305] NFSv4/flexfiles: fix to allocate mirror->dss before use Move mirror_array's dss_count initialization and dss allocation to ff_layout_alloc_mirror(), just before the loop that initializes each nfs4_ff_layout_ds_stripe's nfs_file_localio. Also handle NULL return from kcalloc() and remove one level of indent in ff_layout_alloc_mirror(). This commit fixes dangling nfsd_serv refcount issues seen when using NFS LOCALIO and then attempting to stop the NFSD service. 
Fixes: 20b1d75fb840 ("NFSv4/flexfiles: Add support for striped layouts") Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 35 +++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df01d2876b68..9056f05a67dc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -270,19 +270,31 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror) mirror->layout = NULL; } -static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) +static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count, + gfp_t gfp_flags) { struct nfs4_ff_layout_mirror *mirror; - u32 dss_id; mirror = kzalloc(sizeof(*mirror), gfp_flags); - if (mirror != NULL) { - spin_lock_init(&mirror->lock); - refcount_set(&mirror->ref, 1); - INIT_LIST_HEAD(&mirror->mirrors); - for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) - nfs_localio_file_init(&mirror->dss[dss_id].nfl); + if (mirror == NULL) + return NULL; + + spin_lock_init(&mirror->lock); + refcount_set(&mirror->ref, 1); + INIT_LIST_HEAD(&mirror->mirrors); + + mirror->dss_count = dss_count; + mirror->dss = + kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe), + gfp_flags); + if (mirror->dss == NULL) { + kfree(mirror); + return NULL; } + + for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++) + nfs_localio_file_init(&mirror->dss[dss_id].nfl); + return mirror; } @@ -507,17 +519,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, if (dss_count > 1 && stripe_unit == 0) goto out_err_free; - fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags); + fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags); if (fls->mirror_array[i] == NULL) { rc = -ENOMEM; goto out_err_free; } - fls->mirror_array[i]->dss_count = dss_count; - fls->mirror_array[i]->dss = - kcalloc(dss_count, 
sizeof(struct nfs4_ff_layout_ds_stripe), - gfp_flags); - for (dss_id = 0; dss_id < dss_count; dss_id++) { dss_info = &fls->mirror_array[i]->dss[dss_id]; dss_info->mirror = fls->mirror_array[i]; From 7a84394f02ab1985ebbe0a8d6f6d69bd040de4b3 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Oct 2025 15:22:58 -0600 Subject: [PATCH 129/305] NFS4: Apply delay_retrans to async operations The setting of delay_retrans is applied to synchronous RPC operations because the retransmit count is stored in same struct nfs4_exception that is passed each time an error is checked. However, for asynchronous operations (READ, WRITE, LOCKU, CLOSE, DELEGRETURN), a new struct nfs4_exception is made on the stack each time the task callback is invoked. This means that the retransmit count is always zero and thus delay_retrans never takes effect. Apply delay_retrans to these operations by tracking and updating their retransmit count. Change-Id: Ieb33e046c2b277cb979caa3faca7f52faf0568c9 Signed-off-by: Joshua Watt Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 13 +++++++++++++ include/linux/nfs_xdr.h | 1 + 2 files changed, 14 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f58098417142..411776718494 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3636,6 +3636,7 @@ struct nfs4_closedata { } lr; struct nfs_fattr fattr; unsigned long timestamp; + unsigned short retrans; }; static void nfs4_free_closedata(void *data) @@ -3664,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) .state = state, .inode = calldata->inode, .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -3711,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) default: task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) goto 
out_restart; } @@ -5593,9 +5596,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, server, task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -5709,10 +5714,12 @@ static int nfs4_write_done_cb(struct rpc_task *task, .inode = hdr->inode, .state = hdr->args.context->state, .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, }; task->tk_status = nfs4_async_handle_exception(task, NFS_SERVER(inode), task->tk_status, &exception); + hdr->retrans = exception.retrans; if (exception.retry) { rpc_restart_call_prepare(task); return -EAGAIN; @@ -6726,6 +6733,7 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + unsigned short retrans; struct { struct nfs4_layoutreturn_args arg; struct nfs4_layoutreturn_res res; @@ -6746,6 +6754,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) .inode = data->inode, .stateid = &data->stateid, .task_is_privileged = data->args.seq_args.sa_privileged, + .retrans = data->retrans, }; if (!nfs4_sequence_done(task, &data->res.seq_res)) @@ -6817,6 +6826,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) task->tk_status = nfs4_async_handle_exception(task, data->res.server, task->tk_status, &exception); + data->retrans = exception.retrans; if (exception.retry) goto out_restart; } @@ -7093,6 +7103,7 @@ struct nfs4_unlockdata { struct file_lock fl; struct nfs_server *server; unsigned long timestamp; + unsigned short retrans; }; static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, @@ -7147,6 +7158,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) struct nfs4_exception exception = { .inode = calldata->lsp->ls_state->inode, 
.stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -7180,6 +7192,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) task->tk_status = nfs4_async_handle_exception(task, calldata->server, task->tk_status, &exception); + calldata->retrans = exception.retrans; if (exception.retry) rpc_restart_call_prepare(task); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d56583572c98..31463286402f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1659,6 +1659,7 @@ struct nfs_pgio_header { void *netfs; #endif + unsigned short retrans; int pnfs_error; int error; /* merge with pnfs_error */ unsigned int good_bytes; /* boundary of good data */ From 9ff022f3820a31507cb93be6661bf5f3ca0609a4 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 9 Oct 2025 16:42:12 -0400 Subject: [PATCH 130/305] NFS: check if suid/sgid was cleared after a write as needed I noticed xfstests generic/193 and generic/355 started failing against knfsd after commit e7a8ebc305f2 ("NFSD: Offer write delegation for OPEN with OPEN4_SHARE_ACCESS_WRITE"). I ran those same tests against ONTAP (which has had write delegation support for a lot longer than knfsd) and they fail there too... so while it's a new failure against knfsd, it isn't an entirely new failure. Add the NFS_INO_REVAL_FORCED flag so that the presence of a delegation doesn't keep the inode from being revalidated to fetch the updated mode. 
Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0fb6905736d5..336c510f3750 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1535,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task, /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE + | NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); } return 0; From 9bb3baa9d1604cd20f49ae7dac9306b4037a0e7a Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Thu, 9 Oct 2025 15:48:04 -0600 Subject: [PATCH 131/305] NFS4: Fix state renewals missing after boot Since the last renewal time was initialized to 0 and jiffies start counting at -5 minutes, any clients connected in the first 5 minutes after a reboot would have their renewal timer set to a very long interval. If the connection was idle, this would result in the client state timing out on the server and the next call to the server would return NFS4ERR_BADSESSION. Fix this by initializing the last renewal time to the current jiffies instead of 0. 
Signed-off-by: Joshua Watt Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6fddf43d729c..5998d6bd8a4f 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -222,6 +222,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; + clp->cl_last_renewal = jiffies; #if IS_ENABLED(CONFIG_NFS_V4_1) init_waitqueue_head(&clp->cl_lock_waitq); #endif From 02e7567f5da023524476053a38c54f4f19130959 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 1 Oct 2025 14:03:37 +0800 Subject: [PATCH 132/305] cxl/port: Avoid missing port component registers setup port->nr_dports is used to represent how many dports added to the cxl port, it will increase in add_dport() when a new dport is being added to the cxl port, but it will not be reduced when a dport is removed from the cxl port. Currently, when the first dport is added to a cxl port, it will trigger component registers setup on the cxl port, the implementation is using port->nr_dports to confirm if the dport is the first dport. A corner case here is that adding dport could fail after port->nr_dports updating and before checking port->nr_dports for component registers setup. If the failure happens during the first dport attaching, it will cause that CXL subsystem has not chance to execute component registers setup for the cxl port. 
the failure flow like below: port->nr_dports = 0 dport 1 adding to the port: add_dport() # port->nr_dports: 1 failed on devm_add_action_or_reset() or sysfs_create_link() return error # port->nr_dports: 1 dport 2 adding to the port: add_dport() # port->nr_dports: 2 no failure skip component registers setup because of port->nr_dports is 2 The solution here is that moving component registers setup closer to add_dport(), so if add_dport() is executed correctly for the first dport, component registers setup on the port will be executed immediately after that. Fixes: f6ee24913de2 ("cxl: Move port register setup to when first dport appear") Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index d5f71eb1ade8..8128fd2b5b31 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1182,6 +1182,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. + */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1200,18 +1214,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); - /* - * Setup port register if this is the first dport showed up. Having - * a dport also means that there is at least 1 active link. 
- */ - if (port->nr_dports == 1 && - port->component_reg_phys != CXL_RESOURCE_NONE) { - rc = cxl_port_setup_regs(port, port->component_reg_phys); - if (rc) - return ERR_PTR(rc); - port->component_reg_phys = CXL_RESOURCE_NONE; - } - return dport; } From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 10 Oct 2025 12:08:35 -0500 Subject: [PATCH 133/305] x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID Users can create as many monitoring groups as the number of RMIDs supported by the hardware. However, on AMD systems, only a limited number of RMIDs are guaranteed to be actively tracked by the hardware. RMIDs that exceed this limit are placed in an "Unavailable" state. When a bandwidth counter is read for such an RMID, the hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable remains set on first read after tracking re-starts and is clear on all subsequent reads as long as the RMID is tracked. resctrl miscounts the bandwidth events after an RMID transitions from the "Unavailable" state back to being tracked. This happens because when the hardware starts counting again after resetting the counter to zero, resctrl in turn compares the new count against the counter value stored from the previous time the RMID was tracked. This results in resctrl computing an event value that is either undercounting (when new counter is more than stored counter) or a mistaken overflow (when new counter is less than stored counter). Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to zero whenever the RMID is in the "Unavailable" state to ensure accurate counting after the RMID resets to zero when it starts to be tracked again. Example scenario that results in mistaken overflow ================================================== 1. 
The resctrl filesystem is mounted, and a task is assigned to a monitoring group. $mount -t resctrl resctrl /sys/fs/resctrl $mkdir /sys/fs/resctrl/mon_groups/test1/ $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 21323 <- Total bytes on domain 0 "Unavailable" <- Total bytes on domain 1 Task is running on domain 0. Counter on domain 1 is "Unavailable". 2. The task runs on domain 0 for a while and then moves to domain 1. The counter starts incrementing on domain 1. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 7345357 <- Total bytes on domain 0 4545 <- Total bytes on domain 1 3. At some point, the RMID in domain 0 transitions to the "Unavailable" state because the task is no longer executing in that domain. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes "Unavailable" <- Total bytes on domain 0 434341 <- Total bytes on domain 1 4. Since the task continues to migrate between domains, it may eventually return to domain 0. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 17592178699059 <- Overflow on domain 0 3232332 <- Total bytes on domain 1 In this case, the RMID on domain 0 transitions from "Unavailable" state to active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when the counter is read and begins tracking the RMID counting from 0. Subsequent reads succeed but return a value smaller than the previously saved MSR value (7345357). Consequently, the resctrl's overflow logic is triggered, it compares the previous value (7345357) with the new, smaller value and incorrectly interprets this as a counter overflow, adding a large delta. In reality, this is a false positive: the counter did not overflow but was simply reset when the RMID transitioned from "Unavailable" back to active state. Here is the text from APM [1] available from [2]. 
"In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the first QM_CTR read when it begins tracking an RMID that it was not previously tracking. The U bit will be zero for all subsequent reads from that RMID while it is still tracked by the hardware. Therefore, a QM_CTR read with the U bit set when that RMID is in use by a processor can be considered 0 when calculating the difference with a subsequent read." [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory Bandwidth (MBM). [ bp: Split commit message into smaller paragraph chunks for better consumption. ] Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Reinette Chatre Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..2cd25a0d4637 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, 
msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) From c282993ccd97ad627d213645dc485086de034647 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 13 Oct 2025 19:10:22 +0900 Subject: [PATCH 134/305] can: remove false statement about 1:1 mapping between DLC and length The CAN-FD section of can.rst still states that there is a 1:1 mapping between the Classical CAN DLC and its length. This is only true for the DLC values up to 8. Beyond that point, the length remains at 8. For reference, the mapping between the CAN DLC and the length is given in below table [1]: DLC value CBFF and CEFF FBFF and FEFF [decimal] [byte] [byte] ---------------------------------------------- 0 0 0 1 1 1 2 2 2 3 3 3 4 4 4 5 5 5 6 6 6 7 7 7 8 8 8 9 8 12 10 8 16 11 8 20 12 8 24 13 8 32 14 8 48 15 8 64 Remove the erroneous statement. Instead just state that the length of a Classical CAN frame ranges from 0 to 8. [1] ISO 11898-1:2024, Table 5 -- DLC: coding of the four LSB Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20251013-can-fd-doc-v2-1-5d53bdc8f2ad@kernel.org Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index 7650c4b5be5f..ccd321d29a8a 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -1398,10 +1398,9 @@ second bit timing has to be specified in order to enable the CAN FD bitrate. Additionally CAN FD capable CAN controllers support up to 64 bytes of payload. The representation of this length in can_frame.len and canfd_frame.len for userspace applications and inside the Linux network -layer is a plain value from 0 .. 64 instead of the CAN 'data length code'. 
-The data length code was a 1:1 mapping to the payload length in the Classical -CAN frames anyway. The payload length to the bus-relevant DLC mapping is -only performed inside the CAN drivers, preferably with the helper +layer is a plain value from 0 .. 64 instead of the Classical CAN length +which ranges from 0 to 8. The payload length to the bus-relevant DLC mapping +is only performed inside the CAN drivers, preferably with the helper functions can_fd_dlc2len() and can_fd_len2dlc(). The CAN netdevice driver capabilities can be distinguished by the network From b5746b3e8ea4a8a4df776e0864322028d4f5e4b1 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 13 Oct 2025 19:10:23 +0900 Subject: [PATCH 135/305] can: add Transmitter Delay Compensation (TDC) documentation Back in 2021, support for CAN TDC was added to the kernel in series [1] and in iproute2 in series [2]. However, the documentation was never updated. Add a new sub-section under CAN-FD driver support to document how to configure the TDC using the "ip tool". 
[1] add the netlink interface for CAN-FD Transmitter Delay Compensation (TDC) Link: https://lore.kernel.org/all/20210918095637.20108-1-mailhol.vincent@wanadoo.fr/ [2] iplink_can: cleaning, fixes and adding TDC support Link: https://lore.kernel.org/all/20211103164428.692722-1-mailhol.vincent@wanadoo.fr/ Signed-off-by: Vincent Mailhol Link: https://patch.msgid.link/20251013-can-fd-doc-v2-2-5d53bdc8f2ad@kernel.org Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.rst | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst index ccd321d29a8a..194e305ae973 100644 --- a/Documentation/networking/can.rst +++ b/Documentation/networking/can.rst @@ -1464,6 +1464,70 @@ Example when 'fd-non-iso on' is added on this switchable CAN FD adapter:: can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 +Transmitter Delay Compensation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +At high bit rates, the propagation delay from the TX pin to the RX pin of +the transceiver might become greater than the actual bit time causing +measurement errors: the RX pin would still be measuring the previous bit. + +The Transmitter Delay Compensation (hereafter, TDC) resolves this problem +by introducing a Secondary Sample Point (SSP) equal to the distance, in +minimum time quantum, from the start of the bit time on the TX pin to the +actual measurement on the RX pin. The SSP is calculated as the sum of two +configurable values: the TDC Value (TDCV) and the TDC offset (TDCO). + +TDC, if supported by the device, can be configured together with CAN-FD +using the ip tool's "tdc-mode" argument as follows: + +**omitted** + When no "tdc-mode" option is provided, the kernel will automatically + decide whether TDC should be turned on, in which case it will + calculate a default TDCO and use the TDCV as measured by the + device. This is the recommended method to use TDC. 
+ +**"tdc-mode off"** + TDC is explicitly disabled. + +**"tdc-mode auto"** + The user must provide the "tdco" argument. The TDCV will be + automatically calculated by the device. This option is only + available if the device supports the TDC-AUTO CAN controller mode. + +**"tdc-mode manual"** + The user must provide both the "tdco" and "tdcv" arguments. This + option is only available if the device supports the TDC-MANUAL CAN + controller mode. + +Note that some devices may offer an additional parameter: "tdcf" (TDC Filter +window). If supported by your device, this can be added as an optional +argument to either "tdc-mode auto" or "tdc-mode manual". + +Example configuring a 500 kbit/s arbitration bitrate, a 4 Mbit/s data +bitrate, a TDCO of 15 minimum time quantum and a TDCV automatically measured +by the device:: + + $ ip link set can0 up type can bitrate 500000 \ + fd on dbitrate 4000000 \ + tdc-mode auto tdco 15 + $ ip -details link show can0 + 5: can0: mtu 72 qdisc pfifo_fast state UP \ + mode DEFAULT group default qlen 10 + link/can promiscuity 0 allmulti 0 minmtu 72 maxmtu 72 + can state ERROR-ACTIVE restart-ms 0 + bitrate 500000 sample-point 0.875 + tq 12 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 1 + ES582.1/ES584.1: tseg1 2..256 tseg2 2..128 sjw 1..128 brp 1..512 \ + brp_inc 1 + dbitrate 4000000 dsample-point 0.750 + dtq 12 dprop-seg 7 dphase-seg1 7 dphase-seg2 5 dsjw 2 dbrp 1 + tdco 15 tdcf 0 + ES582.1/ES584.1: dtseg1 2..32 dtseg2 1..16 dsjw 1..8 dbrp 1..32 \ + dbrp_inc 1 + tdco 0..127 tdcf 0..127 + clock 80000000 + + Supported CAN Hardware ---------------------- From 93a27b5891b8194a8c083c9a80d2141d4bf47ba8 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 27 Sep 2025 21:11:16 +0900 Subject: [PATCH 136/305] can: j1939: add missing calls in NETDEV_UNREGISTER notification handler Currently NETDEV_UNREGISTER event handler is not calling j1939_cancel_active_session() and j1939_sk_queue_drop_all(). 
This will result in these calls being skipped when j1939_sk_release() is called. And I guess that the reason syzbot is still reporting unregister_netdevice: waiting for vcan0 to become free. Usage count = 2 is caused by lack of these calls. Calling j1939_cancel_active_session(priv, sk) from j1939_sk_release() can be covered by calling j1939_cancel_active_session(priv, NULL) from j1939_netdev_notify(). Calling j1939_sk_queue_drop_all() from j1939_sk_release() can be covered by calling j1939_sk_netdev_event_netdown() from j1939_netdev_notify(). Therefore, we can reuse j1939_cancel_active_session(priv, NULL) and j1939_sk_netdev_event_netdown(priv) for NETDEV_UNREGISTER event handler. Fixes: 7fcbe5b2c6a4 ("can: j1939: implement NETDEV_UNREGISTER notification handler") Signed-off-by: Tetsuo Handa Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://patch.msgid.link/3ad3c7f8-5a74-4b07-a193-cb0725823558@I-love.SAKURA.ne.jp Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 3706a872ecaf..a93af55df5fd 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -378,6 +378,8 @@ static int j1939_netdev_notify(struct notifier_block *nb, j1939_ecu_unmap_all(priv); break; case NETDEV_UNREGISTER: + j1939_cancel_active_session(priv, NULL); + j1939_sk_netdev_event_netdown(priv); j1939_sk_netdev_event_unregister(priv); break; } From e5ae8d1eb08a3e27fff4ae264af4c8056d908639 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 12 Sep 2025 15:31:45 -0700 Subject: [PATCH 137/305] drm/xe: Increase global invalidation timeout to 1000us The previous timeout of 500us seems to be too small; panning the map in the Roll20 VTT in Firefox on a KDE/Wayland desktop reliably triggered timeouts within a few seconds of usage, causing the monitor to freeze and the following to be printed to dmesg: [Jul30 13:44] xe 0000:03:00.0: [drm] *ERROR* GT0: Global invalidation timeout 
[Jul30 13:48] xe 0000:03:00.0: [drm] *ERROR* [CRTC:82:pipe A] flip_done timed out I haven't hit a single timeout since increasing it to 1000us even after several multi-hour testing sessions. Fixes: 0dd2dd0182bc ("drm/xe: Move DSB l2 flush to a more sensible place") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5710 Signed-off-by: Kenneth Graunke Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250912223254.147940-1-kenneth@whitecape.org Signed-off-by: Lucas De Marchi (cherry picked from commit 146046907b56578263434107f5a7d5051847c459) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2883b39c9b37..34d33965eac2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -1070,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe) spin_lock(&gt->global_invl_lock); xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); - if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) + if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); spin_unlock(&gt->global_invl_lock); From 7ac74613e5f2ef3450f44fd2127198662c2563a9 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 04:06:18 -0700 Subject: [PATCH 138/305] drm/xe: Don't allow evicting of BOs in same VM in array of VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An array of VM binds can potentially evict other buffer objects (BOs) within the same VM under certain conditions, which may lead to NULL pointer dereferences later in the bind pipeline. To prevent this, clear the allow_res_evict flag in the xe_bo_validate call. 
v2: - Invert polarity of no_res_evict (Thomas) - Add comment in code explaining issue (Thomas) Cc: stable@vger.kernel.org Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6268 Fixes: 774b5fa509a9 ("drm/xe: Avoid evicting object of the same vm in none fault mode") Fixes: 77f2ef3f16f5 ("drm/xe: Lock all gpuva ops during VM bind IOCTL") Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Tested-by: Paulo Zanoni Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251009110618.3481870-1-matthew.brost@intel.com (cherry picked from commit 8b9ba8d6d95fe75fed6b0480bb03da4b321bea08) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 32 +++++++++++++++++++++++--------- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 027e6ce648c5..f602b874e054 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, } static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) + bool res_evict, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); @@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) err = xe_bo_validate(bo, vm, - !xe_vm_in_preempt_fence_mode(vm), exec); + !xe_vm_in_preempt_fence_mode(vm) && + res_evict, exec); } return err; @@ -2913,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) } static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) + struct xe_vma_ops *vops, struct xe_vma_op *op) { int err = 0; + bool res_evict; + + /* + * We only allow evicting a BO within the VM if it is not part of an + * array of binds, 
as an array of binds can evict another BO within the + * bind. + */ + res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); switch (op->base.op) { case DRM_GPUVA_OP_MAP: if (!op->map.invalidate_on_bind) err = vma_lock_and_validate(exec, op->map.vma, + res_evict, !xe_vm_in_fault_mode(vm) || op->map.immediate); break; @@ -2931,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.remap.unmap->va), - false); + res_evict, false); if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); + err = vma_lock_and_validate(exec, op->remap.prev, + res_evict, true); if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); + err = vma_lock_and_validate(exec, op->remap.next, + res_evict, true); break; case DRM_GPUVA_OP_UNMAP: err = check_ufence(gpuva_to_vma(op->base.unmap.va)); @@ -2944,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.unmap.va), - false); + res_evict, false); break; case DRM_GPUVA_OP_PREFETCH: { @@ -2959,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, err = vma_lock_and_validate(exec, gpuva_to_vma(op->base.prefetch.va), - false); + res_evict, false); if (!err && !xe_vma_has_no_bo(vma)) err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region], @@ -3005,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, return err; list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); + err = op_lock_and_prep(exec, vm, vops, op); if (err) return err; } @@ -3638,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); + if (args->num_binds > 1) + vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; for (i = 0; i < args->num_binds; ++i) { u64 range = bind_ops[i].range; u64 addr = 
bind_ops[i].addr; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index da39940501d8..413353e1c225 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -476,6 +476,7 @@ struct xe_vma_ops { /** @flag: signify the properties within xe_vma_ops*/ #define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0) #define XE_VMA_OPS_FLAG_MADVISE BIT(1) +#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2) u32 flags; #ifdef TEST_VM_OPS_ERROR /** @inject_error: inject error to test error handling */ From d30203739be798d3de5c84db3060e96f00c54e82 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 18 Sep 2025 13:58:57 -0700 Subject: [PATCH 139/305] drm/xe: Move rebar to be done earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There may be cases in which the BAR0 also needs to move to accommodate the bigger BAR2. However if it's not released, the BAR2 resize fails. During the vram probe it can't be released as it's already in use by xe_mmio for early register access. Add a new function in xe_vram and let xe_pci call it directly before even early device probe. 
This allows the BAR2 to resize in cases BAR0 also needs to move, assuming there aren't other reasons to hold that move: [] xe 0000:03:00.0: vgaarb: deactivate vga console [] xe 0000:03:00.0: [drm] Attempting to resize bar from 8192MiB -> 16384MiB [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: releasing [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x41ffffffff 64bit pref]: releasing [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 2 [mem 0x4000000000-0x43ffffffff 64bit pref]: assigned [] xe 0000:03:00.0: BAR 0 [mem 0x83000000-0x83ffffff 64bit]: assigned [] pcieport 0000:00:01.0: PCI bridge to [bus 01-04] [] pcieport 0000:00:01.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:00:01.0: bridge window [mem 0x4000000000-0x44007fffff 64bit pref] [] pcieport 0000:01:00.0: PCI bridge to [bus 02-04] [] pcieport 0000:01:00.0: bridge window [mem 0x83000000-0x840fffff] [] pcieport 0000:01:00.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] pcieport 0000:02:01.0: PCI bridge to [bus 03] [] pcieport 0000:02:01.0: bridge window [mem 0x83000000-0x83ffffff] [] pcieport 0000:02:01.0: bridge window [mem 0x4000000000-0x43ffffffff 64bit pref] [] xe 0000:03:00.0: [drm] BAR2 resized to 16384M [] xe 0000:03:00.0: [drm:xe_pci_probe [xe]] BATTLEMAGE e221:0000 dgfx:1 gfx:Xe2_HPG (20.02) ... For BMG there are additional fix needed in the PCI side, but this helps getting it to a working resize. All the rebar logic is more pci-specific than xe-specific and can be done very early in the probe sequence. In future it would be good to move it out of xe_vram.c, but this refactor is left for later. 
Cc: Ilpo Järvinen Cc: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/intel-xe/fafda2a3-fc63-ce97-d22b-803f771a4d19@linux.intel.com Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250918-xe-pci-rebar-2-v1-2-6c094702a074@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 45e33f220fd625492c11e15733d8e9b4f9db82a4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_vram.h | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index be91343829dd..9a6df79fc5b6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; + xe_vram_resize_bar(xe); + err = xe_device_probe_early(xe); /* * In Boot Survivability mode, no drm card is exposed and driver diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c index b44ebf50fedb..652df7a5f4f6 100644 --- a/drivers/gpu/drm/xe/xe_vram.c +++ b/drivers/gpu/drm/xe/xe_vram.c @@ -26,15 +26,35 @@ #define BAR_SIZE_SHIFT 20 -static void -_resize_bar(struct xe_device *xe, int resno, resource_size_t size) +/* + * Release all the BARs that could influence/block LMEMBAR resizing, i.e. 
+ * assigned IORESOURCE_MEM_64 BARs + */ +static void release_bars(struct pci_dev *pdev) +{ + struct resource *res; + int i; + + pci_dev_for_each_resource(pdev, res, i) { + /* Resource already un-assigned, do not reset it */ + if (!res->parent) + continue; + + /* No need to release unrelated BARs */ + if (!(res->flags & IORESOURCE_MEM_64)) + continue; + + pci_release_resource(pdev, i); + } +} + +static void resize_bar(struct xe_device *xe, int resno, resource_size_t size) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int bar_size = pci_rebar_bytes_to_size(size); int ret; - if (pci_resource_len(pdev, resno)) - pci_release_resource(pdev, resno); + release_bars(pdev); ret = pci_resize_resource(pdev, resno, bar_size); if (ret) { @@ -50,7 +70,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size) * if force_vram_bar_size is set, attempt to set to the requested size * else set to maximum possible size */ -static void resize_vram_bar(struct xe_device *xe) +void xe_vram_resize_bar(struct xe_device *xe) { int force_vram_bar_size = xe_modparam.force_vram_bar_size; struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -119,7 +139,7 @@ static void resize_vram_bar(struct xe_device *xe) pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); - _resize_bar(xe, LMEM_BAR, rebar_size); + resize_bar(xe, LMEM_BAR, rebar_size); pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); @@ -148,8 +168,6 @@ static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region * return -ENXIO; } - resize_vram_bar(xe); - lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR); lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR); if (!lmem_bar->io_size) diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h index 72860f714fc6..13505cfb184d 100644 --- a/drivers/gpu/drm/xe/xe_vram.h +++ b/drivers/gpu/drm/xe/xe_vram.h @@ -11,6 +11,7 @@ 
struct xe_device; struct xe_vram_region; +void xe_vram_resize_bar(struct xe_device *xe); int xe_vram_probe(struct xe_device *xe); struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement); From 1117e7d1e8e66bf7e40291178b829a8513f83a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 10 Sep 2025 18:09:39 +0200 Subject: [PATCH 140/305] drm/xe/migrate: Fix an error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The exhaustive eviction accidentally changed an error path goto to a return. Fix this. Fixes: 59eabff2a352 ("drm/xe: Convert xe_bo_create_pin_map() for exhaustive eviction") Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250910160939.103473-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 381f1ed15159c4b3f00dd37cc70924dedebeb111) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 1d667fa36cf3..569869a2b339 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m) err = xe_migrate_lock_prepare_vm(tile, m, vm); if (err) - return err; + goto err_out; if (xe->info.has_usm) { struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt, From 7413e9f2be6b2b0caff9c517efa123d988914bba Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Oct 2025 06:06:29 -0700 Subject: [PATCH 141/305] drm/xe: Handle mixed mappings and existing VRAM on atomic faults Moving to VRAM will fail if mixed mappings are present or if the page is already located in VRAM. Atomic faults that require a move to VRAM currently retry without attempting to evict mixed mappings or locate existing VRAM mappings. 
This patch fixes the issue by attempting to evict mixed mappings or find existing VRAM pages when a move to VRAM fails during atomic fault handling. Fixes: a9ac0fa455b0 ("drm/xe: Strict migration policy for atomic SVM faults") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251009130629.3531962-1-matthew.brost@intel.com (cherry picked from commit 75188605c56d10c1bd3b1cd94f4872f349c3a9c8) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 7e2db71ff34e..b268ee0d2271 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1073,7 +1073,17 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, drm_dbg(&vm->xe->drm, "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", vm->usm.asid, ERR_PTR(err)); - goto retry; + + /* + * In the devmem-only case, mixed mappings may + * be found. The get_pages function will fix + * these up to a single location, allowing the + * page fault handler to make forward progress. + */ + if (ctx.devmem_only) + goto get_pages; + else + goto retry; } else { drm_err(&vm->xe->drm, "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", @@ -1083,6 +1093,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, } } +get_pages: get_pages_start = xe_svm_stats_ktime_get(); range_debug(range, "GET PAGES"); From 1852d27aa998272696680607b65a2ceac966104e Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 9 Oct 2025 18:10:47 -0700 Subject: [PATCH 142/305] drm/xe: Enable media sampler power gating Where applicable, enable media sampler power gating. Also, add it to the powergate_info debugfs. v2: Remove the sampler powergate status since it is cleared quickly anyway. 
v3: Use vcs mask (Rodrigo) and fix the version check for media v4: Remove extra spaces v5: Media samplers are independent of vcs mask, use Media version 1255 (Matt Roper) Fixes: 38e8c4184ea0 ("drm/xe: Enable Coarse Power Gating") Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20251010011047.2047584-1-vinay.belgaumkar@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 4cbc08649a54c3d533df9832342d52d409dfbbf0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_gt_idle.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 06cb6b02ec64..51f2a03847f9 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -342,6 +342,7 @@ #define POWERGATE_ENABLE XE_REG(0xa210) #define RENDER_POWERGATE_ENABLE REG_BIT(0) #define MEDIA_POWERGATE_ENABLE REG_BIT(1) +#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2) #define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) #define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index f8950a52d0a4..bdc9d9877ec4 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; + if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE; + if (xe->info.platform != XE_DG1) { for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) @@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, str_up_down(pg_status & media_slices[n].status_bit)); } + 
+ if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255) + drm_printf(p, "Media Samplers Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE)); + return 0; } From 9f64b3cd051b825de0a2a9f145c8e003200cedd5 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 10 Oct 2025 17:25:29 +0000 Subject: [PATCH 143/305] drm/xe/guc: Check GuC running state before deregistering exec queue In normal operation, a registered exec queue is disabled and deregistered through the GuC, and freed only after the GuC confirms completion. However, if the driver is forced to unbind while the exec queue is still running, the user may call exec_destroy() after the GuC has already been stopped and CT communication disabled. In this case, the driver cannot receive a response from the GuC, preventing proper cleanup of exec queue resources. Fix this by directly releasing the resources when GuC is not running. Here is the failure dmesg log: " [ 468.089581] ---[ end trace 0000000000000000 ]--- [ 468.089608] pci 0000:03:00.0: [drm] *ERROR* GT0: GUC ID manager unclean (1/65535) [ 468.090558] pci 0000:03:00.0: [drm] GT0: total 65535 [ 468.090562] pci 0000:03:00.0: [drm] GT0: used 1 [ 468.090564] pci 0000:03:00.0: [drm] GT0: range 1..1 (1) [ 468.092716] ------------[ cut here ]------------ [ 468.092719] WARNING: CPU: 14 PID: 4775 at drivers/gpu/drm/xe/xe_ttm_vram_mgr.c:298 ttm_vram_mgr_fini+0xf8/0x130 [xe] " v2: use xe_uc_fw_is_running() instead of xe_guc_ct_enabled(). As CT may go down and come back during VF migration. 
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20251010172529.2967639-2-shuicheng.lin@intel.com (cherry picked from commit 9b42321a02c50a12b2beb6ae9469606257fbecea) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 53024eb5670b..94ed8159496f 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -44,6 +44,7 @@ #include "xe_ring_ops_types.h" #include "xe_sched_job.h" #include "xe_trace.h" +#include "xe_uc_fw.h" #include "xe_vm.h" static struct xe_guc * @@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); - if (exec_queue_registered(q)) + /* + * Expected state transitions for cleanup: + * - If the exec queue is registered and GuC firmware is running, we must first + * disable scheduling and deregister the queue to ensure proper teardown and + * resource release in the GuC, then destroy the exec queue on driver side. + * - If the GuC is already stopped (e.g., during driver unload or GPU reset), + * we cannot expect a response for the deregister request. In this case, + * it is safe to directly destroy the exec queue on driver side, as the GuC + * will not process further requests and all resources must be cleaned up locally. 
+ */ + if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw)) disable_scheduling_deregister(guc, q); else __guc_exec_queue_destroy(guc, q); From 7e5a5983edda664e8e4bb20af17b80f5135c655c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 24 Sep 2025 16:10:38 +0100 Subject: [PATCH 144/305] btrfs: fix clearing of BTRFS_FS_RELOC_RUNNING if relocation already running When starting relocation, at reloc_chunk_start(), if we happen to find the flag BTRFS_FS_RELOC_RUNNING is already set we return an error (-EINPROGRESS) to the callers, however the callers call reloc_chunk_end() which will clear the flag BTRFS_FS_RELOC_RUNNING, which is wrong since relocation was started by another task and still running. Finding the BTRFS_FS_RELOC_RUNNING flag already set is an unexpected scenario, but still our current behaviour is not correct. Fix this by never calling reloc_chunk_end() if reloc_chunk_start() has returned an error, which is what logically makes sense, since the general widespread pattern is to have end functions called only if the counterpart start functions succeeded. This requires changing reloc_chunk_start() to clear BTRFS_FS_RELOC_RUNNING if there's a pending cancel request. Fixes: 907d2710d727 ("btrfs: add cancellable chunk relocation support") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Boris Burkov Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8dd8de6b9fb8..0765e06d00b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3780,6 +3780,7 @@ static noinline_for_stack struct inode *create_reloc_inode( /* * Mark start of chunk relocation that is cancellable. Check if the cancellation * has been requested meanwhile and don't start in that case. 
+ * NOTE: if this returns an error, reloc_chunk_end() must not be called. * * Return: * 0 success @@ -3796,10 +3797,8 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info) if (atomic_read(&fs_info->reloc_cancel_req) > 0) { btrfs_info(fs_info, "chunk relocation canceled on start"); - /* - * On cancel, clear all requests but let the caller mark - * the end after cleanup operations. - */ + /* On cancel, clear all requests. */ + clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags); atomic_set(&fs_info->reloc_cancel_req, 0); return -ECANCELED; } @@ -3808,9 +3807,11 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info) /* * Mark end of chunk relocation that is cancellable and wake any waiters. + * NOTE: call only if a previous call to reloc_chunk_start() succeeded. */ static void reloc_chunk_end(struct btrfs_fs_info *fs_info) { + ASSERT(test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)); /* Requested after start, clear bit first so any waiters can continue */ if (atomic_read(&fs_info->reloc_cancel_req) > 0) btrfs_info(fs_info, "chunk relocation canceled during operation"); @@ -4023,9 +4024,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start, if (err && rw) btrfs_dec_block_group_ro(rc->block_group); iput(rc->data_inode); + reloc_chunk_end(fs_info); out_put_bg: btrfs_put_block_group(bg); - reloc_chunk_end(fs_info); free_reloc_control(rc); return err; } @@ -4208,8 +4209,8 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info) ret = ret2; out_unset: unset_reloc_control(rc); -out_end: reloc_chunk_end(fs_info); +out_end: free_reloc_control(rc); out: free_reloc_roots(&reloc_roots); From 53a4acbfc1de85fa637521ffab4f4e2ee03cbeeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Thu, 25 Sep 2025 20:41:39 +0200 Subject: [PATCH 145/305] btrfs: fix memory leak on duplicated memory in the qgroup assign ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit On 'btrfs_ioctl_qgroup_assign' we first duplicate the argument as provided by the user, which is kfree'd in the end. But this was not the case when allocating memory for 'prealloc'. In this case, if it somehow failed, then the previous code would go directly into calling 'mnt_drop_write_file', without freeing the string duplicated from the user space. Fixes: 4addc1ffd67a ("btrfs: qgroup: preallocate memory before adding a relation") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Boris Burkov Reviewed-by: Filipe Manana Signed-off-by: Miquel Sabaté Solà Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a454b5ba2097..938286bee6a8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3740,7 +3740,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); if (!prealloc) { ret = -ENOMEM; - goto drop_write; + goto out; } } From b7fdfd29a136a17c5c8ad9e9bbf89c48919c3d19 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 26 Sep 2025 14:20:11 +0930 Subject: [PATCH 146/305] btrfs: only set the device specific options after devices are opened [BUG] With v6.17-rc kernels, btrfs will always set 'ssd' mount option even if the block device is not a rotating one: # cat /sys/block/sdd/queue/rotational 1 # cat /etc/fstab: LABEL=DATA2 /data2 btrfs rw,relatime,space_cache=v2,subvolid=5,subvol=/,nofail,nosuid,nodev 0 0 # mount [...] /dev/sdd on /data2 type btrfs (rw,nosuid,nodev,relatime,ssd,space_cache=v2,subvolid=5,subvol=/) [CAUSE] The 'ssd' mount option is set by set_device_specific_options(), and it expects that if there is any rotating device in the btrfs, it will set fs_devices::rotating. 
However after commit bddf57a70781 ("btrfs: delay btrfs_open_devices() until super block is created"), the device opening is delayed until the super block is created. But the timing of set_device_specific_options() is still left as is, this makes the function be called without any device opened. Since no device is opened, thus fs_devices::rotating will never be set, making btrfs incorrectly set 'ssd' mount option. [FIX] Only call set_device_specific_options() after btrfs_open_devices(). Also only call set_device_specific_options() after a new mount, if we're mounting a mounted btrfs, there is no need to set the device specific mount options again. Reported-by: HAN Yuwei Link: https://lore.kernel.org/linux-btrfs/C8FF75669DFFC3C5+5f93bf8a-80a0-48a6-81bf-4ec890abc99a@bupt.moe/ Fixes: bddf57a70781 ("btrfs: delay btrfs_open_devices() until super block is created") CC: stable@vger.kernel.org # 6.17 Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d6e496436539..aadc02374b2a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1900,8 +1900,6 @@ static int btrfs_get_tree_super(struct fs_context *fc) return PTR_ERR(sb); } - set_device_specific_options(fs_info); - if (sb->s_root) { /* * Not the first mount of the fs thus got an existing super block. 
@@ -1946,6 +1944,7 @@ static int btrfs_get_tree_super(struct fs_context *fc) deactivate_locked_super(sb); return -EACCES; } + set_device_specific_options(fs_info); bdev = fs_devices->latest_dev->bdev; snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); From 42d3a055d946878a327ee030f0e0c7df0f0f15c8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 30 Sep 2025 07:54:30 +0930 Subject: [PATCH 147/305] btrfs: do not use folio_test_partial_kmap() in ASSERT()s [BUG] Syzbot reported an ASSERT() triggered inside scrub: BTRFS info (device loop0): scrub: started on devid 1 assertion failed: !folio_test_partial_kmap(folio) :: 0, in fs/btrfs/scrub.c:697 ------------[ cut here ]------------ kernel BUG at fs/btrfs/scrub.c:697! Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 6077 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 RIP: 0010:scrub_stripe_get_kaddr+0x1bb/0x1c0 fs/btrfs/scrub.c:697 Call Trace: scrub_bio_add_sector fs/btrfs/scrub.c:932 [inline] scrub_submit_initial_read+0xf21/0x1120 fs/btrfs/scrub.c:1897 submit_initial_group_read+0x423/0x5b0 fs/btrfs/scrub.c:1952 flush_scrub_stripes+0x18f/0x1150 fs/btrfs/scrub.c:1973 scrub_stripe+0xbea/0x2a30 fs/btrfs/scrub.c:2516 scrub_chunk+0x2a3/0x430 fs/btrfs/scrub.c:2575 scrub_enumerate_chunks+0xa70/0x1350 fs/btrfs/scrub.c:2839 btrfs_scrub_dev+0x6e7/0x10e0 fs/btrfs/scrub.c:3153 btrfs_ioctl_scrub+0x249/0x4b0 fs/btrfs/ioctl.c:3163 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f ---[ end trace 0000000000000000 ]--- Which doesn't make much sense, as all the folios we allocated for scrub should not be highmem. 
[CAUSE] Thankfully syzbot has a detailed kernel config file, showing that CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is set to y. And that debug option will force all folio_test_partial_kmap() to return true, to improve coverage on highmem tests. But in our case we really just want to make sure the folios we allocated are not highmem (and they are indeed not). Such incorrect result from folio_test_partial_kmap() is just screwing up everything. [FIX] Replace folio_test_partial_kmap() with folio_test_highmem() so that we won't bother those highmem specific debugging options. Fixes: 5fbaae4b8567 ("btrfs: prepare scrub to support bs > ps cases") Reported-by: syzbot+bde59221318c592e6346@syzkaller.appspotmail.com Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4691d0bdb2e8..651b11884f82 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -694,7 +694,7 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr) /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); return folio_address(folio) + offset_in_folio(folio, offset); } @@ -707,7 +707,7 @@ static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int secto /* stripe->folios[] is allocated by us and no highmem is allowed. */ ASSERT(folio); - ASSERT(!folio_test_partial_kmap(folio)); + ASSERT(!folio_test_highmem(folio)); /* And the range must be contained inside the folio.
*/ ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio)); return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset); From a5a51bf4e9b7354ce7cd697e610d72c1b33fd949 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Oct 2025 11:08:13 +0100 Subject: [PATCH 148/305] btrfs: do not assert we found block group item when creating free space tree Currently, when building a free space tree at populate_free_space_tree(), if we are not using the block group tree feature, we always expect to find block group items (either extent items or a block group item with key type BTRFS_BLOCK_GROUP_ITEM_KEY) when we search the extent tree with btrfs_search_slot_for_read(), so we assert that we found an item. However this expectation is wrong since we can have a new block group created in the current transaction which is still empty and for which we still have not added the block group's item to the extent tree, in which case we do not have any items in the extent tree associated to the block group. The insertion of a new block group's block group item in the extent tree happens at btrfs_create_pending_block_groups() when it calls the helper insert_block_group_item(). This typically is done when a transaction handle is released, committed or when running delayed refs (either as part of a transaction commit or when serving tickets for space reservation if we are low on free space). So remove the assertion at populate_free_space_tree() even when the block group tree feature is not enabled and update the comment to mention this case. Syzbot reported this with the following stack trace: BTRFS info (device loop3 state M): rebuilding free space tree assertion failed: ret == 0 :: 0, in fs/btrfs/free-space-tree.c:1115 ------------[ cut here ]------------ kernel BUG at fs/btrfs/free-space-tree.c:1115! 
Oops: invalid opcode: 0000 [#1] SMP KASAN PTI CPU: 1 UID: 0 PID: 6352 Comm: syz.3.25 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 RIP: 0010:populate_free_space_tree+0x700/0x710 fs/btrfs/free-space-tree.c:1115 Code: ff ff e8 d3 (...) RSP: 0018:ffffc9000430f780 EFLAGS: 00010246 RAX: 0000000000000043 RBX: ffff88805b709630 RCX: fea61d0e2e79d000 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffffc9000430f8b0 R08: ffffc9000430f4a7 R09: 1ffff92000861e94 R10: dffffc0000000000 R11: fffff52000861e95 R12: 0000000000000001 R13: 1ffff92000861f00 R14: dffffc0000000000 R15: 0000000000000000 FS: 00007f424d9fe6c0(0000) GS:ffff888125afc000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd78ad212c0 CR3: 0000000076d68000 CR4: 00000000003526f0 Call Trace: btrfs_rebuild_free_space_tree+0x1ba/0x6d0 fs/btrfs/free-space-tree.c:1364 btrfs_start_pre_rw_mount+0x128f/0x1bf0 fs/btrfs/disk-io.c:3062 btrfs_remount_rw fs/btrfs/super.c:1334 [inline] btrfs_reconfigure+0xaed/0x2160 fs/btrfs/super.c:1559 reconfigure_super+0x227/0x890 fs/super.c:1076 do_remount fs/namespace.c:3279 [inline] path_mount+0xd1a/0xfe0 fs/namespace.c:4027 do_mount fs/namespace.c:4048 [inline] __do_sys_mount fs/namespace.c:4236 [inline] __se_sys_mount+0x313/0x410 fs/namespace.c:4213 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f424e39066a Code: d8 64 89 02 (...) 
RSP: 002b:00007f424d9fde68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007f424d9fdef0 RCX: 00007f424e39066a RDX: 0000200000000180 RSI: 0000200000000380 RDI: 0000000000000000 RBP: 0000200000000180 R08: 00007f424d9fdef0 R09: 0000000000000020 R10: 0000000000000020 R11: 0000000000000246 R12: 0000200000000380 R13: 00007f424d9fdeb0 R14: 0000000000000000 R15: 00002000000002c0 Modules linked in: ---[ end trace 0000000000000000 ]--- Reported-by: syzbot+884dc4621377ba579a6f@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/68dc3dab.a00a0220.102ee.004e.GAE@google.com/ Fixes: a5ed91828518 ("Btrfs: implement the free space B-tree") CC: # 6.1.x: 1961d20f6fa8: btrfs: fix assertion when building free space tree CC: # 6.1.x Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dad0b492a663..d86541073d42 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1106,14 +1106,15 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, * If ret is 1 (no key found), it means this is an empty block group, * without any extents allocated from it and there's no block group * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree - * because we are using the block group tree feature, so block group - * items are stored in the block group tree. It also means there are no - * extents allocated for block groups with a start offset beyond this - * block group's end offset (this is the last, highest, block group). 
+ * because we are using the block group tree feature (so block group + * items are stored in the block group tree) or this is a new block + * group created in the current transaction and its block group item + * was not yet inserted in the extent tree (that happens in + * btrfs_create_pending_block_groups() -> insert_block_group_item()). + * It also means there are no extents allocated for block groups with a + * start offset beyond this block group's end offset (this is the last, + * highest, block group). */ - if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) - ASSERT(ret == 0); - start = block_group->start; end = block_group->start + block_group->length; while (ret == 0) { From 8ab2fa69691b2913a67f3c54fbb991247b3755be Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 30 Sep 2025 21:05:17 -0700 Subject: [PATCH 149/305] btrfs: fix incorrect readahead expansion length The intent of btrfs_readahead_expand() was to expand to the length of the current compressed extent being read. However, "ram_bytes" is *not* that, in the case where a single physical compressed extent is used for multiple file extents. Consider this case with a large compressed extent C and then later two non-compressed extents N1 and N2 written over C, leaving C1 and C2 pointing to offset/len pairs of C: [ C ] [ N1 ][ C1 ][ N2 ][ C2 ] In such a case, ram_bytes for both C1 and C2 is the full uncompressed length of C. So starting readahead in C1 will expand the readahead past the end of C1, past N2, and into C2. This will then expand readahead again, to C2_start + ram_bytes, way past EOF. First of all, this is totally undesirable, we don't want to read the whole file in arbitrary chunks of the large underlying extent if it happens to exist. Secondly, it results in zeroing the range past the end of C2 up to ram_bytes. This is particularly unpleasant with fs-verity as it can zero and set uptodate pages in the verity virtual space past EOF. 
This incorrect readahead behavior can lead to verity verification errors, if we iterate in a way that happens to do the wrong readahead. Fix this by using em->len for readahead expansion, not em->ram_bytes, resulting in the expected behavior of stopping readahead at the extent boundary. Reported-by: Max Chernoff Link: https://bugzilla.redhat.com/show_bug.cgi?id=2399898 Fixes: 9e9ff875e417 ("btrfs: use readahead_expand() on compressed extents") CC: stable@vger.kernel.org # 6.17 Reviewed-by: Filipe Manana Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c123a3ef154a..755ec6dfd51c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -973,7 +973,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl, { const u64 ra_pos = readahead_pos(ractl); const u64 ra_end = ra_pos + readahead_length(ractl); - const u64 em_end = em->start + em->ram_bytes; + const u64 em_end = em->start + em->len; /* No expansion for holes and inline extents. */ if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE) From fec9b9d3ced39f16be8d7afdf81f4dd2653da319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miquel=20Sabat=C3=A9=20Sol=C3=A0?= Date: Wed, 8 Oct 2025 14:18:59 +0200 Subject: [PATCH 150/305] btrfs: fix memory leaks when rejecting a non SINGLE data profile without an RST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the end of btrfs_load_block_group_zone_info() the first thing we do is to ensure that if the mapping type is not a SINGLE one and there is no RAID stripe tree, then we return early with an error. Doing that, though, prevents the code from running the last calls from this function which are about freeing memory allocated during its run. Hence, in this case, instead of returning early, we set the ret value and fall through the rest of the cleanup code. 
Fixes: 5906333cc4af ("btrfs: zoned: don't skip block group profile checks on conventional zones") CC: stable@vger.kernel.org # 6.8+ Reviewed-by: Johannes Thumshirn Signed-off-by: Miquel Sabaté Solà Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e3341a84f4ab..838149fa60ce 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1753,7 +1753,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", btrfs_bg_type_to_raid_name(map->type)); - return -EINVAL; + ret = -EINVAL; } if (unlikely(cache->alloc_offset > cache->zone_capacity)) { From e92c2941204de7b62e9c2deecfeb9eaefe54a22a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Oct 2025 18:08:58 +0300 Subject: [PATCH 151/305] btrfs: tree-checker: fix bounds check in check_inode_extref() The parentheses for the unlikely() annotation were put in the wrong place so it means that the condition is basically never true and the bounds checking is skipped. Fixes: aab9458b9f00 ("btrfs: tree-checker: add inode extref checks") Signed-off-by: Dan Carpenter Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ca30b15ea452..c10b4c242acf 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1797,7 +1797,7 @@ static int check_inode_extref(struct extent_buffer *leaf, struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr; u16 namelen; - if (unlikely(ptr + sizeof(*extref)) > end) { + if (unlikely(ptr + sizeof(*extref) > end)) { inode_ref_err(leaf, slot, "inode extref overflow, ptr %lu end %lu inode_extref size %zu", ptr, end, sizeof(*extref)); From 8aec9dbf2db2e958de5bd20e23b8fbb8f2aa1fa6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Fri, 3 Oct 2025 15:11:06 +0100 Subject: [PATCH 152/305] btrfs: send: fix -Wflex-array-member-not-at-end warning in struct send_ctx The warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Fix the following warning: fs/btrfs/send.c:181:24: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] and move the declaration of send_ctx::cur_inode_path to the end. Notice that struct fs_path contains a flexible array member inline_buf, but also a padding array and a limit calculated for the usable space of inline_buf (FS_PATH_INLINE_SIZE). It is not the pattern where flexible array is in the middle of a structure and could potentially overwrite other members. Signed-off-by: Gustavo A. R. Silva Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9230e5066fc6..6144e66661f5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -178,7 +178,6 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; - struct fs_path cur_inode_path; bool cur_inode_new; bool cur_inode_new_gen; bool cur_inode_deleted; @@ -305,6 +304,9 @@ struct send_ctx { struct btrfs_lru_cache dir_created_cache; struct btrfs_lru_cache dir_utimes_cache; + + /* Must be last as it ends in a flexible-array member. */ + struct fs_path cur_inode_path; }; struct pending_dir_move { From a375246fcf2bbdaeb1df7fa7ee5a8b884a89085e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Oct 2025 08:40:01 -0700 Subject: [PATCH 153/305] cxl/features: Add check for no entries in cxl_feature_info cxl EDAC calls cxl_feature_info() to get the feature information and if the hardware has no Features support, cxlfs may be passed in as NULL. 
[ 51.957498] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 51.965571] #PF: supervisor read access in kernel mode [ 51.971559] #PF: error_code(0x0000) - not-present page [ 51.977542] PGD 17e4f6067 P4D 0 [ 51.981384] Oops: Oops: 0000 [#1] SMP NOPTI [ 51.986300] CPU: 49 UID: 0 PID: 3782 Comm: systemd-udevd Not tainted 6.17.0dj test+ #64 PREEMPT(voluntary) [ 51.997355] Hardware name: [ 52.009790] RIP: 0010:cxl_feature_info+0xa/0x80 [cxl_core] Add a check for cxlfs before dereferencing it and return -EOPNOTSUPP if there is no cxlfs created due to no hardware support. Fixes: eb5dfcb9e36d ("cxl: Add support to handle user feature commands for set feature") Reviewed-by: Davidlohr Bueso Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/features.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 7c750599ea69..4bc484b46f43 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -371,6 +371,9 @@ cxl_feature_info(struct cxl_features_state *cxlfs, { struct cxl_feat_entry *feat; + if (!cxlfs || !cxlfs->entries) + return ERR_PTR(-EOPNOTSUPP); + for (int i = 0; i < cxlfs->entries->num_features; i++) { feat = &cxlfs->entries->ent[i]; if (uuid_equal(uuid, &feat->uuid)) From f25785f9b088ed65089dd0d0034da52858417839 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sun, 5 Oct 2025 23:48:05 -0400 Subject: [PATCH 154/305] x86/mm: Fix overflow in __cpa_addr() The change to have cpa_flush() call flush_kernel_pages() introduced a bug where __cpa_addr() can access an address one larger than the largest one in the cpa->pages array. 
KASAN reports the issue like this: BUG: KASAN: slab-out-of-bounds in __cpa_addr arch/x86/mm/pat/set_memory.c:309 [inline] BUG: KASAN: slab-out-of-bounds in __cpa_addr+0x1d3/0x220 arch/x86/mm/pat/set_memory.c:306 Read of size 8 at addr ffff88801f75e8f8 by task syz.0.17/5978 This bug could cause cpa_flush() to not properly flush memory, which somehow never showed any symptoms in my tests, possibly because cpa_flush() is called so rarely, but could potentially cause issues for other people. Fix the issue by directly calculating the flush end address from the start address. Fixes: 86e6815b316e ("x86/mm: Change cpa_flush() to call flush_kernel_range() directly") Reported-by: syzbot+afec6555eef563c66c97@syzkaller.appspotmail.com Signed-off-by: Rik van Riel Signed-off-by: Dave Hansen Reviewed-by: Kiryl Shutsemau Link: https://lore.kernel.org/all/68e2ff90.050a0220.2c17c1.0038.GAE@google.com/ --- arch/x86/mm/pat/set_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d2d54b8c4dbb..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -446,7 +446,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) } start = fix_addr(__cpa_addr(cpa, 0)); - end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + end = start + cpa->numpages * PAGE_SIZE; if (cpa->force_flush_all) end = TLB_FLUSH_ALL; From 83b0177a6c4889b3a6e865da5e21b2c9d97d0551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2025 15:43:04 +0200 Subject: [PATCH 155/305] x86/mm: Fix SMP ordering in switch_mm_irqs_off() Stephen noted that it is possible to not have an smp_mb() between the loaded_mm store and the tlb_gen load in switch_mm(), meaning the ordering against flush_tlb_mm_range() goes out the window, and it becomes possible for switch_mm() to not observe a recent tlb_gen update and fail to flush the TLBs. 
[ dhansen: merge conflict fixed by Ingo ] Fixes: 209954cbc7d0 ("x86/mm/tlb: Update mm_cpumask lazily") Reported-by: Stephen Dolan Closes: https://lore.kernel.org/all/CAHDw0oGd0B4=uuv8NGqbUQ_ZVmSheU2bN70e4QhFXWvuAZdt2w@mail.gmail.com/ Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Dave Hansen --- arch/x86/mm/tlb.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..5d221709353e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -911,11 +911,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in native_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both.
+ */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); From d6fc45100aa8c02be3ddd16fae569b84086c15a9 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 10 Oct 2025 22:43:07 +0800 Subject: [PATCH 156/305] PCI: cadence: Search for MSI Capability with correct ID 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") incorrectly searched for the MSI-X Capability ID instead of the MSI Capability ID in cdns_pcie_ep_get_msi(). Search for PCI_CAP_ID_MSI, not PCI_CAP_ID_MSIX, to fix this problem. Fixes: 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") Reported-by: Sasha Levin Closes: https://lore.kernel.org/r/aOfMk9BW8BH2P30V@laps/ Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251010144307.12979-1-18255117159@163.com --- drivers/pci/controller/cadence/pcie-cadence-ep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 1eac012a8226..c0e1194a936b 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -255,7 +255,7 @@ static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) u16 flags, mme; u8 cap; - cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSIX); + cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSI); fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn); /* Validate that the MSI feature is actually enabled. 
*/ From 1ee889fdf409ce68c1e3b62912333a5cc69acaa0 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 1 Oct 2025 01:29:57 +0200 Subject: [PATCH 157/305] f2fs: don't call iput() from f2fs_drop_inode() iput() calls the problematic routine, which does a ->i_count inc/dec cycle. Undoing it with iput() recurses into the problem. Note f2fs should not be playing games with the refcount to begin with, but that will be handled later. Right now solve the immediate regression. Fixes: bc986b1d756482a ("fs: stop accessing ->i_count directly in f2fs and gfs2") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202509301450.138b448f-lkp@intel.com Signed-off-by: Mateusz Guzik Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fd8e7b0b2166..db7afb806411 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1820,7 +1820,7 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); spin_lock(&inode->i_lock); - iput(inode); + atomic_dec(&inode->i_count); } trace_f2fs_drop_inode(inode, 0); return 0; From 9d5c4f5c7a2c7677e1b3942772122b032c265aae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Oct 2025 03:32:30 +0000 Subject: [PATCH 158/305] f2fs: fix wrong block mapping for multi-devices Assuming the disk layout as below, disk0: 0 --- 0x00035abfff disk1: 0x00035ac000 --- 0x00037abfff disk2: 0x00037ac000 --- 0x00037ebfff and we want to read data from offset=13568 having len=128 across the block devices, we can illustrate the block addresses like below. 0 .. 
0x00037ac000 ------------------- 0x00037ebfff, 0x00037ec000 ------- | ^ ^ ^ | fofs 0 13568 13568+128 | ------------------------------------------------------ | LBA 0x37e8aa9 0x37ebfa9 0x37ec029 --- map 0x3caa9 0x3ffa9 In this example, we should give the relative map of the target block device ranging from 0x3caa9 to 0x3ffa9 where the length should be calculated by 0x37ebfff + 1 - 0x37ebfa9. In the below equation, however, map->m_pblk was supposed to be the original address instead of the one from the target block address. - map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); Cc: stable@vger.kernel.org Fixes: 71f2c8206202 ("f2fs: multidevice: support direct IO") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ef38e62cda8f..775aa4f63aa3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1497,8 +1497,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode, struct f2fs_dev_info *dev = &sbi->devs[bidx]; map->m_bdev = dev->bdev; - map->m_pblk -= dev->start_blk; map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk); + map->m_pblk -= dev->start_blk; } else { map->m_bdev = inode->i_sb->s_bdev; } From fcb8b32a68fd40b0440cb9468cf6f6ab9de9f3c5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Oct 2025 16:14:45 +0200 Subject: [PATCH 159/305] dpll: zl3073x: Handle missing or corrupted flash configuration If the internal flash contains missing or corrupted configuration, basic communication over the bus still functions, but the device is not capable of normal operation (for example, using mailboxes). This condition is indicated in the info register by the ready bit. If this bit is cleared, the probe procedure times out while fetching the device state. Handle this case by checking the ready bit value in zl3073x_dev_start() and skipping DPLL device and pin registration if it is cleared. 
Do not report this condition as an error, allowing the devlink device to be registered and enabling the user to flash the correct configuration. Prior to this patch: [ 31.112299] zl3073x-i2c 1-0070: Failed to fetch input state: -ETIMEDOUT [ 31.116332] zl3073x-i2c 1-0070: error -ETIMEDOUT: Failed to start device [ 31.136881] zl3073x-i2c 1-0070: probe with driver zl3073x-i2c failed with error -110 After this patch: [ 41.011438] zl3073x-i2c 1-0070: FW not fully ready - missing or corrupted config Fixes: 75a71ecc24125 ("dpll: zl3073x: Register DPLL devices and pins") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251008141445.841113-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/core.c | 21 +++++++++++++++++++++ drivers/dpll/zl3073x/regs.h | 3 +++ 2 files changed, 24 insertions(+) diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c index 092e7027948a..e42e527813cf 100644 --- a/drivers/dpll/zl3073x/core.c +++ b/drivers/dpll/zl3073x/core.c @@ -1038,8 +1038,29 @@ zl3073x_dev_phase_meas_setup(struct zl3073x_dev *zldev) int zl3073x_dev_start(struct zl3073x_dev *zldev, bool full) { struct zl3073x_dpll *zldpll; + u8 info; int rc; + rc = zl3073x_read_u8(zldev, ZL_REG_INFO, &info); + if (rc) { + dev_err(zldev->dev, "Failed to read device status info\n"); + return rc; + } + + if (!FIELD_GET(ZL_INFO_READY, info)) { + /* The ready bit indicates that the firmware was successfully + * configured and is ready for normal operation. If it is + * cleared then the configuration stored in flash is wrong + * or missing. In this situation the driver will expose + * only devlink interface to give an opportunity to flash + * the correct config.
+ */ + dev_info(zldev->dev, + "FW not fully ready - missing or corrupted config\n"); + + return 0; + } + if (full) { /* Fetch device state */ rc = zl3073x_dev_state_fetch(zldev); diff --git a/drivers/dpll/zl3073x/regs.h b/drivers/dpll/zl3073x/regs.h index 19a25325bd9c..d837bee72b17 100644 --- a/drivers/dpll/zl3073x/regs.h +++ b/drivers/dpll/zl3073x/regs.h @@ -67,6 +67,9 @@ * Register Page 0, General **************************/ +#define ZL_REG_INFO ZL_REG(0, 0x00, 1) +#define ZL_INFO_READY BIT(7) + #define ZL_REG_ID ZL_REG(0, 0x01, 2) #define ZL_REG_REVISION ZL_REG(0, 0x03, 2) #define ZL_REG_FW_VER ZL_REG(0, 0x05, 2) From 25718fdcbdd2dadd15fc8b684df59b43970b91ed Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 9 Oct 2025 11:43:38 +0200 Subject: [PATCH 160/305] net: gro_cells: Use nested-BH locking for gro_cell The gro_cell data structure is per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. 
Reported-by: syzbot+8715dd783e9b0bef43b1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68c6c3b1.050a0220.2ff435.0382.GAE@google.com/ Fixes: 3253cb49cbad ("softirq: Allow to drop the softirq-BKL lock on PREEMPT_RT") Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251009094338.j1jyKfjR@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index ff8e5b64bf6b..b43911562f4d 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -8,11 +8,13 @@ struct gro_cell { struct sk_buff_head napi_skbs; struct napi_struct napi; + local_lock_t bh_lock; }; int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; + bool have_bh_lock = false; struct gro_cell *cell; int res; @@ -25,6 +27,8 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) goto unlock; } + local_lock_nested_bh(&gcells->cells->bh_lock); + have_bh_lock = true; cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(net_hotdata.max_backlog)) { @@ -39,6 +43,9 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); + res = NET_RX_SUCCESS; unlock: @@ -54,6 +61,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; + __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { skb = __skb_dequeue(&cell->napi_skbs); if (!skb) @@ -64,6 +72,7 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); + __local_unlock_nested_bh(&cell->bh_lock); return work_done; } @@ -79,6 +88,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) struct gro_cell *cell = 
per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + local_lock_init(&cell->bh_lock); set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); From 70f92ab97042f243e1c8da1c457ff56b9b3e49f1 Mon Sep 17 00:00:00 2001 From: Linmao Li Date: Thu, 9 Oct 2025 20:25:49 +0800 Subject: [PATCH 161/305] r8169: fix packet truncation after S4 resume on RTL8168H/RTL8111H After resume from S4 (hibernate), RTL8168H/RTL8111H truncates incoming packets. Packet captures show messages like "IP truncated-ip - 146 bytes missing!". The issue is caused by RxConfig not being properly re-initialized after resume. Re-initializing the RxConfig register before the chip re-initialization sequence avoids the truncation and restores correct packet reception. This follows the same pattern as commit ef9da46ddef0 ("r8169: fix data corruption issue on RTL8402"). Fixes: 6e1d0b898818 ("r8169:add support for RTL8168H and RTL8107E") Signed-off-by: Linmao Li Reviewed-by: Jacob Keller Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/20251009122549.3955845-1-lilinmao@kylinos.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 8903ae90afcb..d18734fe12e4 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4994,8 +4994,9 @@ static int rtl8169_resume(struct device *device) if (!device_may_wakeup(tp_to_dev(tp))) clk_prepare_enable(tp->clk); - /* Reportedly at least Asus X453MA truncates packets otherwise */ - if (tp->mac_version == RTL_GIGA_MAC_VER_37) + /* Some chip versions may truncate packets without this initialization */ + if (tp->mac_version == RTL_GIGA_MAC_VER_37 || + tp->mac_version == RTL_GIGA_MAC_VER_46) rtl_init_rxcfg(tp); return rtl8169_runtime_resume(device); From e4d0c909bf8328d986bf3aadba0c33a72b5ae30d Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20-=202N?= Date: Thu, 9 Oct 2025 15:06:56 +0200 Subject: [PATCH 162/305] net: phy: bcm54811: Fix GMII/MII/MII-Lite selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Broadcom bcm54811 is hardware-strapped to select among RGMII and GMII/MII/MII-Lite modes. However, the corresponding bit, RGMII Enable in Miscellaneous Control Register must be also set to select desired RGMII or MII(-lite)/GMII mode. Fixes: 3117a11fff5af9e7 ("net: phy: bcm54811: PHY initialization") Signed-off-by: Kamil Horák - 2N Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251009130656.1308237-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/broadcom.c | 20 +++++++++++++++++++- include/linux/brcmphy.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3459a0e9d8b9..cb306f9e80cc 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -405,7 +405,7 @@ static int bcm5481x_set_brrmode(struct phy_device *phydev, bool on) static int bcm54811_config_init(struct phy_device *phydev) { struct bcm54xx_phy_priv *priv = phydev->priv; - int err, reg, exp_sync_ethernet; + int err, reg, exp_sync_ethernet, aux_rgmii_en; /* Enable CLK125 MUX on LED4 if ref clock is enabled. 
*/ if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) { @@ -434,6 +434,24 @@ static int bcm54811_config_init(struct phy_device *phydev) if (err < 0) return err; + /* Enable RGMII if configured */ + if (phy_interface_is_rgmii(phydev)) + aux_rgmii_en = MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN | + MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + else + aux_rgmii_en = 0; + + /* Also writing Reserved bits 6:5 because the documentation requires + * them to be written to 0b11 + */ + err = bcm54xx_auxctl_write(phydev, + MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_WREN | + aux_rgmii_en | + MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD); + if (err < 0) + return err; + return bcm5481x_set_brrmode(phydev, priv->brr_mode); } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 15c35655f482..115a964f3006 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -137,6 +137,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RSVD 0x0060 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 From 21f4d45eba0b2dcae5dbc9e5e0ad08735c993f16 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 9 Oct 2025 16:02:19 +0100 Subject: [PATCH 163/305] net/ip6_tunnel: Prevent perpetual tunnel growth Similarly to ipv4 tunnel, ipv6 version updates dev->needed_headroom, too. While ipv4 tunnel headroom adjustment growth was limited in commit 5ae1e9922bbd ("net: ip_tunnel: prevent perpetual headroom growth"), ipv6 tunnel yet increases the headroom without any ceiling. Reflect ipv4 tunnel headroom adjustment limit on ipv6 version. Credits to Francesco Ruggeri, who was originally debugging this issue and wrote local Arista-specific patch and a reproducer. 
Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Cc: Florian Westphal Cc: Francesco Ruggeri Signed-off-by: Dmitry Safonov Link: https://patch.msgid.link/20251009-ip6_tunnel-headroom-v2-1-8e4dbd8f7e35@arista.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 15 +++++++++++++++ net/ipv4/ip_tunnel.c | 14 -------------- net/ipv6/ip6_tunnel.c | 3 +-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 4314a97702ea..ecae35512b9b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -611,6 +611,21 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, int headroom, bool reply); +static inline void ip_tunnel_adj_headroom(struct net_device *dev, + unsigned int headroom) +{ + /* we must cap headroom to some upperlimit, else pskb_expand_head + * will overflow header offsets in skb_headers_offset_update(). + */ + const unsigned int max_allowed = 512; + + if (headroom > max_allowed) + headroom = max_allowed; + + if (headroom > READ_ONCE(dev->needed_headroom)) + WRITE_ONCE(dev->needed_headroom, headroom); +} + int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); static inline int iptunnel_pull_offloads(struct sk_buff *skb) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index aaeb5d16f0c9..158a30ae7c5f 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -568,20 +568,6 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } -static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom) -{ - /* we must cap headroom to some upperlimit, else pskb_expand_head - * will overflow header offsets in skb_headers_offset_update(). 
- */ - static const unsigned int max_allowed = 512; - - if (headroom > max_allowed) - headroom = max_allowed; - - if (headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, headroom); -} - void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto, int tunnel_hlen) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3262e81223df..6405072050e0 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1257,8 +1257,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, */ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; - if (max_headroom > READ_ONCE(dev->needed_headroom)) - WRITE_ONCE(dev->needed_headroom, max_headroom); + ip_tunnel_adj_headroom(dev, max_headroom); err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) From a3f8c0a273120fd2638f03403e786c3de2382e72 Mon Sep 17 00:00:00 2001 From: Milena Olech Date: Thu, 9 Oct 2025 17:03:46 -0700 Subject: [PATCH 164/305] idpf: cleanup remaining SKBs in PTP flows When the driver requests Tx timestamp value, one of the first steps is to clone SKB using skb_get. It increases the reference counter for that SKB to prevent unexpected freeing by another component. However, there may be a case where the index is requested, SKB is assigned and never consumed by PTP flows - for example due to reset during running PTP apps. Add a check in release timestamping function to verify if the SKB assigned to Tx timestamp latch was freed, and release remaining SKBs. 
Fixes: 4901e83a94ef ("idpf: add Tx timestamp capabilities negotiation") Signed-off-by: Milena Olech Signed-off-by: Anton Nadezhdin Reviewed-by: Aleksandr Loktionov Tested-by: Samuel Salin Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-1-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_ptp.c | 3 +++ drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index 142823af1f9e..3e1052d070cf 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -863,6 +863,9 @@ static void idpf_ptp_release_vport_tstamp(struct idpf_vport *vport) u64_stats_inc(&vport->tstamp_stats.flushed); list_del(&ptp_tx_tstamp->list_member); + if (ptp_tx_tstamp->skb) + consume_skb(ptp_tx_tstamp->skb); + kfree(ptp_tx_tstamp); } u64_stats_update_end(&vport->tstamp_stats.stats_sync); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c index 8a2e0f8c5e36..61cedb6f2854 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c @@ -517,6 +517,7 @@ idpf_ptp_get_tstamp_value(struct idpf_vport *vport, shhwtstamps.hwtstamp = ns_to_ktime(tstamp); skb_tstamp_tx(ptp_tx_tstamp->skb, &shhwtstamps); consume_skb(ptp_tx_tstamp->skb); + ptp_tx_tstamp->skb = NULL; list_add(&ptp_tx_tstamp->list_member, &tx_tstamp_caps->latches_free); From 53f0eb62b4d23d40686f2dd51776b8220f2887bb Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:47 -0700 Subject: [PATCH 165/305] ixgbevf: fix getting link speed data for E610 devices E610 adapters no longer use the VFLINKS register to read PF's link speed and linkup state. 
As a result VF driver cannot get actual link state and it incorrectly reports 10G which is the default option. It leads to a situation where even 1G adapters print 10G as actual link speed. The same happens when PF driver set speed different than 10G. Add new mailbox operation to let the VF driver request a PF driver to provide actual link data. Update the mailbox api to v1.6. Incorporate both ways of getting link status within the legacy ixgbe_check_mac_link_vf() function. Fixes: 4c44b450c69b ("ixgbevf: Add support for Intel(R) E610 device") Co-developed-by: Andrzej Wilczynski Signed-off-by: Andrzej Wilczynski Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Cc: stable@vger.kernel.org Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-2-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/defines.h | 1 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +- drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 + drivers/net/ethernet/intel/ixgbevf/vf.c | 137 ++++++++++++++---- 4 files changed, 116 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index a9bc96f6399d..e177d1d58696 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -28,6 +28,7 @@ /* Link speed */ typedef u32 ixgbe_link_speed; +#define IXGBE_LINK_SPEED_UNKNOWN 0 #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 #define IXGBE_LINK_SPEED_10GB_FULL 0x0080 #define IXGBE_LINK_SPEED_100_FULL 0x0008 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 28e25641b167..92671638b428 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2275,6 +2275,7 @@ static void ixgbevf_negotiate_api(struct 
ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; static const int api[] = { + ixgbe_mbox_api_16, ixgbe_mbox_api_15, ixgbe_mbox_api_14, ixgbe_mbox_api_13, @@ -2294,7 +2295,8 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } - if (hw->api_version >= ixgbe_mbox_api_15) { + /* Following is not supported by API 1.6, it is specific for 1.5 */ + if (hw->api_version == ixgbe_mbox_api_15) { hw->mbx.ops.init_params(hw); memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, sizeof(struct ixgbe_mbx_operations)); @@ -2651,6 +2653,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: if (adapter->xdp_prog && hw->mac.max_tx_queues == rss) rss = rss > 3 ? 2 : 1; @@ -4645,6 +4648,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); break; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 835bbcc5cc8e..c1494fd1f67b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -66,6 +66,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -102,6 +103,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ +/* mailbox API, version 1.6 VF requests */ +#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + /* length of permanent address 
message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index dcaef34b88b6..f05246fb5a74 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues) * is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -382,6 +383,7 @@ int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key) * or if the operation is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -552,6 +554,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -624,6 +627,48 @@ static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) return -EOPNOTSUPP; } +/** + * ixgbevf_get_pf_link_state - Get PF's link status + * @hw: pointer to the HW structure + * @speed: link speed + * @link_up: indicate if link is up/down + * + * Ask PF to provide link_up state and speed of the link. + * + * Return: IXGBE_ERR_MBX in the case of mailbox error, + * -EOPNOTSUPP if the op is not supported or 0 on success. 
+ */ +static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + u32 msgbuf[3] = {}; + int err; + + switch (hw->api_version) { + case ixgbe_mbox_api_16: + break; + default: + return -EOPNOTSUPP; + } + + msgbuf[0] = IXGBE_VF_GET_PF_LINK_STATE; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { + err = IXGBE_ERR_MBX; + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* No need to set @link_up to false as it will be done by + * ixgbe_check_mac_link_vf(). + */ + } else { + *speed = msgbuf[1]; + *link_up = msgbuf[2]; + } + + return err; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -658,6 +703,58 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, return err; } +/** + * ixgbe_read_vflinks - Read VFLINKS register + * @hw: pointer to the HW structure + * @speed: link speed + * @link_up: indicate if link is up/down + * + * Get linkup status and link speed from the VFLINKS register. 
+ */ +static void ixgbe_read_vflinks(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + u32 vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + /* if link status is down no point in checking to see if PF is up */ + if (!(vflinks & IXGBE_LINKS_UP)) { + *link_up = false; + return; + } + + /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs + * before the link status is correct + */ + if (hw->mac.type == ixgbe_mac_82599_vf) { + for (int i = 0; i < 5; i++) { + udelay(100); + vflinks = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + if (!(vflinks & IXGBE_LINKS_UP)) { + *link_up = false; + return; + } + } + } + + /* We reached this point so there's link */ + *link_up = true; + + switch (vflinks & IXGBE_LINKS_SPEED_82599) { + case IXGBE_LINKS_SPEED_10G_82599: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + break; + case IXGBE_LINKS_SPEED_1G_82599: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_LINKS_SPEED_100_82599: + *speed = IXGBE_LINK_SPEED_100_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } +} + /** * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub. 
* @hw: unused @@ -705,7 +802,6 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, struct ixgbe_mbx_info *mbx = &hw->mbx; struct ixgbe_mac_info *mac = &hw->mac; s32 ret_val = 0; - u32 links_reg; u32 in_msg = 0; /* If we were hit with a reset drop the link */ @@ -715,36 +811,14 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, if (!mac->get_link_status) goto out; - /* if link status is down no point in checking to see if pf is up */ - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); - if (!(links_reg & IXGBE_LINKS_UP)) - goto out; - - /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs - * before the link status is correct - */ - if (mac->type == ixgbe_mac_82599_vf) { - int i; - - for (i = 0; i < 5; i++) { - udelay(100); - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); - - if (!(links_reg & IXGBE_LINKS_UP)) - goto out; - } - } - - switch (links_reg & IXGBE_LINKS_SPEED_82599) { - case IXGBE_LINKS_SPEED_10G_82599: - *speed = IXGBE_LINK_SPEED_10GB_FULL; - break; - case IXGBE_LINKS_SPEED_1G_82599: - *speed = IXGBE_LINK_SPEED_1GB_FULL; - break; - case IXGBE_LINKS_SPEED_100_82599: - *speed = IXGBE_LINK_SPEED_100_FULL; - break; + if (hw->mac.type == ixgbe_mac_e610_vf) { + ret_val = ixgbevf_get_pf_link_state(hw, speed, link_up); + if (ret_val) + goto out; + } else { + ixgbe_read_vflinks(hw, speed, link_up); + if (*link_up == false) + goto out; } /* if the read failed it could just be a mailbox collision, best wait @@ -951,6 +1025,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: + case ixgbe_mbox_api_16: break; default: return 0; From f7f97cbc03a470ce405d48dedb7f135713caa0fa Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:48 -0700 Subject: [PATCH 166/305] ixgbe: handle IXGBE_VF_GET_PF_LINK_STATE mailbox operation Update supported API version and provide handler for IXGBE_VF_GET_PF_LINK_STATE cmd. 
Simply put stored values of link speed and link_up from adapter context. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Link: https://lore.kernel.org/stable/20250828095227.1857066-3-jedrzej.jagielski%40intel.com Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-3-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 5 +++ .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 42 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 4af149b63a39..f7256a339c99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -50,6 +50,8 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */ ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ + ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ + ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -86,6 +88,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ +/* mailbox API, version 1.6 VF requests */ +#define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 32ac1e020d91..b09271d61a4e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -510,6 +510,7 
@@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: /* Version 1.1 supports jumbo frames on VFs if PF has * jumbo frames enabled which means legacy VFs are * disabled @@ -1046,6 +1047,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: adapter->vfinfo[vf].vf_api = api; return 0; default: @@ -1072,6 +1074,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -1; @@ -1112,6 +1115,7 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: case ixgbe_mbox_api_12: @@ -1145,6 +1149,7 @@ static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter, /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: case ixgbe_mbox_api_12: @@ -1174,6 +1179,7 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, fallthrough; case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -1244,6 +1250,7 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_12: case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: + case ixgbe_mbox_api_16: break; default: return -EOPNOTSUPP; @@ -1254,6 +1261,38 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, return 0; } +/** + * ixgbe_send_vf_link_status - send link status data to VF + * @adapter: pointer to adapter struct + * @msgbuf: pointer to message buffers + * @vf: VF 
identifier + * + * Reply for IXGBE_VF_GET_PF_LINK_STATE mbox command sending link status data. + * + * Return: 0 on success or -EOPNOTSUPP when operation is not supported. + */ +static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_16: + if (hw->mac.type != ixgbe_mac_e610) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + /* Simply provide stored values as watchdog & link status events take + * care of its freshness. + */ + msgbuf[1] = adapter->link_speed; + msgbuf[2] = adapter->link_up; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1328,6 +1367,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_IPSEC_DEL: retval = ixgbe_ipsec_vf_del_sa(adapter, msgbuf, vf); break; + case IXGBE_VF_GET_PF_LINK_STATE: + retval = ixgbe_send_vf_link_status(adapter, msgbuf, vf); + break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); retval = -EIO; From a7075f501bd33c93570af759b6f4302ef0175168 Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:49 -0700 Subject: [PATCH 167/305] ixgbevf: fix mailbox API compatibility by negotiating supported features There was backward compatibility in the terms of mailbox API. Various drivers from various OSes supporting 10G adapters from Intel portfolio could easily negotiate mailbox API. This convention has been broken since introducing API 1.4. Commit 0062e7cc955e ("ixgbevf: add VF IPsec offload code") added support for IPSec which is specific only for the kernel ixgbe driver. None of the rest of the Intel 10G PF/VF drivers supports it. And actually lack of support was not included in the IPSec implementation - there were no such code paths. 
No way to negotiate support for the feature was introduced along with the feature itself. Commit 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF"), which increased the API version to 1.5, did the same - it introduced code supported specifically by the PF ESX driver. It altered the API version for the VF driver while at the same time not touching the version defined for the PF ixgbe driver. This led to additional discrepancies, as the code provided within API 1.6 cannot be supported by the Linux ixgbe driver because it causes crashes. The issue was noticed some time ago and mitigated by Jake in commit d0725312adf5 ("ixgbevf: stop attempting IPSEC offload on Mailbox API 1.5"). As a result we have a regression in IPsec support, and after increasing the API to version 1.6 the ixgbevf driver stopped supporting ESX MBX. To fix this mess, add a new mailbox op asking the PF driver about supported features. Based on the response, determine whether to set support for IPSec and the ESX-specific enhanced mailbox. The new mailbox op, for compatibility purposes, must be added within a new API revision, as the API version of OOT PF & VF drivers is already increased to 1.6 and doesn't incorporate the features negotiate op. The features negotiation mechanism can be extended with new features when needed in the future.
Reported-by: Jacob Keller Closes: https://lore.kernel.org/intel-wired-lan/20241101-jk-ixgbevf-mailbox-v1-5-fixes-v1-0-f556dc9a66ed@intel.com/ Fixes: 0062e7cc955e ("ixgbevf: add VF IPsec offload code") Fixes: 339f28964147 ("ixgbevf: Add support for new mailbox communication between PF and VF") Reviewed-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Cc: stable@vger.kernel.org Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-4-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 10 +++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 +++ .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 32 ++++++++++++- drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 ++ drivers/net/ethernet/intel/ixgbevf/vf.c | 45 ++++++++++++++++++- drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 6 files changed, 96 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 65580b9cb06f..fce35924ff8b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -273,6 +273,9 @@ static int ixgbevf_ipsec_add_sa(struct net_device *dev, adapter = netdev_priv(dev); ipsec = adapter->ipsec; + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return -EOPNOTSUPP; + if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for IPsec offload"); return -EINVAL; @@ -405,6 +408,9 @@ static void ixgbevf_ipsec_del_sa(struct net_device *dev, adapter = netdev_priv(dev); ipsec = adapter->ipsec; + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return; + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX; @@ -612,6 +618,10 @@ void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter) size_t size; 
switch (adapter->hw.api_version) { + case ixgbe_mbox_api_17: + if (!(adapter->pf_features & IXGBEVF_PF_SUP_IPSEC)) + return; + break; case ixgbe_mbox_api_14: break; default: diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3a379e6a3a2a..039187607e98 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -363,6 +363,13 @@ struct ixgbevf_adapter { struct ixgbe_hw hw; u16 msg_enable; + u32 pf_features; +#define IXGBEVF_PF_SUP_IPSEC BIT(0) +#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) + +#define IXGBEVF_SUPPORTED_FEATURES (IXGBEVF_PF_SUP_IPSEC | \ + IXGBEVF_PF_SUP_ESX_MBX) + struct ixgbevf_hw_stats stats; unsigned long state; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 92671638b428..d5ce20f47def 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2271,10 +2271,35 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +/** + * ixgbevf_set_features - Set features supported by PF + * @adapter: pointer to the adapter struct + * + * Negotiate with PF supported features and then set pf_features accordingly. 
+ */ +static void ixgbevf_set_features(struct ixgbevf_adapter *adapter) +{ + u32 *pf_features = &adapter->pf_features; + struct ixgbe_hw *hw = &adapter->hw; + int err; + + err = hw->mac.ops.negotiate_features(hw, pf_features); + if (err && err != -EOPNOTSUPP) + netdev_dbg(adapter->netdev, + "PF feature negotiation failed.\n"); + + /* Address also pre API 1.7 cases */ + if (hw->api_version == ixgbe_mbox_api_14) + *pf_features |= IXGBEVF_PF_SUP_IPSEC; + else if (hw->api_version == ixgbe_mbox_api_15) + *pf_features |= IXGBEVF_PF_SUP_ESX_MBX; +} + static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; static const int api[] = { + ixgbe_mbox_api_17, ixgbe_mbox_api_16, ixgbe_mbox_api_15, ixgbe_mbox_api_14, @@ -2295,8 +2320,9 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } - /* Following is not supported by API 1.6, it is specific for 1.5 */ - if (hw->api_version == ixgbe_mbox_api_15) { + ixgbevf_set_features(adapter); + + if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) { hw->mbx.ops.init_params(hw); memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops, sizeof(struct ixgbe_mbx_operations)); @@ -2654,6 +2680,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: if (adapter->xdp_prog && hw->mac.max_tx_queues == rss) rss = rss > 3 ? 
2 : 1; @@ -4649,6 +4676,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); break; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index c1494fd1f67b..a8ed23ee66aa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -67,6 +67,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -106,6 +107,9 @@ enum ixgbe_pfvf_api_rev { /* mailbox API, version 1.6 VF requests */ #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ +/* mailbox API, version 1.7 VF requests */ +#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF*/ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index f05246fb5a74..74d320879513 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -313,6 +313,7 @@ int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues) * is not supported for this device type. 
*/ switch (hw->api_version) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: @@ -383,6 +384,7 @@ int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key) * or if the operation is not supported for this device type. */ switch (hw->api_version) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_15: case ixgbe_mbox_api_14: @@ -555,6 +557,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -646,6 +649,7 @@ static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *spee switch (hw->api_version) { case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -669,6 +673,42 @@ static int ixgbevf_get_pf_link_state(struct ixgbe_hw *hw, ixgbe_link_speed *spee return err; } +/** + * ixgbevf_negotiate_features_vf - negotiate supported features with PF driver + * @hw: pointer to the HW structure + * @pf_features: bitmask of features supported by PF + * + * Return: IXGBE_ERR_MBX in the case of mailbox error, + * -EOPNOTSUPP if the op is not supported or 0 on success. 
+ */ +static int ixgbevf_negotiate_features_vf(struct ixgbe_hw *hw, u32 *pf_features) +{ + u32 msgbuf[2] = {}; + int err; + + switch (hw->api_version) { + case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; + } + + msgbuf[0] = IXGBE_VF_FEATURES_NEGOTIATE; + msgbuf[1] = IXGBEVF_SUPPORTED_FEATURES; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + ARRAY_SIZE(msgbuf)); + + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) { + err = IXGBE_ERR_MBX; + *pf_features = 0x0; + } else { + *pf_features = msgbuf[1]; + } + + return err; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -799,6 +839,7 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, bool *link_up, bool autoneg_wait_to_complete) { + struct ixgbevf_adapter *adapter = hw->back; struct ixgbe_mbx_info *mbx = &hw->mbx; struct ixgbe_mac_info *mac = &hw->mac; s32 ret_val = 0; @@ -825,7 +866,7 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, * until we are called again and don't report an error */ if (mbx->ops.read(hw, &in_msg, 1)) { - if (hw->api_version >= ixgbe_mbox_api_15) + if (adapter->pf_features & IXGBEVF_PF_SUP_ESX_MBX) mac->get_link_status = false; goto out; } @@ -1026,6 +1067,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, case ixgbe_mbox_api_14: case ixgbe_mbox_api_15: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return 0; @@ -1080,6 +1122,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .setup_link = ixgbevf_setup_mac_link_vf, .check_link = ixgbevf_check_mac_link_vf, .negotiate_api_version = ixgbevf_negotiate_api_version_vf, + .negotiate_features = ixgbevf_negotiate_features_vf, .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 2d791bc26ae4..4f19b8900c29 100644 --- 
a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -26,6 +26,7 @@ struct ixgbe_mac_operations { s32 (*stop_adapter)(struct ixgbe_hw *); s32 (*get_bus_info)(struct ixgbe_hw *); s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); + int (*negotiate_features)(struct ixgbe_hw *hw, u32 *pf_features); /* Link */ s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); From 823be089f9c8ab136ba382b516aedd3f7ac854bd Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Thu, 9 Oct 2025 17:03:50 -0700 Subject: [PATCH 168/305] ixgbe: handle IXGBE_VF_FEATURES_NEGOTIATE mbox cmd Send to VF information about features supported by the PF driver. Increase API version to 1.7. Reviewed-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Signed-off-by: Jedrzej Jagielski Tested-by: Rafal Romanowski Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-5-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 10 +++++ .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 37 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index f7256a339c99..0334ed4b8fa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -52,6 +52,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_14, /* API version 1.4, linux/freebsd VF driver */ ixgbe_mbox_api_15, /* API version 1.5, linux/freebsd VF driver */ ixgbe_mbox_api_16, /* API version 1.6, linux/freebsd VF driver */ + ixgbe_mbox_api_17, /* API version 1.7, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -91,6 +92,9 @@ enum ixgbe_pfvf_api_rev { /* mailbox API, version 1.6 VF requests */ #define IXGBE_VF_GET_PF_LINK_STATE 0x11 /* request PF to send link info */ +/* 
mailbox API, version 1.7 VF requests */ +#define IXGBE_VF_FEATURES_NEGOTIATE 0x12 /* get features supported by PF */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ @@ -101,6 +105,12 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ #define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ +/* features negotiated between PF/VF */ +#define IXGBEVF_PF_SUP_IPSEC BIT(0) +#define IXGBEVF_PF_SUP_ESX_MBX BIT(1) + +#define IXGBE_SUPPORTED_FEATURES IXGBEVF_PF_SUP_IPSEC + struct ixgbe_hw; int ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index b09271d61a4e..ee133d6749b3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -511,6 +511,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: /* Version 1.1 supports jumbo frames on VFs if PF has * jumbo frames enabled which means legacy VFs are * disabled @@ -1048,6 +1049,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: adapter->vfinfo[vf].vf_api = api; return 0; default: @@ -1075,6 +1077,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -1; @@ -1115,6 +1118,7 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case 
ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -1149,6 +1153,7 @@ static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter, /* verify the PF is supporting the correct API */ switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: case ixgbe_mbox_api_16: case ixgbe_mbox_api_14: case ixgbe_mbox_api_13: @@ -1180,6 +1185,7 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -1251,6 +1257,7 @@ static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, case ixgbe_mbox_api_13: case ixgbe_mbox_api_14: case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: break; default: return -EOPNOTSUPP; @@ -1278,6 +1285,7 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, switch (adapter->vfinfo[vf].vf_api) { case ixgbe_mbox_api_16: + case ixgbe_mbox_api_17: if (hw->mac.type != ixgbe_mac_e610) return -EOPNOTSUPP; break; @@ -1293,6 +1301,32 @@ static int ixgbe_send_vf_link_status(struct ixgbe_adapter *adapter, return 0; } +/** + * ixgbe_negotiate_vf_features - negotiate supported features with VF driver + * @adapter: pointer to adapter struct + * @msgbuf: pointer to message buffers + * @vf: VF identifier + * + * Return: 0 on success or -EOPNOTSUPP when operation is not supported. 
+ */ +static int ixgbe_negotiate_vf_features(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u32 features = msgbuf[1]; + + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_17: + break; + default: + return -EOPNOTSUPP; + } + + features &= IXGBE_SUPPORTED_FEATURES; + msgbuf[1] = features; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1370,6 +1404,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_GET_PF_LINK_STATE: retval = ixgbe_send_vf_link_status(adapter, msgbuf, vf); break; + case IXGBE_VF_FEATURES_NEGOTIATE: + retval = ixgbe_negotiate_vf_features(adapter, msgbuf, vf); + break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); retval = -EIO; From 5feef67b646d8f5064bac288e22204ffba2b9a4a Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Thu, 9 Oct 2025 17:03:51 -0700 Subject: [PATCH 169/305] ixgbe: fix too early devlink_free() in ixgbe_remove() Since ixgbe_adapter is embedded in devlink, calling devlink_free() prematurely in the ixgbe_remove() path can lead to UAF. Move devlink_free() to the end. KASAN report: BUG: KASAN: use-after-free in ixgbe_reset_interrupt_capability+0x140/0x180 [ixgbe] Read of size 8 at addr ffff0000adf813e0 by task bash/2095 CPU: 1 UID: 0 PID: 2095 Comm: bash Tainted: G S 6.17.0-rc2-tnguy.net-queue+ #1 PREEMPT(full) [...] 
Call trace: show_stack+0x30/0x90 (C) dump_stack_lvl+0x9c/0xd0 print_address_description.constprop.0+0x90/0x310 print_report+0x104/0x1f0 kasan_report+0x88/0x180 __asan_report_load8_noabort+0x20/0x30 ixgbe_reset_interrupt_capability+0x140/0x180 [ixgbe] ixgbe_clear_interrupt_scheme+0xf8/0x130 [ixgbe] ixgbe_remove+0x2d0/0x8c0 [ixgbe] pci_device_remove+0xa0/0x220 device_remove+0xb8/0x170 device_release_driver_internal+0x318/0x490 device_driver_detach+0x40/0x68 unbind_store+0xec/0x118 drv_attr_store+0x64/0xb8 sysfs_kf_write+0xcc/0x138 kernfs_fop_write_iter+0x294/0x440 new_sync_write+0x1fc/0x588 vfs_write+0x480/0x6a0 ksys_write+0xf0/0x1e0 __arm64_sys_write+0x70/0xc0 invoke_syscall.constprop.0+0xcc/0x280 el0_svc_common.constprop.0+0xa8/0x248 do_el0_svc+0x44/0x68 el0_svc+0x54/0x160 el0t_64_sync_handler+0xa0/0xe8 el0t_64_sync+0x1b0/0x1b8 Fixes: a0285236ab93 ("ixgbe: add initial devlink support") Signed-off-by: Koichiro Den Tested-by: Rinitha S Reviewed-by: Jedrzej Jagielski Reviewed-by: Aleksandr Loktionov Reviewed-by: Paul Menzel Signed-off-by: Jacob Keller Link: https://patch.msgid.link/20251009-jk-iwl-net-2025-10-01-v3-6-ef32a425b92a@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 90d4e57b1c93..ca1ccc630001 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -12101,7 +12101,6 @@ static void ixgbe_remove(struct pci_dev *pdev) devl_port_unregister(&adapter->devlink_port); devl_unlock(adapter->devlink); - devlink_free(adapter->devlink); ixgbe_stop_ipsec_offload(adapter); ixgbe_clear_interrupt_scheme(adapter); @@ -12137,6 +12136,8 @@ static void ixgbe_remove(struct pci_dev *pdev) if (disable_dev) pci_disable_device(pdev); + + devlink_free(adapter->devlink); } /** From 2c67301584f2671e320236df6bbe75ae09feb4d0 
Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 11 Oct 2025 13:02:49 +0200 Subject: [PATCH 170/305] net: phy: realtek: Avoid PHYCR2 access if PHYCR2 not present The driver is currently checking for PHYCR2 register presence in rtl8211f_config_init(), but it does so after accessing PHYCR2 to disable EEE. This was introduced in commit bfc17c165835 ("net: phy: realtek: disable PHY-mode EEE"). Move the PHYCR2 presence test before the EEE disablement and simplify the code. Fixes: bfc17c165835 ("net: phy: realtek: disable PHY-mode EEE") Signed-off-by: Marek Vasut Reviewed-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20251011110309.12664-1-marek.vasut@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 82d8e1335215..a724b21b4fe7 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -633,26 +633,25 @@ static int rtl8211f_config_init(struct phy_device *phydev) str_enabled_disabled(val_rxdly)); } + if (!priv->has_phycr2) + return 0; + /* Disable PHY-mode EEE so LPI is passed to the MAC */ ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); if (ret) return ret; - if (priv->has_phycr2) { - ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, - RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, - priv->phycr2); - if (ret < 0) { - dev_err(dev, "clkout configuration failed: %pe\n", - ERR_PTR(ret)); - return ret; - } - - return genphy_soft_reset(phydev); + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, + RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, + priv->phycr2); + if (ret < 0) { + dev_err(dev, "clkout configuration failed: %pe\n", + ERR_PTR(ret)); + return ret; } - return 0; + return genphy_soft_reset(phydev); } static int 
rtl821x_suspend(struct phy_device *phydev) From c065b6046b3493a878c2ceb810aed845431badb4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 13 Oct 2025 21:50:40 -0400 Subject: [PATCH 171/305] Use CONFIG_EXT4_FS instead of CONFIG_EXT3_FS in all of the defconfigs Commit d6ace46c82fd ("ext4: remove obsolete EXT3 config options") removed the obsolete EXT3_CONFIG options, since it had been over a decade since fs/ext3 had been removed. Unfortunately, there were a number of defconfigs that still used CONFIG_EXT3_FS which the cleanup commit didn't fix up. This caused a large number of defconfig test builds to fail. Oops. Fixes: d6ace46c82fd ("ext4: remove obsolete EXT3 config options") Signed-off-by: Theodore Ts'o --- arch/arc/configs/axs101_defconfig | 2 +- arch/arc/configs/axs103_defconfig | 2 +- arch/arc/configs/axs103_smp_defconfig | 2 +- arch/arc/configs/hsdk_defconfig | 2 +- arch/arc/configs/vdk_hs38_defconfig | 2 +- arch/arc/configs/vdk_hs38_smp_defconfig | 2 +- arch/arm/configs/axm55xx_defconfig | 2 +- arch/arm/configs/bcm2835_defconfig | 4 ++-- arch/arm/configs/davinci_all_defconfig | 2 +- arch/arm/configs/dove_defconfig | 4 ++-- arch/arm/configs/ep93xx_defconfig | 4 ++-- arch/arm/configs/imx_v6_v7_defconfig | 6 +++--- arch/arm/configs/ixp4xx_defconfig | 4 ++-- arch/arm/configs/mmp2_defconfig | 2 +- arch/arm/configs/moxart_defconfig | 2 +- arch/arm/configs/multi_v5_defconfig | 2 +- arch/arm/configs/mv78xx0_defconfig | 4 ++-- arch/arm/configs/mvebu_v5_defconfig | 2 +- arch/arm/configs/nhk8815_defconfig | 2 +- arch/arm/configs/omap1_defconfig | 2 +- arch/arm/configs/omap2plus_defconfig | 2 +- arch/arm/configs/orion5x_defconfig | 4 ++-- arch/arm/configs/pxa_defconfig | 6 +++--- arch/arm/configs/qcom_defconfig | 2 +- arch/arm/configs/rpc_defconfig | 2 +- arch/arm/configs/s3c6400_defconfig | 6 +++--- arch/arm/configs/sama7_defconfig | 2 +- arch/arm/configs/socfpga_defconfig | 2 +- arch/arm/configs/spear13xx_defconfig | 4 ++-- arch/arm/configs/spear3xx_defconfig |
4 ++-- arch/arm/configs/spear6xx_defconfig | 4 ++-- arch/arm/configs/spitz_defconfig | 4 ++-- arch/arm/configs/stm32_defconfig | 2 +- arch/arm/configs/tegra_defconfig | 6 +++--- arch/arm/configs/u8500_defconfig | 2 +- arch/arm/configs/vexpress_defconfig | 2 +- arch/hexagon/configs/comet_defconfig | 6 +++--- arch/loongarch/configs/loongson3_defconfig | 6 +++--- arch/m68k/configs/stmark2_defconfig | 6 +++--- arch/microblaze/configs/mmu_defconfig | 2 +- arch/mips/configs/bigsur_defconfig | 6 +++--- arch/mips/configs/cobalt_defconfig | 6 +++--- arch/mips/configs/decstation_64_defconfig | 6 +++--- arch/mips/configs/decstation_defconfig | 6 +++--- arch/mips/configs/decstation_r4k_defconfig | 6 +++--- arch/mips/configs/fuloong2e_defconfig | 2 +- arch/mips/configs/ip22_defconfig | 6 +++--- arch/mips/configs/ip27_defconfig | 6 +++--- arch/mips/configs/ip28_defconfig | 6 +++--- arch/mips/configs/ip30_defconfig | 6 +++--- arch/mips/configs/ip32_defconfig | 6 +++--- arch/mips/configs/jazz_defconfig | 2 +- arch/mips/configs/lemote2f_defconfig | 6 +++--- arch/mips/configs/loongson1b_defconfig | 6 +++--- arch/mips/configs/loongson1c_defconfig | 6 +++--- arch/mips/configs/loongson2k_defconfig | 6 +++--- arch/mips/configs/loongson3_defconfig | 6 +++--- arch/mips/configs/malta_defconfig | 2 +- arch/mips/configs/malta_kvm_defconfig | 2 +- arch/mips/configs/malta_qemu_32r6_defconfig | 2 +- arch/mips/configs/maltaaprp_defconfig | 2 +- arch/mips/configs/maltasmvp_defconfig | 6 +++--- arch/mips/configs/maltasmvp_eva_defconfig | 2 +- arch/mips/configs/maltaup_defconfig | 2 +- arch/mips/configs/maltaup_xpa_defconfig | 2 +- arch/mips/configs/mtx1_defconfig | 6 +++--- arch/mips/configs/rm200_defconfig | 2 +- arch/openrisc/configs/or1klitex_defconfig | 2 +- arch/openrisc/configs/virt_defconfig | 4 ++-- arch/parisc/configs/generic-32bit_defconfig | 4 ++-- arch/parisc/configs/generic-64bit_defconfig | 4 ++-- arch/sh/configs/ap325rxa_defconfig | 6 +++--- arch/sh/configs/apsh4a3a_defconfig | 2 +- 
arch/sh/configs/apsh4ad0a_defconfig | 2 +- arch/sh/configs/ecovec24_defconfig | 6 +++--- arch/sh/configs/edosk7760_defconfig | 2 +- arch/sh/configs/espt_defconfig | 2 +- arch/sh/configs/landisk_defconfig | 2 +- arch/sh/configs/lboxre2_defconfig | 2 +- arch/sh/configs/magicpanelr2_defconfig | 4 ++-- arch/sh/configs/r7780mp_defconfig | 2 +- arch/sh/configs/r7785rp_defconfig | 2 +- arch/sh/configs/rsk7264_defconfig | 2 +- arch/sh/configs/rsk7269_defconfig | 2 +- arch/sh/configs/sdk7780_defconfig | 4 ++-- arch/sh/configs/sdk7786_defconfig | 2 +- arch/sh/configs/se7343_defconfig | 2 +- arch/sh/configs/se7712_defconfig | 2 +- arch/sh/configs/se7721_defconfig | 2 +- arch/sh/configs/se7722_defconfig | 2 +- arch/sh/configs/se7724_defconfig | 6 +++--- arch/sh/configs/sh03_defconfig | 4 ++-- arch/sh/configs/sh2007_defconfig | 2 +- arch/sh/configs/sh7757lcr_defconfig | 2 +- arch/sh/configs/sh7763rdp_defconfig | 2 +- arch/sh/configs/sh7785lcr_32bit_defconfig | 2 +- arch/sh/configs/sh7785lcr_defconfig | 2 +- arch/sh/configs/shx3_defconfig | 2 +- arch/sh/configs/titan_defconfig | 4 ++-- arch/sh/configs/ul2_defconfig | 2 +- arch/sh/configs/urquell_defconfig | 2 +- arch/sparc/configs/sparc64_defconfig | 6 +++--- arch/xtensa/configs/audio_kc705_defconfig | 2 +- arch/xtensa/configs/cadence_csp_defconfig | 2 +- arch/xtensa/configs/generic_kc705_defconfig | 2 +- arch/xtensa/configs/nommu_kc705_defconfig | 2 +- arch/xtensa/configs/smp_lx200_defconfig | 2 +- arch/xtensa/configs/virt_defconfig | 2 +- arch/xtensa/configs/xip_kc705_defconfig | 2 +- 109 files changed, 182 insertions(+), 182 deletions(-) diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a7cd526dd7ca..f930396d9dae 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y 
CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index afa6a348f444..6b779dee5ea0 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -86,7 +86,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 2bfa6371953c..a89b50d5369d 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -88,7 +88,7 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1558e8e87767..1b8b2a098cda 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -77,7 +77,7 @@ CONFIG_DMADEVICES=y CONFIG_DW_AXI_DMAC=y CONFIG_IIO=y CONFIG_TI_ADC108S102=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index 03d9ac20baa9..b7120523e09a 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -74,7 +74,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_SERIAL=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index c09488992f13..4077abd5980c 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -81,7 +81,7 @@ CONFIG_MMC_DW=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y 
+CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 516689dc6cf1..9b263ea9a878 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -194,7 +194,7 @@ CONFIG_MAILBOX=y CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 27dc3bf6b124..4a8ac09843d7 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -154,8 +154,8 @@ CONFIG_PWM_BCM2835=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index e2ddaca0f89d..673408a10888 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -228,7 +228,7 @@ CONFIG_PWM=y CONFIG_PWM_TIECAP=m CONFIG_PWM_TIEHRPWM=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index d76eb12d29a7..bb6c4748bfc8 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -95,8 +95,8 @@ CONFIG_RTC_DRV_MV=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig index 2248afaf35b5..7f3756d8b086 100644 --- a/arch/arm/configs/ep93xx_defconfig +++ 
b/arch/arm/configs/ep93xx_defconfig @@ -103,8 +103,8 @@ CONFIG_RTC_DRV_EP93XX=y CONFIG_DMADEVICES=y CONFIG_EP93XX_DMA=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9a57763a8d38..0d55056c6f82 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -436,9 +436,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_AUTOFS_FS=y diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 3cb995b9616a..81199dddcde7 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -158,8 +158,8 @@ CONFIG_IXP4XX_NPE=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_OVERLAY_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index 842a989baa27..f67e9cda73e2 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -53,7 +53,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y # CONFIG_RESET_CONTROLLER is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index fa06d98e43fc..e2d9f3610063 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -113,7 +113,7 @@ CONFIG_RTC_DRV_MOXART=y CONFIG_DMADEVICES=y CONFIG_MOXART_DMA=y # 
CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index b523bc246c09..59b020e66a0b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -268,7 +268,7 @@ CONFIG_PWM_ATMEL=m CONFIG_PWM_ATMEL_HLCDC_PWM=m CONFIG_PWM_ATMEL_TCB=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 3343f72de7ea..55f4ab67a306 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -91,8 +91,8 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index 23dbb80fcc2e..d1742a7cae6a 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -168,7 +168,7 @@ CONFIG_MV_XOR=y CONFIG_STAGING=y CONFIG_FB_XGI=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index ea28ed8991b4..696b4fbc2412 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -116,7 +116,7 @@ CONFIG_IIO_ST_ACCEL_3AXIS=y CONFIG_PWM=y CONFIG_PWM_STMPE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 661e5d6894bd..24c54bf1e243 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -184,7 +184,7 @@ 
CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_OMAP=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 939913ed9a73..8f443c20872b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -679,7 +679,7 @@ CONFIG_TWL4030_USB=m CONFIG_COUNTER=m CONFIG_TI_EQEP=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_SECURITY=y CONFIG_FANOTIFY=y CONFIG_QUOTA=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 62b9c6102789..c28426250ec3 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -115,8 +115,8 @@ CONFIG_RTC_DRV_M48T86=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_EXT4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 1a80602c1284..4b988a9e2768 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -580,9 +580,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index ec52ccece0ca..5a0513290547 100644 --- a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -295,7 +295,7 @@ CONFIG_INTERCONNECT_QCOM_MSM8974=m CONFIG_INTERCONNECT_QCOM_SDX55=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FUSE_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git 
a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 24f1fa868230..46df453e224e 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -77,7 +77,7 @@ CONFIG_SOUND_VIDC=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_PCF8583=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig index a37e6ac40825..4c635818973c 100644 --- a/arch/arm/configs/s3c6400_defconfig +++ b/arch/arm/configs/s3c6400_defconfig @@ -53,9 +53,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S3C=y CONFIG_PWM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_CRAMFS=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index e14720a9a5ac..e2ad9a05566f 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -201,7 +201,7 @@ CONFIG_MCHP_EIC=y CONFIG_RESET_CONTROLLER=y CONFIG_NVMEM_MICROCHIP_OTPC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_VFAT_FS=y diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 294906c8f16e..f2e42846b116 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -136,7 +136,7 @@ CONFIG_FPGA_REGION=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig index a8f992fdb30d..8b19af1ea67c 100644 --- a/arch/arm/configs/spear13xx_defconfig +++ b/arch/arm/configs/spear13xx_defconfig @@ -84,8 +84,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y 
-CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig index 8dc5a388759c..b4e4b96a98af 100644 --- a/arch/arm/configs/spear3xx_defconfig +++ b/arch/arm/configs/spear3xx_defconfig @@ -67,8 +67,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig index 4e9e1a6ff381..7083b1bd8573 100644 --- a/arch/arm/configs/spear6xx_defconfig +++ b/arch/arm/configs/spear6xx_defconfig @@ -53,8 +53,8 @@ CONFIG_DMATEST=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac2a0f998c73..395df2f9dc8e 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -193,8 +193,8 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +# CONFIG_EXT4_FS_XATTR is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index dcd9c316072e..82190b155b14 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -69,7 +69,7 @@ CONFIG_STM32_MDMA=y CONFIG_IIO=y CONFIG_STM32_ADC_CORE=y CONFIG_STM32_ADC=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/arm/configs/tegra_defconfig 
b/arch/arm/configs/tegra_defconfig index ba863b445417..ab477ca13f89 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -319,9 +319,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 0f55815eecb3..f3bc967a0f25 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -175,7 +175,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index cdb6065e04fd..b9454f6954f8 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -120,7 +120,7 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index c6108f000288..b132752693a9 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -46,10 +46,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 34eaee0384c9..f3e2c588e5df 
100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -929,9 +929,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 7787a4dd7c3c..f3268fed02fc 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -72,9 +72,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 176314f3c9aa..fbbdcb394ca2 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -73,7 +73,7 @@ CONFIG_FB_XILINX=y CONFIG_UIO=y CONFIG_UIO_PDRV_GENIRQ=y CONFIG_UIO_DMEM_GENIRQ=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index 97d2cd997285..349e9e0b4f54 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -144,9 +144,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y diff --git a/arch/mips/configs/cobalt_defconfig 
b/arch/mips/configs/cobalt_defconfig index b0b551efac7c..6ee9ee391fdc 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -59,9 +59,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index 85a4472cb058..52a63dd7aac7 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -133,9 +133,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index a3b2c8da2dde..59fb7ee5eeb0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index a476717b8a6a..8be1cb433e95 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -129,9 +129,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y 
+CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index cdedbb8a8f53..b6fe3c962464 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -173,7 +173,7 @@ CONFIG_USB_ISIGHTFW=m CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_AUTOFS_FS=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 2decf8b98d31..e123848f94ab 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -232,9 +232,9 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1286=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 5d079941fd20..1c10242b148b 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -272,9 +272,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip28_defconfig b/arch/mips/configs/ip28_defconfig index 6db21e498faa..755cbf20f5a5 100644 --- a/arch/mips/configs/ip28_defconfig +++ b/arch/mips/configs/ip28_defconfig @@ -49,9 +49,9 @@ CONFIG_WATCHDOG=y CONFIG_INDYDOG=y # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y 
+CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index a4524e785469..718f3060d9fa 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -143,9 +143,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index d8ac11427f69..7568838eb08b 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -89,9 +89,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 65adb538030d..a790c2610fd3 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -69,7 +69,7 @@ CONFIG_FB_G364=y CONFIG_FRAMEBUFFER_CONSOLE=y # CONFIG_HWMON is not set CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 5038a27d035f..8d3f20ed19b5 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -226,9 +226,9 @@ CONFIG_MMC=m CONFIG_LEDS_CLASS=y CONFIG_STAGING=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_JFS_FS=m 
CONFIG_JFS_POSIX_ACL=y CONFIG_XFS_FS=m diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig index 68207b31dc20..a64a39447963 100644 --- a/arch/mips/configs/loongson1b_defconfig +++ b/arch/mips/configs/loongson1b_defconfig @@ -94,9 +94,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig index c3910a9dee9e..86d7f64a164a 100644 --- a/arch/mips/configs/loongson1c_defconfig +++ b/arch/mips/configs/loongson1c_defconfig @@ -95,9 +95,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig index 0cc665d3ea34..aec1fd1902eb 100644 --- a/arch/mips/configs/loongson2k_defconfig +++ b/arch/mips/configs/loongson2k_defconfig @@ -298,9 +298,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 240efff37d98..575aaf242361 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -348,9 +348,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y 
CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_POSIX_ACL=y CONFIG_QUOTA=y diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 9fcbac829920..81704ec67f09 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -313,7 +313,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 19102386a81c..82a97f58bce1 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 1b98f6945c2d..accb471a1d93 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 7b8905cb3400..6bda67c5f68f 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -149,7 +149,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 
8249f6a51895..e4082537f80f 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -148,9 +148,9 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 21cb37668763..58f5af45fa98 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -152,7 +152,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 3df9cd669683..9bfef7de0d1c 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -148,7 +148,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 1dd07c9d1812..0f9ef20744f9 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -319,7 +319,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_JFS_FS=m CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index e4bcdb64df6c..b4dc2255a81c 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -594,9 +594,9 @@ CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y 
CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 39a2419e1f3e..b507dc4dddd4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -307,7 +307,7 @@ CONFIG_USB_SISUSBVGA=m CONFIG_USB_LD=m CONFIG_USB_TEST=m CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_AUTOFS_FS=m diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3e849d25838a..fb1eb9a68bd6 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -38,7 +38,7 @@ CONFIG_MMC_LITEX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_LITEX_SOC_CONTROLLER=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_EXFAT_FS=y diff --git a/arch/openrisc/configs/virt_defconfig b/arch/openrisc/configs/virt_defconfig index c1b69166c500..01d685f3fb17 100644 --- a/arch/openrisc/configs/virt_defconfig +++ b/arch/openrisc/configs/virt_defconfig @@ -94,8 +94,8 @@ CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 94928d114d4c..52031bde9f17 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -232,8 +232,8 @@ CONFIG_AUXDISPLAY=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y 
CONFIG_QUOTA_NETLINK_INTERFACE=y CONFIG_QFMT_V2=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index d8cd7f858b2a..1aec04c09d0b 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -251,8 +251,8 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_BTRFS_FS=m CONFIG_QUOTA=y diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index b6f36c938f1d..336dbacd89bd 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -81,10 +81,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 9c2644443c4d..59daf99ea745 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -60,7 +60,7 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 137573610ec4..df2a669ea9d8 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -88,7 +88,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig 
b/arch/sh/configs/ecovec24_defconfig index e76694aace25..dd7e54c451d6 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -109,10 +109,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index f427a95bcd21..711db47b65ba 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -87,7 +87,7 @@ CONFIG_SND_SOC=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index da176f100e00..f8cad1e7a333 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -59,7 +59,7 @@ CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 924bb3233b0b..08342ceee32e 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -93,7 +93,7 @@ CONFIG_USB_EMI62=m CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 0307bb2be79f..96a21173522d 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -49,7 +49,7 @@ 
CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 93b9aa32dc7c..af7f777b20be 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -64,9 +64,9 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index f28b8c4181c2..11f210517f76 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -74,7 +74,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index 3a4239f20ff1..ae367d7a14a8 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -69,7 +69,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index e4ef259425c4..3aba0102304f 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -59,7 +59,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y 
CONFIG_NFS_FS=y diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index e0d1560b2bfd..f82f280fc55a 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -43,7 +43,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 9870d16d9711..3b51195bf1b5 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -102,9 +102,9 @@ CONFIG_LEDS_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 07894f13441e..ebb3f4420ae8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -161,7 +161,7 @@ CONFIG_STAGING=y # CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_XFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 75db12fb9ad1..6ef546ee8a32 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -84,7 +84,7 @@ CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 8770a72e6a63..4cecf9c06a65 100644 --- a/arch/sh/configs/se7712_defconfig +++ 
b/arch/sh/configs/se7712_defconfig @@ -83,7 +83,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index b15c6406a0e8..c28057b70ad7 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -107,7 +107,7 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 5327a2f70980..88bfd953ef89 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -44,7 +44,7 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 9501e69eb886..e1b2616ef921 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -110,10 +110,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 4d75c92cac10..306e1661fbf5 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -57,9 +57,9 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # 
CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index cc6292b3235a..889daa5d2faa 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -95,7 +95,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_DMADEVICES=y CONFIG_TIMB_DMA=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index 48a0f9beb116..25e9d22779b3 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -64,7 +64,7 @@ CONFIG_MMC=y CONFIG_MMC_SDHI=y CONFIG_MMC_SH_MMCIF=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index b77b3313157e..85ec00b7dbd2 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -61,7 +61,7 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 44f9b2317f09..e860a20d3f0f 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -113,7 +113,7 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index aec74b0e7003..33c98b4a2adb 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ 
b/arch/sh/configs/sh7785lcr_defconfig @@ -90,7 +90,7 @@ CONFIG_USB_TEST=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 9a0df5ea3866..3169e4dc7004 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -84,7 +84,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 8ef72b8dbcd3..6bff00038072 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -215,9 +215,9 @@ CONFIG_USB_SERIAL_PL2303=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +# CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 103b81ec1ffb..b89eb8f5cc5c 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -66,7 +66,7 @@ CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 00ef62133b04..60cb716ef195 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -114,7 +114,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y diff --git 
a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 7a7c4dec2925..200640b93e05 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -187,10 +187,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index f2af1a32c9c7..dc942bbac69f 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -103,7 +103,7 @@ CONFIG_SND_SIMPLE_CARD=y # CONFIG_USB_SUPPORT is not set CONFIG_COMMON_CLK_CDCE706=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index 88ed5284e21c..81a057f25f21 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -80,7 +80,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 4427907becca..3ee7e1c56556 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -90,7 +90,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git 
a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 5828228522ba..c6e96f0aa700 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -91,7 +91,7 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 326966ca7831..373d42b9e510 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -94,7 +94,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig index e37048985b47..72628d31e87a 100644 --- a/arch/xtensa/configs/virt_defconfig +++ b/arch/xtensa/configs/virt_defconfig @@ -76,7 +76,7 @@ CONFIG_LOGO=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_INPUT=y # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig index ee47438f9b51..5d6013ea70fc 100644 --- a/arch/xtensa/configs/xip_kc705_defconfig +++ b/arch/xtensa/configs/xip_kc705_defconfig @@ -82,7 +82,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FANOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y From ca88ecdce5f51874a7c151809bd2c936ee0d3805 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 8 Oct 2025 22:35:15 +0100 Subject: [PATCH 172/305] arm64: Revamp HCR_EL2.E2H RES1 detection We currently have two ways to identify CPUs 
that only implement FEAT_VHE and not FEAT_E2H0: - either they advertise it via ID_AA64MMFR4_EL1.E2H0, - or the HCR_EL2.E2H bit is RAO/WI However, there is a third category of "cpus" that fall between these two cases: on CPUs that do not implement FEAT_FGT, it is IMPDEF whether an access to ID_AA64MMFR4_EL1 can trap to EL2 when the register value is zero. A consequence of this is that on systems such as Neoverse V2, a NV guest cannot reliably detect that it is in a VHE-only configuration (E2H is writable, and ID_AA64MMFR4_EL1 is 0), despite the hypervisor's best effort to repaint the id register. Replace the RAO/WI test by a sequence that makes use of the VHE register remapping between EL1 and EL2 to detect this situation, and work out whether we get the VHE behaviour even after having set HCR_EL2.E2H to 0. This solves the NV problem, and provides a more reliable acid test for CPUs that do not completely follow the letter of the architecture while providing a RES1 behaviour for HCR_EL2.E2H. Suggested-by: Mark Rutland Acked-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Oliver Upton Tested-by: Jan Kotas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/15A85F2B-1A0C-4FA7-9FE4-EEC2203CC09E@global.cadence.com --- arch/arm64/include/asm/el2_setup.h | 38 +++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b37da3ee8529..99a7c0235e6d 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -24,22 +24,48 @@ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it * can reset into an UNKNOWN state and might not read as 1 until it has * been initialized explicitly. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but - * don't advertise it (they predate this relaxation).
- * * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H * indicating whether the CPU is running in E2H mode. */ mrs_s x1, SYS_ID_AA64MMFR4_EL1 sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH cmp x1, #0 - b.ge .LnVHE_\@ + b.lt .LnE2H0_\@ + /* + * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised + * as such via ID_AA64MMFR4_EL1.E2H0: + * + * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to + * have HCR_EL2.E2H implemented as RAO/WI. + * + * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest + * reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV + * guests on these hosts can write to HCR_EL2.E2H without + * trapping to the hypervisor, but these writes have no + * functional effect. + * + * Handle both cases by checking for an essential VHE property + * (system register remapping) to decide whether we're + * effectively VHE-only or not. + */ + msr_hcr_el2 x0 // Setup HCR_EL2 as nVHE + isb + mov x1, #1 // Write something to FAR_EL1 + msr far_el1, x1 + isb + mov x1, #2 // Try to overwrite it via FAR_EL2 + msr far_el2, x1 + isb + mrs x1, far_el1 // If we see the latest write in FAR_EL1, + cmp x1, #2 // we can safely assume we are VHE only. + b.ne .LnVHE_\@ // Otherwise, we know that nVHE works. + +.LnE2H0_\@: orr x0, x0, #HCR_E2H -.LnVHE_\@: msr_hcr_el2 x0 isb +.LnVHE_\@: .endm .macro __init_el2_sctlr From 095232711f23179053ca26bcf046ca121a91a465 Mon Sep 17 00:00:00 2001 From: Francesco Valla Date: Fri, 3 Oct 2025 12:33:03 +0200 Subject: [PATCH 173/305] drm/draw: fix color truncation in drm_draw_fill24 The color parameter passed to drm_draw_fill24() was truncated to 16 bits, leading to an incorrect color drawn to the target iosys_map. Fix this behavior, widening the parameter to 32 bits. 
Fixes: 31fa2c1ca0b2 ("drm/panic: Move drawing functions to drm_draw") Signed-off-by: Francesco Valla Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251003-drm_draw_fill24_fix-v1-1-8fb7c1c2a893@valla.it Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_draw.c | 2 +- drivers/gpu/drm/drm_draw_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c index 9dc0408fbbea..5b956229c82f 100644 --- a/drivers/gpu/drm/drm_draw.c +++ b/drivers/gpu/drm/drm_draw.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(drm_draw_fill16); void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color) + u32 color) { unsigned int y, x; diff --git a/drivers/gpu/drm/drm_draw_internal.h b/drivers/gpu/drm/drm_draw_internal.h index f121ee7339dc..20cb404e23ea 100644 --- a/drivers/gpu/drm/drm_draw_internal.h +++ b/drivers/gpu/drm/drm_draw_internal.h @@ -47,7 +47,7 @@ void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, - u16 color); + u32 color); void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, unsigned int height, unsigned int width, From 2616222e423398bb374ffcb5d23dea4ba2c3e524 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 10 Oct 2025 12:21:42 +0530 Subject: [PATCH 174/305] amd-xgbe: Avoid spurious link down messages during interface toggle During interface toggle operations (ifdown/ifup), the driver currently resets the local helper variable 'phy_link' to -1. This causes the link state machine to incorrectly interpret the state as a link change event, resulting in spurious "Link is down" messages being logged when the interface is brought back up. Preserve the phy_link state across interface toggles to avoid treating the -1 sentinel value as a legitimate link state transition. 
Fixes: 88131a812b16 ("amd-xgbe: Perform phy connect/disconnect at dev open/stop") Signed-off-by: Raju Rangoju Reviewed-by: Dawid Osuchowski Link: https://patch.msgid.link/20251010065142.1189310-1-Raju.Rangoju@amd.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 1 - drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index f0989aa01855..4dc631af7933 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1080,7 +1080,6 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata) static int xgbe_phy_reset(struct xgbe_prv_data *pdata) { - pdata->phy_link = -1; pdata->phy_speed = SPEED_UNKNOWN; return pdata->phy_if.phy_reset(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 1a37ec45e650..7675bb98f029 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1555,6 +1555,7 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_FULL; } + pdata->phy_link = 0; pdata->phy.link = 0; pdata->phy.pause_autoneg = pdata->pause_autoneg; From 7f38a1487555604bc4e210fa7cc9b1bce981c40e Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sun, 12 Oct 2025 07:20:01 -0700 Subject: [PATCH 175/305] drm/rockchip: vop2: use correct destination rectangle height check The vop2_plane_atomic_check() function incorrectly checks drm_rect_width(dest) twice instead of verifying both width and height. Fix the second condition to use drm_rect_height(dest) so that invalid destination rectangles with height < 4 are correctly rejected. 
Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") Signed-off-by: Alok Tiwari Reviewed-by: Andy Yan Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20251012142005.660727-1-alok.a.tiwari@oracle.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index b50927a824b4..7ec7bea5e38e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1031,7 +1031,7 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, return format; if (drm_rect_width(src) >> 16 < 4 || drm_rect_height(src) >> 16 < 4 || - drm_rect_width(dest) < 4 || drm_rect_width(dest) < 4) { + drm_rect_width(dest) < 4 || drm_rect_height(dest) < 4) { drm_err(vop2->drm, "Invalid size: %dx%d->%dx%d, min size is 4x4\n", drm_rect_width(src) >> 16, drm_rect_height(src) >> 16, drm_rect_width(dest), drm_rect_height(dest)); From 62685ab071de7c39499212bff19f1b5bc0148bc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 1 Oct 2025 15:24:49 +0200 Subject: [PATCH 176/305] uprobe: Move arch_uprobe_optimize right after handlers execution It's less confusing to optimize uprobe right after handlers execution and before we do the check for changed ip register to avoid situations where changed ip register would skip uprobe optimization. Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov --- kernel/events/uprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8709c69118b5..f11ceb8be8c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2765,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. 
*/ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. @@ -2772,9 +2775,6 @@ static void handle_swbp(struct pt_regs *regs) if (instruction_pointer(regs) != bp_vaddr) goto out; - /* Try to optimize after first hit. */ - arch_uprobe_optimize(&uprobe->arch, bp_vaddr); - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; From ebfc8542ad62d066771e46c8aa30f5624b89cad8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:42 +0300 Subject: [PATCH 177/305] perf/core: Fix address filter match with backing files It was reported that Intel PT address filters do not work in Docker containers. That relates to the use of overlayfs. overlayfs records the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. In order for an address filter to match, it must compare to the user file inode. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for address filter matching. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record --buildid-mmap -e intel_pt//u --filter 'filter * @ /root/test/merged/cat' -- /root/test/merged/cat /proc/self/maps ... 55d61d246000-55d61d2e1000 r-xp 00018000 00:1a 3418 /root/test/merged/cat ... 
[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data ] # perf buildid-cache --add /root/test/merged/cat Before: Address filter does not match so there are no control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 0 # perf script -D | grep 'TIP.PGE' | wc -l 0 # After: Address filter does match so there are control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 235 # perf script -D | grep 'TIP.PGE' | wc -l 57 # With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. Closes: https://lore.kernel.org/linux-perf-users/aBCwoq7w8ohBRQCh@fremen.lan Reported-by: Edd Barrett Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7541f6f85fcb..cd63ec84e386 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9492,7 +9492,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, if (!filter->path.dentry) return false; - if (d_inode(filter->path.dentry) != file_inode(file)) + if (d_inode(filter->path.dentry) != file_user_inode(file)) return false; if (filter->offset > offset + size) From 8818f507a9391019a3ec7c57b1a32e4b386e48a5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:43 +0300 Subject: [PATCH 178/305] perf/core: Fix MMAP event path names with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct 
vm_area_struct vm_file, instead of the user file that the user mmapped. Since commit def3ae83da02f ("fs: store real path instead of fake path in backing file f_path"), file_path() no longer returns the user file path when applied to a backing file. There is an existing helper file_user_path() for that situation. Use file_user_path() instead of file_path() to get the path for MMAP and MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e intel_pt//u -- /root/test/merged/cat /proc/self/maps ... 55b0ba399000-55b0ba434000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.060 MB perf.data ] # Before: File name is wrong (/cat), so decoding fails: # perf script --no-itrace --show-mmap-events cat 367 [016] 100.491492: PERF_RECORD_MMAP2 367/367: [0x55b0ba399000(0x9b000) @ 0x18000 00:02 3419 489959280]: r-xp /cat ... 
# perf script --itrace=e | wc -l Warning: 19 instruction trace errors 19 # After: File name is correct (/root/test/merged/cat), so decoding is ok: # perf script --no-itrace --show-mmap-events cat 364 [016] 72.153006: PERF_RECORD_MMAP2 364/364: [0x55ce4003d000(0x9b000) @ 0x18000 00:02 3419 3132534314]: r-xp /root/test/merged/cat # perf script --itrace=e # perf script --itrace=e | wc -l 0 # Fixes: def3ae83da02f ("fs: store real path instead of fake path in backing file f_path") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index cd63ec84e386..7b5c2373a8d7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9416,7 +9416,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = file_path(file, buf, PATH_MAX - sizeof(u64)); + name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; From fa4f4bae893fbce8a3edfff1ab7ece0c01dc1328 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:44 +0300 Subject: [PATCH 179/305] perf/core: Fix MMAP2 event device with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. That causes perf to misreport the device major/minor numbers of the file system of the file, and the generation of the file, and potentially other inode details. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for MMAP2 events. 
Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e cycles:u -- /root/test/merged/cat /proc/self/maps ... 55b2c91d0000-55b2c926b000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (5 samples) ] # # stat /root/test/merged/cat File: /root/test/merged/cat Size: 1127792 Blocks: 2208 IO Block: 4096 regular file Device: 0,26 Inode: 3419 Links: 1 Access: (0755/-rwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2025-09-08 12:23:59.453309624 +0000 Modify: 2025-09-08 12:23:59.454309624 +0000 Change: 2025-09-08 12:23:59.454309624 +0000 Birth: 2025-09-08 12:23:59.453309624 +0000 Before: Device reported 00:02 differs from stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 377 [-01] 243.078558: PERF_RECORD_MMAP2 377/377: [0x55b2c91d0000(0x9b000) @ 0x18000 00:02 3419 2068525940]: r-xp /root/test/merged/cat After: Device reported 00:1a is the same as stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 362 [-01] 127.755167: PERF_RECORD_MMAP2 362/362: [0x55ba6e781000(0x9b000) @ 0x18000 00:1a 3419 0]: r-xp /root/test/merged/cat With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. 
Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b5c2373a8d7..177e57c1a362 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9403,7 +9403,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_HUGETLB; if (file) { - struct inode *inode; + const struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); @@ -9421,7 +9421,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) name = "//toolong"; goto cpy_name; } - inode = file_inode(vma->vm_file); + inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; From 6c26c055523d915afb8d18e7277848eff66a3085 Mon Sep 17 00:00:00 2001 From: Xinpeng Sun Date: Thu, 9 Oct 2025 11:31:08 +0800 Subject: [PATCH 180/305] HID: intel-thc-hid: intel-quicki2c: Fix wrong type casting The type definition of qcdev->i2c_max_frame_size is already u32, so remove the unnecessary type casting le16_to_cpu. 
Signed-off-by: Xinpeng Sun Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509280841.pxmgBzKW-lkp@intel.com/ Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index 8433a991e7f4..0156ab391778 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -466,7 +466,7 @@ static void quicki2c_dma_adv_enable(struct quicki2c_device *qcdev) dev_warn(qcdev->dev, "Max frame size is smaller than hid max input length!"); thc_i2c_set_rx_max_size(qcdev->thc_hw, - le16_to_cpu(qcdev->i2c_max_frame_size)); + qcdev->i2c_max_frame_size); } thc_i2c_rx_max_size_enable(qcdev->thc_hw, true); } From 8fe2cd8ec84b3592b57f40b080f9d5aeebd553af Mon Sep 17 00:00:00 2001 From: Even Xu Date: Fri, 19 Sep 2025 15:09:39 +0800 Subject: [PATCH 181/305] HID: intel-thc-hid: Intel-quickspi: switch first interrupt from level to edge detection The original implementation used level detection for the first interrupt after device reset to avoid potential interrupt line noise and missed interrupts during the initialization phase. However, this approach introduced unintended side effects when tested with certain touch panels, including: - Delayed hardware interrupt response - Multiple spurious interrupt triggers Switching back to edge detection for the first interrupt resolves these issues while maintaining reliable interrupt handling. Extensive testing across multiple platforms with touch panels from various vendors confirms this change introduces no regressions. 
[jkosina@suse.com: properly capitalize shortlog] Fixes: 9d8d51735a3a ("HID: intel-thc-hid: intel-quickspi: Add HIDSPI protocol implementation") Tested-by: Rui Zhang Signed-off-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c index e6ba2ddcc9cb..16f780bc879b 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c @@ -280,8 +280,7 @@ int reset_tic(struct quickspi_device *qsdev) qsdev->reset_ack = false; - /* First interrupt uses level trigger to avoid missing interrupt */ - thc_int_trigger_type_select(qsdev->thc_hw, false); + thc_int_trigger_type_select(qsdev->thc_hw, true); ret = acpi_tic_reset(qsdev); if (ret) From 50f1f782f8d621a90108340c632bcb6ab4307d2e Mon Sep 17 00:00:00 2001 From: Abhishek Tamboli Date: Wed, 24 Sep 2025 10:07:20 +0530 Subject: [PATCH 182/305] HID: intel-thc-hid: intel-quickspi: Add ARL PCI Device Id's Add the missing PCI ID for the quickspi device used on the Lenovo Yoga Pro 9i 16IAH10. 
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=220567 Signed-off-by: Abhishek Tamboli Reviewed-by: Even Xu Signed-off-by: Jiri Kosina --- drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c | 6 ++++++ drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c index 84314989dc53..14cabd5dc6dd 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c +++ b/drivers/hid/intel-thc-hid/intel-quickspi/pci-quickspi.c @@ -33,6 +33,10 @@ struct quickspi_driver_data ptl = { .max_packet_size_value = MAX_PACKET_SIZE_VALUE_LNL, }; +struct quickspi_driver_data arl = { + .max_packet_size_value = MAX_PACKET_SIZE_VALUE_MTL, +}; + /* THC QuickSPI ACPI method to get device properties */ /* HIDSPI Method: {6e2ac436-0fcf-41af-a265-b32a220dcfab} */ static guid_t hidspi_guid = @@ -978,6 +982,8 @@ static const struct pci_device_id quickspi_pci_tbl[] = { {PCI_DEVICE_DATA(INTEL, THC_PTL_U_DEVICE_ID_SPI_PORT2, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT1, &ptl), }, {PCI_DEVICE_DATA(INTEL, THC_WCL_DEVICE_ID_SPI_PORT2, &ptl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT1, &arl), }, + {PCI_DEVICE_DATA(INTEL, THC_ARL_DEVICE_ID_SPI_PORT2, &arl), }, {} }; MODULE_DEVICE_TABLE(pci, quickspi_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h index f3532d866749..c30e1a42eb09 100644 --- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h +++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-dev.h @@ -21,6 +21,8 @@ #define PCI_DEVICE_ID_INTEL_THC_PTL_U_DEVICE_ID_SPI_PORT2 0xE44B #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT1 0x4D49 #define PCI_DEVICE_ID_INTEL_THC_WCL_DEVICE_ID_SPI_PORT2 0x4D4B +#define PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT1 0x7749 +#define 
PCI_DEVICE_ID_INTEL_THC_ARL_DEVICE_ID_SPI_PORT2 0x774B /* HIDSPI special ACPI parameters DSM methods */ #define ACPI_QUICKSPI_REVISION_NUM 2 From 362f21536966d7039da1de762f28f4ad44565acc Mon Sep 17 00:00:00 2001 From: Deepak Sharma Date: Fri, 26 Sep 2025 20:28:11 +0530 Subject: [PATCH 183/305] HID: cp2112: Add parameter validation to data length Syzkaller reported a stack OOB access in cp2112_write_req caused by lack of parameter validation for the user input in I2C SMBUS ioctl in cp2112 driver Add the parameter validation for the data->block[0] to be bounded by I2C_SMBUS_BLOCK_MAX + the additional compatibility padding [jkosina@suse.com: fix whitespace damage] Reported-by: syzbot+7617e19c8a59edfbd879@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7617e19c8a59edfbd879 Tested-by: syzbot+7617e19c8a59edfbd879@syzkaller.appspotmail.com Signed-off-by: Deepak Sharma Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 5a95ea3bec98..803b883ae875 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -689,7 +689,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, count = cp2112_write_read_req(buf, addr, read_length, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* Copy starts from data->block[1] so the length can + * be at max I2C_SMBUS_CLOCK_MAX + 1 + */ + + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block + 1, data->block[0]); } @@ -700,7 +707,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, I2C_SMBUS_BLOCK_MAX, command, NULL, 0); } else { - count = cp2112_write_req(buf, addr, command, + /* data_length here is data->block[0] + 1 + * so make sure that the data->block[0] is + * less than or equals I2C_SMBUS_BLOCK_MAX + 1 + */ + if 
(data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_req(buf, addr, command, data->block, data->block[0] + 1); } @@ -709,7 +723,14 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, size = I2C_SMBUS_BLOCK_DATA; read_write = I2C_SMBUS_READ; - count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, + /* data_length is data->block[0] + 1, so + * so data->block[0] should be less than or + * equal to the I2C_SMBUS_BLOCK_MAX + 1 + */ + if (data->block[0] > I2C_SMBUS_BLOCK_MAX + 1) + count = -EINVAL; + else + count = cp2112_write_read_req(buf, addr, I2C_SMBUS_BLOCK_MAX, command, data->block, data->block[0] + 1); break; From c5705a2a4aa35350e504b72a94b5c71c3754833c Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:42:39 -0700 Subject: [PATCH 184/305] Octeontx2-af: Fix missing error code in cgx_probe() When CGX fails mapping to NIX, set the error code to -ENODEV, currently err is zero and that is treated as success path. 
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aLAdlCg2_Yv7Y-3h@stanley.mountain/ Fixes: d280233fc866 ("Octeontx2-af: Fix NIX X2P calibration failures") Signed-off-by: Harshit Mogalapalli Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251010204239.94237-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index d374a4454836..ec0e11c77cbf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1981,6 +1981,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) !is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) { dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n", cgx->cgx_id); + err = -ENODEV; goto err_release_regions; } From 0be4253bf878d9aaa2b96031ac8683fceeb81480 Mon Sep 17 00:00:00 2001 From: Tristan Lobb Date: Sun, 28 Sep 2025 18:25:43 +0200 Subject: [PATCH 185/305] HID: quirks: avoid Cooler Master MM712 dongle wakeup bug The Cooler Master Mice Dongle includes a vendor defined HID interface alongside its mouse interface. Not polling it will cause the mouse to stop responding to polls on any interface once woken up again after going into power saving mode. Add the HID_QUIRK_ALWAYS_POLL quirk alongside the Cooler Master VID and the Dongle's PID. 
Signed-off-by: Tristan Lobb Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5721b8414bbd..d05a62bbafff 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -342,6 +342,9 @@ #define USB_DEVICE_ID_CODEMERCS_IOW_FIRST 0x1500 #define USB_DEVICE_ID_CODEMERCS_IOW_LAST 0x15ff +#define USB_VENDOR_ID_COOLER_MASTER 0x2516 +#define USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE 0x01b7 + #define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_DEVICE_ID_CORSAIR_K90 0x1b02 #define USB_DEVICE_ID_CORSAIR_K70R 0x1b09 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index ffd034566e2e..d7105a839598 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -57,6 +57,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_COOLER_MASTER, USB_DEVICE_ID_COOLER_MASTER_MICE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, From 1141ed52348d3df82d3fd2316128b3fc6203a68c Mon Sep 17 00:00:00 2001 From: Oleg Makarenko Date: Mon, 29 Sep 2025 18:46:11 +0300 Subject: [PATCH 186/305] HID: quirks: Add ALWAYS_POLL quirk for VRS R295 steering wheel This patch adds ALWAYS_POLL quirk for the VRS R295 steering wheel joystick. This device reboots itself every 8-10 seconds if it is not polled. 
Signed-off-by: Oleg Makarenko Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d05a62bbafff..0723b4b1c9ec 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1435,6 +1435,7 @@ #define USB_VENDOR_ID_VRS 0x0483 #define USB_DEVICE_ID_VRS_DFP 0xa355 +#define USB_DEVICE_ID_VRS_R295 0xa44c #define USB_VENDOR_ID_VTL 0x0306 #define USB_DEVICE_ID_VTL_MULTITOUCH_FF3F 0xff3f diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index d7105a839598..bcd4bccf1a7c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -207,6 +207,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_VRS, USB_DEVICE_ID_VRS_R295), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, From 327cd4b68b4398b6c24f10eb2b2533ffbfc10185 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sat, 11 Oct 2025 15:05:18 +0800 Subject: [PATCH 187/305] usbnet: Fix using smp_processor_id() in preemptible code warnings Syzbot reported the following warning: BUG: using smp_processor_id() in preemptible [00000000] code: dhcpcd/2879 caller is usbnet_skb_return+0x74/0x490 drivers/net/usb/usbnet.c:331 CPU: 1 UID: 0 PID: 2879 Comm: dhcpcd Not tainted 6.15.0-rc4-syzkaller-00098-g615dca38c2ea #0 PREEMPT(voluntary) Call 
Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120 check_preemption_disabled+0xd0/0xe0 lib/smp_processor_id.c:49 usbnet_skb_return+0x74/0x490 drivers/net/usb/usbnet.c:331 usbnet_resume_rx+0x4b/0x170 drivers/net/usb/usbnet.c:708 usbnet_change_mtu+0x1be/0x220 drivers/net/usb/usbnet.c:417 __dev_set_mtu net/core/dev.c:9443 [inline] netif_set_mtu_ext+0x369/0x5c0 net/core/dev.c:9496 netif_set_mtu+0xb0/0x160 net/core/dev.c:9520 dev_set_mtu+0xae/0x170 net/core/dev_api.c:247 dev_ifsioc+0xa31/0x18d0 net/core/dev_ioctl.c:572 dev_ioctl+0x223/0x10e0 net/core/dev_ioctl.c:821 sock_do_ioctl+0x19d/0x280 net/socket.c:1204 sock_ioctl+0x42f/0x6a0 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl fs/ioctl.c:892 [inline] __x64_sys_ioctl+0x190/0x200 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x260 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f For historical and portability reasons, netif_rx() is usually run in softirq or interrupt context; this commit therefore adds local_bh_disable()/local_bh_enable() protection in usbnet_resume_rx().
Fixes: 43daa96b166c ("usbnet: Stop RX Q on MTU change") Link: https://syzkaller.appspot.com/bug?id=81f55dfa587ee544baaaa5a359a060512228c1e1 Suggested-by: Jakub Kicinski Signed-off-by: Zqiang Link: https://patch.msgid.link/20251011070518.7095-1-qiang.zhang@linux.dev Signed-off-by: Paolo Abeni --- drivers/net/usb/usbnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 511c4154cf74..bf01f2728531 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -702,6 +702,7 @@ void usbnet_resume_rx(struct usbnet *dev) struct sk_buff *skb; int num = 0; + local_bh_disable(); clear_bit(EVENT_RX_PAUSED, &dev->flags); while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL) { @@ -710,6 +711,7 @@ void usbnet_resume_rx(struct usbnet *dev) } queue_work(system_bh_wq, &dev->bh_work); + local_bh_enable(); netif_dbg(dev, rx_status, dev->net, "paused rx queue disabled, %d skbs requeued\n", num); From 1d64624243af8329b4b219d8c39e28ea448f9929 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:31 -0700 Subject: [PATCH 188/305] HID: core: Add printk_ratelimited variants to hid_warn() etc hid_warn_ratelimited() is needed. Add the others as part of the block. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- include/linux/hid.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/hid.h b/include/linux/hid.h index e1b673ad7457..a4ddb94e3ee5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1292,4 +1292,15 @@ void hid_quirks_exit(__u16 bus); #define hid_dbg_once(hid, fmt, ...) \ dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_err_ratelimited(hid, fmt, ...) \ + dev_err_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_notice_ratelimited(hid, fmt, ...) \ + dev_notice_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) 
\ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_info_ratelimited(hid, fmt, ...) \ + dev_info_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_dbg_ratelimited(hid, fmt, ...) \ + dev_dbg_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) + #endif From b73bc6a51f0c0066912c7e181acee41091c70fe6 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:32 -0700 Subject: [PATCH 189/305] HID: nintendo: Wait longer for initial probe Some third-party controllers, such as the PB Tails CHOC, won't always respond quickly on startup. Since this packet is needed for probe, and only once during probe, let's just wait an extra second, which makes connecting consistent. Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index fb4985988615..e3e54f1df44f 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -2420,7 +2420,7 @@ static int joycon_read_info(struct joycon_ctlr *ctlr) struct joycon_input_report *report; req.subcmd_id = JC_SUBCMD_REQ_DEV_INFO; - ret = joycon_send_subcmd(ctlr, &req, 0, HZ); + ret = joycon_send_subcmd(ctlr, &req, 0, 2 * HZ); if (ret) { hid_err(ctlr->hdev, "Failed to get joycon info; ret=%d\n", ret); return ret; From b8874720b2f33a06ff1d4cf3827e7ec1195cb360 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Mon, 6 Oct 2025 18:05:33 -0700 Subject: [PATCH 190/305] HID: nintendo: Rate limit IMU compensation message Some controllers are very bad at updating the IMU, leading to these messages spamming the syslog. Rate-limiting them helps with this a bit. 
Signed-off-by: Vicki Pfau Signed-off-by: Jiri Kosina --- drivers/hid/hid-nintendo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index e3e54f1df44f..c2849a541f65 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1455,10 +1455,10 @@ static void joycon_parse_imu_report(struct joycon_ctlr *ctlr, ctlr->imu_avg_delta_ms; ctlr->imu_timestamp_us += 1000 * ctlr->imu_avg_delta_ms; if (dropped_pkts > JC_IMU_DROPPED_PKT_WARNING) { - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "compensating for %u dropped IMU reports\n", dropped_pkts); - hid_warn(ctlr->hdev, + hid_warn_ratelimited(ctlr->hdev, "delta=%u avg_delta=%u\n", delta, ctlr->imu_avg_delta_ms); } From 75527d61d60d493d1eb064f335071a20ca581f54 Mon Sep 17 00:00:00 2001 From: Yi Cong Date: Sat, 11 Oct 2025 16:24:15 +0800 Subject: [PATCH 191/305] r8152: add error handling in rtl8152_driver_init rtl8152_driver_init() is missing the error handling. When rtl8152_driver registration fails, rtl8152_cfgselector_driver should be deregistered. 
Fixes: ec51fbd1b8a2 ("r8152: add USB device driver for config selection") Cc: stable@vger.kernel.org Signed-off-by: Yi Cong Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251011082415.580740-1-yicongsrfy@163.com [pabeni@redhat.com: clarified the commit message] Signed-off-by: Paolo Abeni --- drivers/net/usb/r8152.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 44cba7acfe7d..a22d4bb2cf3b 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10122,7 +10122,12 @@ static int __init rtl8152_driver_init(void) ret = usb_register_device_driver(&rtl8152_cfgselector_driver, THIS_MODULE); if (ret) return ret; - return usb_register(&rtl8152_driver); + + ret = usb_register(&rtl8152_driver); + if (ret) + usb_deregister_device_driver(&rtl8152_cfgselector_driver); + + return ret; } static void __exit rtl8152_driver_exit(void) From 083a4f3f3cc7d107728c8f297e4f6276f0876b2d Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Mon, 13 Oct 2025 20:54:57 +0000 Subject: [PATCH 192/305] HID: Kconfig: Fix build error from CONFIG_HID_HAPTIC Temporarily change CONFIG_HID_HAPTIC to be bool instead of tristate, until we implement a permanent solution. Recently the CONFIG_HID_HAPTIC Kconfig option was reported as causing the following build errors: MODPOST Module.symvers ERROR: modpost: "hid_haptic_init" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_pressure_increase" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_check_pressure_unit" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_input_configured" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_input_mapping" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_feature_mapping" [drivers/hid/hid-multitouch.ko] undefined! ERROR: modpost: "hid_haptic_pressure_reset" [drivers/hid/hid-multitouch.ko] undefined! 
make[3]: *** [/home/thl/var/linux.dev/scripts/Makefile.modpost:147: Module.symvers] Error 1 when the kernel is compiled with the following configuration: CONFIG_HID=y CONFIG_HID_MULTITOUCH=m CONFIG_HID_HAPTIC=m To resolve this, temporarily change the CONFIG_HID_HAPTIC option to be bool, until we arrive at a permanent solution to enable CONFIG_HID_HAPTIC to be tristate. For a more detailed discussion, see [1]. [1]: https://lore.kernel.org/linux-input/auypydfkhx2eg7vp764way4batdilzc35inqda3exwzs3tk3ff@oagat6g46zto/ Signed-off-by: Jonathan Denose Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7ff85c7200e5..986de05a9787 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -93,7 +93,7 @@ config HID_GENERIC If unsure, say Y. config HID_HAPTIC - tristate "Haptic touchpad support" + bool "Haptic touchpad support" default n help Support for touchpads with force sensors and haptic actuators instead of a From 295ce1eb36ae47dc862d6c8a1012618a25516208 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 11 Oct 2025 11:57:42 +0000 Subject: [PATCH 193/305] tcp: fix tcp_tso_should_defer() vs large RTT Neal reported that using neper tcp_stream with TCP_TX_DELAY set to 50ms would often lead to flows stuck in a small cwnd mode, regardless of the congestion control. While tcp_stream sets TCP_TX_DELAY too late after the connect(), it highlighted two kernel bugs. The following heuristic in tcp_tso_should_defer() seems wrong for large RTT: delta = tp->tcp_clock_cache - head->tstamp; /* If next ACK is likely to come too late (half srtt), do not defer */ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0) goto send_now; If next ACK is expected to come in more than 1 ms, we should not defer because we prefer a smooth ACK clocking. While blamed commit was a step in the good direction, it was not generic enough. 
Another patch fixing TCP_TX_DELAY for established flows will be proposed when net-next reopens. Fixes: 50c8339e9299 ("tcp: tso: restore IW10 after TSO autosizing") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Tested-by: Neal Cardwell Link: https://patch.msgid.link/20251011115742.1245771-1-edumazet@google.com [pabeni@redhat.com: fixed whitespace issue] Signed-off-by: Paolo Abeni --- net/ipv4/tcp_output.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bb3576ac0ad7..b94efb3050d2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2369,7 +2369,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); - u32 send_win, cong_win, limit, in_flight; + u32 send_win, cong_win, limit, in_flight, threshold; + u64 srtt_in_ns, expected_ack, how_far_is_the_ack; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *head; int win_divisor; @@ -2431,9 +2432,19 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, head = tcp_rtx_queue_head(sk); if (!head) goto send_now; - delta = tp->tcp_clock_cache - head->tstamp; - /* If next ACK is likely to come too late (half srtt), do not defer */ - if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0) + + srtt_in_ns = (u64)(NSEC_PER_USEC >> 3) * tp->srtt_us; + /* When is the ACK expected ? */ + expected_ack = head->tstamp + srtt_in_ns; + /* How far from now is the ACK expected ? */ + how_far_is_the_ack = expected_ack - tp->tcp_clock_cache; + + /* If next ACK is likely to come too late, + * ie in more than min(1ms, half srtt), do not defer. + */ + threshold = min(srtt_in_ns >> 1, NSEC_PER_MSEC); + + if ((s64)(how_far_is_the_ack - threshold) > 0) goto send_now; /* Ok, it looks like it is advisable to defer. 
From bd5afca115f181c85f992d42a57cd497bc823ccb Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 12 Oct 2025 11:19:44 +0200 Subject: [PATCH 194/305] net: airoha: Take into account out-of-order tx completions in airoha_dev_xmit() Completion napi can free out-of-order tx descriptors if hw QoS is enabled and packets with different priority are queued to same DMA ring. Take into account possible out-of-order reports checking if the tx queue is full using circular buffer head/tail pointer instead of the number of queued packets. Fixes: 23020f0493270 ("net: airoha: Introduce ethernet support for EN7581 SoC") Suggested-by: Simon Horman Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251012-airoha-tx-busy-queue-v2-1-a600b08bab2d@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/airoha/airoha_eth.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 833dd911980b..433a646e9831 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1873,6 +1873,20 @@ static u32 airoha_get_dsa_tag(struct sk_buff *skb, struct net_device *dev) #endif } +static bool airoha_dev_tx_queue_busy(struct airoha_queue *q, u32 nr_frags) +{ + u32 tail = q->tail <= q->head ? q->tail + q->ndesc : q->tail; + u32 index = q->head + nr_frags; + + /* completion napi can free out-of-order tx descriptors if hw QoS is + * enabled and packets with different priorities are queued to the same + * DMA ring. Take into account possible out-of-order reports checking + * if the tx queue is full using circular buffer head/tail pointers + * instead of the number of queued packets. 
+ */ + return index >= tail; +} + static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1926,7 +1940,7 @@ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb, txq = netdev_get_tx_queue(dev, qid); nr_frags = 1 + skb_shinfo(skb)->nr_frags; - if (q->queued + nr_frags > q->ndesc) { + if (airoha_dev_tx_queue_busy(q, nr_frags)) { /* not enough space in the queue */ netif_tx_stop_queue(txq); spin_unlock_bh(&q->lock); From a7cdc2086c19e435d4cec3f9393b5f46899c0468 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 28 Sep 2025 22:01:18 +0100 Subject: [PATCH 195/305] HID: hid-debug: Fix spelling mistake "Rechargable" -> "Rechargeable" There is a spelling mistake in HID description. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 7107071c7c51..337d2dc81b4c 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -2523,7 +2523,7 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0x85, 0x0088, "iDeviceName" }, { 0x85, 0x0089, "iDeviceChemistry" }, { 0x85, 0x008a, "ManufacturerData" }, - { 0x85, 0x008b, "Rechargable" }, + { 0x85, 0x008b, "Rechargeable" }, { 0x85, 0x008c, "WarningCapacityLimit" }, { 0x85, 0x008d, "CapacityGranularity1" }, { 0x85, 0x008e, "CapacityGranularity2" }, From ee6e44dfe6e50b4a5df853d933a96bdff5309e6e Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 10 Oct 2025 00:17:27 +0530 Subject: [PATCH 196/305] sched/deadline: Stop dl_server before CPU goes offline IBM CI tool reported kernel warning[1] when running a CPU removal operation through drmgr[2]. 
i.e "drmgr -c cpu -r -q 1" WARNING: CPU: 0 PID: 0 at kernel/sched/cpudeadline.c:219 cpudl_set+0x58/0x170 NIP [c0000000002b6ed8] cpudl_set+0x58/0x170 LR [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 Call Trace: [c000000002c2f8c0] init_stack+0x78c0/0x8000 (unreliable) [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 [c00000000034df84] __hrtimer_run_queues+0x1a4/0x390 [c00000000034f624] hrtimer_interrupt+0x124/0x300 [c00000000002a230] timer_interrupt+0x140/0x320 Git bisects to: commit 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") This happens since: - dl_server hrtimer gets enqueued close to cpu offline, when kthread_park enqueues a fair task. - CPU goes offline and drmgr removes it from cpu_present_mask. - hrtimer fires and warning is hit. Fix it by stopping the dl_server before CPU is marked dead. [1]: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com/ [2]: https://github.com/ibm-power-utilities/powerpc-utils/tree/next/src/drmgr [sshegde: wrote the changelog and tested it] Fixes: 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") Closes: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com Signed-off-by: Peter Zijlstra (Intel) Reported-by: Venkat Rao Bagalkote Signed-off-by: Shrikanth Hegde Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Shrikanth Hegde --- kernel/sched/core.c | 2 ++ kernel/sched/deadline.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 198d2dd45f59..f1ebf67b48e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8571,10 +8571,12 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { WARN(true, "Dying CPU not properly vacated!"); dump_rq_tasks(rq, KERN_WARNING); } + dl_server_stop(&rq->fair_server); rq_unlock_irqrestore(rq, &rf); calc_load_migrate(rq); diff --git 
a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 615411a0a881..7b7671060bf9 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1582,6 +1582,9 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (!dl_server(dl_se) || dl_se->dl_server_active) return; + if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) + return; + dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) From 17e3e88ed0b6318fde0d1c14df1a804711cab1b5 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 8 Oct 2025 15:12:14 +0200 Subject: [PATCH 197/305] sched/fair: Fix pelt lost idle time detection The check for some lost idle pelt time should always be done when pick_next_task_fair() fails to pick a task and not only when we call it from the fair fast-path. The case happens when the last running task on rq is an RT or DL task. When the latter goes to sleep and the sum of util_sum of the rq is at the max value, we don't account for the lost idle time, whereas we should. Fixes: 67692435c411 ("sched: Rework pick_next_task() slow-path") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc0b7ce8a65d..cee1793e8277 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8920,21 +8920,21 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf return p; idle: - if (!rf) - return NULL; + if (rf) { + new_tasks = sched_balance_newidle(rq, rf); - new_tasks = sched_balance_newidle(rq, rf); + /* + * Because sched_balance_newidle() releases (and re-acquires) + * rq->lock, it is possible for any higher priority task to + * appear. In that case we must re-start the pick_next_entity() + * loop.
+ */ + if (new_tasks < 0) + return RETRY_TASK; - /* - * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is - * possible for any higher priority task to appear. In that case we - * must re-start the pick_next_entity() loop. - */ - if (new_tasks < 0) - return RETRY_TASK; - - if (new_tasks > 0) - goto again; + if (new_tasks > 0) + goto again; + } /* * rq is about to be idle, check if we need to update the From ae11e08c3d0c78d08dac4cea30bf39ede2130b03 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:15 +0300 Subject: [PATCH 198/305] i2c: Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-amd-mp2.h | 1 - drivers/i2c/busses/i2c-at91-core.c | 1 - drivers/i2c/busses/i2c-at91-master.c | 1 - drivers/i2c/busses/i2c-cadence.c | 1 - drivers/i2c/busses/i2c-davinci.c | 2 -- drivers/i2c/busses/i2c-designware-master.c | 1 - drivers/i2c/busses/i2c-hix5hd2.c | 1 - drivers/i2c/busses/i2c-i801.c | 1 - drivers/i2c/busses/i2c-img-scb.c | 3 --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ---- drivers/i2c/busses/i2c-imx.c | 3 --- drivers/i2c/busses/i2c-mv64xxx.c | 1 - drivers/i2c/busses/i2c-nvidia-gpu.c | 1 - drivers/i2c/busses/i2c-omap.c | 3 --- drivers/i2c/busses/i2c-qcom-cci.c | 2 -- drivers/i2c/busses/i2c-qcom-geni.c | 1 - drivers/i2c/busses/i2c-qup.c | 3 --- drivers/i2c/busses/i2c-riic.c | 2 -- drivers/i2c/busses/i2c-rzv2m.c | 1 - drivers/i2c/busses/i2c-sprd.c | 2 -- drivers/i2c/busses/i2c-stm32f7.c | 5 ----- drivers/i2c/busses/i2c-xiic.c | 1 - 22 files changed, 41 deletions(-) diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h index 018a42de8b1e..9b7e9494dd12 100644 ---
a/drivers/i2c/busses/i2c-amd-mp2.h +++ b/drivers/i2c/busses/i2c-amd-mp2.h @@ -207,7 +207,6 @@ static inline void amd_mp2_pm_runtime_get(struct amd_mp2_dev *mp2_dev) static inline void amd_mp2_pm_runtime_put(struct amd_mp2_dev *mp2_dev) { - pm_runtime_mark_last_busy(&mp2_dev->pci_dev->dev); pm_runtime_put_autosuspend(&mp2_dev->pci_dev->dev); } diff --git a/drivers/i2c/busses/i2c-at91-core.c b/drivers/i2c/busses/i2c-at91-core.c index edc047e3e535..b64adef778d4 100644 --- a/drivers/i2c/busses/i2c-at91-core.c +++ b/drivers/i2c/busses/i2c-at91-core.c @@ -313,7 +313,6 @@ static int __maybe_unused at91_twi_resume_noirq(struct device *dev) return ret; } - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); at91_init_twi_bus(twi_dev); diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 59795c1c24ff..894cedbca99f 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -717,7 +717,6 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) ret = (ret < 0) ? 
ret : num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 697d095afbe4..0fb728ade92e 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1128,7 +1128,6 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, cdns_i2c_set_mode(CDNS_I2C_MODE_SLAVE, id); #endif - pm_runtime_mark_last_busy(id->dev); pm_runtime_put_autosuspend(id->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 6a3d4e9e07f4..a773ba082321 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -543,7 +543,6 @@ i2c_davinci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; out: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; @@ -821,7 +820,6 @@ static int davinci_i2c_probe(struct platform_device *pdev) if (r) goto err_unuse_clocks; - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c7a72c28786c..41e9b5ecad20 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -901,7 +901,6 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) i2c_dw_release_lock(dev); done_nolock: - pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/i2c/busses/i2c-hix5hd2.c b/drivers/i2c/busses/i2c-hix5hd2.c index 5358f5ddf924..95ab910b80c0 100644 --- a/drivers/i2c/busses/i2c-hix5hd2.c +++ b/drivers/i2c/busses/i2c-hix5hd2.c @@ -373,7 +373,6 @@ static int hix5hd2_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_autosuspend(priv->dev); return ret; } diff --git 
a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index cba992fa6557..57fbec1259be 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -930,7 +930,6 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, */ iowrite8(SMBHSTSTS_INUSE_STS | STATUS_FLAGS, SMBHSTSTS(priv)); - pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index a454f9f25146..88192c25c44c 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1131,7 +1131,6 @@ static int img_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, break; } - pm_runtime_mark_last_busy(adap->dev.parent); pm_runtime_put_autosuspend(adap->dev.parent); return i2c->msg_status ? i2c->msg_status : num; @@ -1165,7 +1164,6 @@ static int img_i2c_init(struct img_i2c *i2c) "Unknown hardware revision (%d.%d.%d.%d)\n", (rev >> 24) & 0xff, (rev >> 16) & 0xff, (rev >> 8) & 0xff, rev & 0xff); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return -EINVAL; } @@ -1317,7 +1315,6 @@ static int img_i2c_init(struct img_i2c *i2c) /* Perform a synchronous sequence to reset the bus */ ret = img_i2c_reset_bus(i2c); - pm_runtime_mark_last_busy(i2c->adap.dev.parent); pm_runtime_put_autosuspend(i2c->adap.dev.parent); return ret; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 03b5a7e8c361..2a0962a0b441 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -363,7 +363,6 @@ static int lpi2c_imx_master_enable(struct lpi2c_imx_struct *lpi2c_imx) return 0; rpm_put: - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return ret; @@ -377,7 +376,6 @@ static int lpi2c_imx_master_disable(struct lpi2c_imx_struct *lpi2c_imx) temp &= 
~MCR_MEN; writel(temp, lpi2c_imx->base + LPI2C_MCR); - pm_runtime_mark_last_busy(lpi2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(lpi2c_imx->adapter.dev.parent); return 0; @@ -1462,7 +1460,6 @@ static int lpi2c_imx_probe(struct platform_device *pdev) if (ret) goto rpm_disable; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_info(&lpi2c_imx->adapter.dev, "LPI2C adapter registered\n"); @@ -1564,7 +1561,6 @@ static int lpi2c_suspend(struct device *dev) static int lpi2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 60f5c790ad7c..dcce882f3eba 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1637,7 +1637,6 @@ static int i2c_imx_xfer(struct i2c_adapter *adapter, result = i2c_imx_xfer_common(adapter, msgs, num, false); - pm_runtime_mark_last_busy(i2c_imx->adapter.dev.parent); pm_runtime_put_autosuspend(i2c_imx->adapter.dev.parent); return result; @@ -1822,7 +1821,6 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto clk_notifier_unregister; - pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); dev_dbg(&i2c_imx->adapter.dev, "claimed irq %d\n", irq); @@ -1928,7 +1926,6 @@ static int i2c_imx_suspend(struct device *dev) static int i2c_imx_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 8fc26a511320..1acba628e16c 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -766,7 +766,6 @@ mv64xxx_i2c_xfer_core(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) drv_data->num_msgs = 0; drv_data->msgs = NULL; - pm_runtime_mark_last_busy(&adap->dev); pm_runtime_put_autosuspend(&adap->dev); return ret; diff --git 
a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c index 541d808d62d0..14c059b03945 100644 --- a/drivers/i2c/busses/i2c-nvidia-gpu.c +++ b/drivers/i2c/busses/i2c-nvidia-gpu.c @@ -216,7 +216,6 @@ static int gpu_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (status2 < 0) dev_err(i2cd->dev, "i2c stop failed %d\n", status2); } - pm_runtime_mark_last_busy(i2cd->dev); pm_runtime_put_autosuspend(i2cd->dev); return status; } diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 5fcc9f6c33e5..d9f590f0c384 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -828,7 +828,6 @@ omap_i2c_xfer_common(struct i2c_adapter *adap, struct i2c_msg msgs[], int num, omap->set_mpu_wkup_lat(omap->dev, -1); out: - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return r; } @@ -1510,7 +1509,6 @@ omap_i2c_probe(struct platform_device *pdev) dev_info(omap->dev, "bus %d rev%d.%d at %d kHz\n", adap->nr, major, minor, omap->speed); - pm_runtime_mark_last_busy(omap->dev); pm_runtime_put_autosuspend(omap->dev); return 0; @@ -1605,7 +1603,6 @@ static int omap_i2c_suspend(struct device *dev) static int omap_i2c_resume(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index a3afa11a71a1..e631d79baf14 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -450,7 +450,6 @@ static int cci_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) ret = num; err: - pm_runtime_mark_last_busy(cci->dev); pm_runtime_put_autosuspend(cci->dev); return ret; @@ -508,7 +507,6 @@ static int __maybe_unused cci_suspend(struct device *dev) static int __maybe_unused cci_resume(struct device *dev) { cci_resume_runtime(dev); - pm_runtime_mark_last_busy(dev); pm_request_autosuspend(dev); return 0; diff --git 
a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 95a577764d5c..43fdd89b8beb 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -714,7 +714,6 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, else ret = geni_i2c_fifo_xfer(gi2c, msgs, num); - pm_runtime_mark_last_busy(gi2c->se.dev); pm_runtime_put_autosuspend(gi2c->se.dev); gi2c->cur = NULL; gi2c->err = 0; diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index fc348924d522..a0e076fc5f36 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1139,7 +1139,6 @@ static int qup_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1624,7 +1623,6 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap, if (ret == 0) ret = num; out: - pm_runtime_mark_last_busy(qup->dev); pm_runtime_put_autosuspend(qup->dev); return ret; @@ -1991,7 +1989,6 @@ static int qup_i2c_suspend(struct device *device) static int qup_i2c_resume(struct device *device) { qup_i2c_pm_resume_runtime(device); - pm_runtime_mark_last_busy(device); pm_request_autosuspend(device); return 0; } diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index b0ee9ac45a97..3e8f126cb7f7 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -206,7 +206,6 @@ static int riic_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return riic->err ?: num; @@ -452,7 +451,6 @@ static int riic_init_hw(struct riic_dev *riic) riic_clear_set_bit(riic, ICCR1_IICRST, 0, RIIC_ICCR1); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return 0; } diff --git a/drivers/i2c/busses/i2c-rzv2m.c b/drivers/i2c/busses/i2c-rzv2m.c index b0e9c0b62429..238714850673 100644 --- a/drivers/i2c/busses/i2c-rzv2m.c +++ 
b/drivers/i2c/busses/i2c-rzv2m.c @@ -372,7 +372,6 @@ static int rzv2m_i2c_xfer(struct i2c_adapter *adap, ret = num; out: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 26ec34b19ad5..1b490525d8dd 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -302,7 +302,6 @@ static int sprd_i2c_xfer(struct i2c_adapter *i2c_adap, ret = sprd_i2c_handle_msg(i2c_adap, &msgs[im++], 1); err_msg: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return ret < 0 ? ret : im; @@ -559,7 +558,6 @@ static int sprd_i2c_probe(struct platform_device *pdev) goto err_rpm_put; } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index e6815f6cae78..dc69ed934ec8 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1761,7 +1761,6 @@ static int stm32f7_i2c_xfer_core(struct i2c_adapter *i2c_adap, } pm_free: - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return (ret < 0) ? 
ret : num; @@ -1870,7 +1869,6 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, } pm_free: - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; } @@ -1977,7 +1975,6 @@ static int stm32f7_i2c_reg_slave(struct i2c_client *slave) if (!stm32f7_i2c_is_slave_registered(i2c_dev)) stm32f7_i2c_enable_wakeup(i2c_dev, false); - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); return ret; @@ -2015,7 +2012,6 @@ static int stm32f7_i2c_unreg_slave(struct i2c_client *slave) stm32f7_i2c_enable_wakeup(i2c_dev, false); } - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; @@ -2328,7 +2324,6 @@ static int stm32f7_i2c_probe(struct platform_device *pdev) dev_info(i2c_dev->dev, "STM32F7 I2C-%d bus adapter\n", adap->nr); - pm_runtime_mark_last_busy(i2c_dev->dev); pm_runtime_put_autosuspend(i2c_dev->dev); return 0; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 607026c921d6..28015d77599d 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1349,7 +1349,6 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) mutex_unlock(&i2c->lock); out: - pm_runtime_mark_last_busy(i2c->dev); pm_runtime_put_autosuspend(i2c->dev); return err; } From 72f437e674e54f1c143dccc67e5556d8d5acb241 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Oct 2025 23:23:10 +0200 Subject: [PATCH 199/305] i2c: usbio: Add ACPI device-id for MTL-CVF devices Add "INTC10D2" ACPI device-id for MTL-CVF devices, like the Dell Latitude 7450. 
Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2368506 Signed-off-by: Hans de Goede Acked-by: Sakari Ailus Acked-by: Israel Cepeda Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-usbio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-usbio.c b/drivers/i2c/busses/i2c-usbio.c index d42f9ab6e9a5..e7799abf6787 100644 --- a/drivers/i2c/busses/i2c-usbio.c +++ b/drivers/i2c/busses/i2c-usbio.c @@ -27,6 +27,7 @@ static const struct acpi_device_id usbio_i2c_acpi_hids[] = { { "INTC1008" }, /* MTL */ { "INTC10B3" }, /* ARL */ { "INTC10B6" }, /* LNL */ + { "INTC10D2" }, /* MTL-CVF */ { "INTC10E3" }, /* PTL */ { } }; From 867537094124b0736ca2a40193de94fc5dc0b8d3 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Mon, 13 Oct 2025 16:31:18 -0500 Subject: [PATCH 200/305] dt-bindings: i2c: Convert apm,xgene-slimpro-i2c to DT schema Convert APM X-Gene slimpro-i2c binding to DT schema format. It's a straight-forward conversion. Signed-off-by: Rob Herring (Arm) Signed-off-by: Wolfram Sang --- .../bindings/i2c/apm,xgene-slimpro-i2c.yaml | 36 +++++++++++++++++++ .../bindings/i2c/i2c-xgene-slimpro.txt | 15 -------- 2 files changed, 36 insertions(+), 15 deletions(-) create mode 100644 Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt diff --git a/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml new file mode 100644 index 000000000000..9460c64071f2 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/apm,xgene-slimpro-i2c.yaml @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/apm,xgene-slimpro-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: APM X-Gene SLIMpro Mailbox I2C + +maintainers: + - Khuong Dinh + +description: + An I2C controller accessed over the 
"SLIMpro" mailbox. + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: apm,xgene-slimpro-i2c + + mboxes: + maxItems: 1 + +required: + - compatible + - mboxes + +unevaluatedProperties: false + +examples: + - | + i2c { + compatible = "apm,xgene-slimpro-i2c"; + mboxes = <&mailbox 0>; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt b/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt deleted file mode 100644 index f6b2c20cfbf6..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-xgene-slimpro.txt +++ /dev/null @@ -1,15 +0,0 @@ -APM X-Gene SLIMpro Mailbox I2C Driver - -An I2C controller accessed over the "SLIMpro" mailbox. - -Required properties : - - - compatible : should be "apm,xgene-slimpro-i2c" - - mboxes : use the label reference for the mailbox as the first parameter. - The second parameter is the channel number. - -Example : - i2cslimpro { - compatible = "apm,xgene-slimpro-i2c"; - mboxes = <&mailbox 0>; - }; From 4f86eb0a38bc719ba966f155071a6f0594327f34 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Mon, 13 Oct 2025 16:00:39 +0800 Subject: [PATCH 201/305] selftests: net: check jq command is supported The jq command is used in vlan_bridge_binding.sh, if it is not supported, the test will spam the following log. # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # ./vlan_bridge_binding.sh: line 51: jq: command not found # TEST: Test bridge_binding on->off when lower down [FAIL] # Got operstate of , expected 0 The rtnetlink.sh has the same problem. It makes sense to check if jq is installed before running these tests. 
After this patch, the vlan_bridge_binding.sh is skipped if jq is not supported: # timeout set to 3600 # selftests: net: vlan_bridge_binding.sh # TEST: jq not installed [SKIP] Fixes: dca12e9ab760 ("selftests: net: Add a VLAN bridge binding selftest") Fixes: 6a414fd77f61 ("selftests: rtnetlink: Add an address proto test") Signed-off-by: Wang Liang Reviewed-by: Hangbin Liu Link: https://patch.msgid.link/20251013080039.3035898-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rtnetlink.sh | 2 ++ tools/testing/selftests/net/vlan_bridge_binding.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index dbf77513f617..163a084d525d 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1466,6 +1466,8 @@ usage: ${0##*/} OPTS EOF } +require_command jq + #check for needed privileges if [ "$(id -u)" -ne 0 ];then end_test "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index db481af9b6b3..e8c02c64e03a 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down() do_test_binding_off : "on->off when upper down" } +require_command jq + trap defer_scopes_cleanup EXIT setup_prepare tests_run From fd6e385528d8f85993b7bfc6430576136bb14c65 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Tue, 7 Oct 2025 13:57:50 +0200 Subject: [PATCH 202/305] accel/qaic: Fix bootlog initialization ordering As soon as we queue MHI buffers to receive the bootlog from the device, we could be receiving data. Therefore all the resources needed to process that data need to be set up prior to queuing the buffers.
We currently initialize some of the resources after queuing the buffers which creates a race between the probe() and any data that comes back from the device. If the uninitialized resources are accessed, we could see page faults. Fix the init ordering to close the race. Fixes: 5f8df5c6def6 ("accel/qaic: Add bootlog debugfs") Signed-off-by: Jeffrey Hugo Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007115750.332169-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c index a991b8198dc4..8dc4fe5bb560 100644 --- a/drivers/accel/qaic/qaic_debugfs.c +++ b/drivers/accel/qaic/qaic_debugfs.c @@ -218,6 +218,9 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d if (ret) goto destroy_workqueue; + dev_set_drvdata(&mhi_dev->dev, qdev); + qdev->bootlog_ch = mhi_dev; + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); if (!msg) { @@ -233,8 +236,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d goto mhi_unprepare; } - dev_set_drvdata(&mhi_dev->dev, qdev); - qdev->bootlog_ch = mhi_dev; return 0; mhi_unprepare: From 11f08c30a3e4157305ba692f1d44cca5fc9a8fca Mon Sep 17 00:00:00 2001 From: Youssef Samir Date: Tue, 7 Oct 2025 14:23:20 +0200 Subject: [PATCH 203/305] accel/qaic: Treat remaining == 0 as error in find_and_map_user_pages() Currently, if find_and_map_user_pages() takes a DMA xfer request from the user with a length field set to 0, or in a rare case, the host receives QAIC_TRANS_DMA_XFER_CONT from the device where resources->xferred_dma_size is equal to the requested transaction size, the function will return 0 before allocating an sgt or setting the fields of the dma_xfer struct. 
In that case, encode_addr_size_pairs() will try to access the sgt which will lead to a general protection fault. Return an EINVAL in case the user provides a zero-sized ALP, or the device requests continuation after all of the bytes have been transferred. Fixes: 96d3c1cadedb ("accel/qaic: Clean up integer overflow checking in map_user_pages()") Signed-off-by: Youssef Samir Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007122320.339654-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic_control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c index d8bdab69f800..b86a8e48e731 100644 --- a/drivers/accel/qaic/qaic_control.c +++ b/drivers/accel/qaic/qaic_control.c @@ -407,7 +407,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev, return -EINVAL; remaining = in_trans->size - resources->xferred_dma_size; if (remaining == 0) - return 0; + return -EINVAL; if (check_add_overflow(xfer_start_addr, remaining, &end)) return -EINVAL; From 52e59f7740ba23bbb664914967df9a00208ca10c Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Tue, 7 Oct 2025 08:18:37 +0200 Subject: [PATCH 204/305] accel/qaic: Synchronize access to DBC request queue head & tail pointer Two threads of the same process can potentially read and write in parallel to head and tail pointers of the same DBC request queue. This could lead to a race condition and corrupt the DBC request queue.
Fixes: ff13be830333 ("accel/qaic: Add datapath") Signed-off-by: Pranjal Ramajor Asha Kanojiya Signed-off-by: Youssef Samir Reviewed-by: Jeff Hugo Reviewed-by: Carl Vanderlip [jhugo: Add fixes tag] Signed-off-by: Jeff Hugo Link: https://lore.kernel.org/r/20251007061837.206132-1-youssef.abdulrahman@oss.qualcomm.com --- drivers/accel/qaic/qaic.h | 2 ++ drivers/accel/qaic/qaic_data.c | 12 ++++++++++-- drivers/accel/qaic/qaic_drv.c | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index c31081e42cee..820d133236dd 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -97,6 +97,8 @@ struct dma_bridge_chan { * response queue's head and tail pointer of this DBC. */ void __iomem *dbc_base; + /* Synchronizes access to Request queue's head and tail pointer */ + struct mutex req_lock; /* Head of list where each node is a memory handle queued in request queue */ struct list_head xfer_list; /* Synchronizes DBC readers during cleanup */ diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index 797289e9d780..c4f117edb266 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -1356,13 +1356,17 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr goto release_ch_rcu; } + ret = mutex_lock_interruptible(&dbc->req_lock); + if (ret) + goto release_ch_rcu; + head = readl(dbc->dbc_base + REQHP_OFF); tail = readl(dbc->dbc_base + REQTP_OFF); if (head == U32_MAX || tail == U32_MAX) { /* PCI link error */ ret = -ENODEV; - goto release_ch_rcu; + goto unlock_req_lock; } queue_level = head <= tail ? 
tail - head : dbc->nelem - (head - tail); @@ -1370,11 +1374,12 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc, head, &tail); if (ret) - goto release_ch_rcu; + goto unlock_req_lock; /* Finalize commit to hardware */ submit_ts = ktime_get_ns(); writel(tail, dbc->dbc_base + REQTP_OFF); + mutex_unlock(&dbc->req_lock); update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts, submit_ts, queue_level); @@ -1382,6 +1387,9 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr if (datapath_polling) schedule_work(&dbc->poll_work); +unlock_req_lock: + if (ret) + mutex_unlock(&dbc->req_lock); release_ch_rcu: srcu_read_unlock(&dbc->ch_lock, rcu_id); unlock_dev_srcu: diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index e31bcb0ecfc9..e162f4b8a262 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -454,6 +454,9 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, return NULL; init_waitqueue_head(&qdev->dbc[i].dbc_release); INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); + ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock); + if (ret) + return NULL; } return qdev; From 7e091add9c433bab6912228799bf508e2414acc3 Mon Sep 17 00:00:00 2001 From: Martin George Date: Mon, 8 Sep 2025 22:54:57 +0530 Subject: [PATCH 205/305] nvme-auth: update sc_c in host response The sc_c field is currently not updated in the host response to the controller challenge leading to failures while attempting secure channel concatenation. Fix this by adding a new sc_c variable to the dhchap queue context structure which is appropriately set during negotiate and then used in the host response. 
Fixes: e88a7595b57f ("nvme-tcp: request secure channel concatenation") Signed-off-by: Martin George Signed-off-by: Prashanth Adurthi Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 012fcfc79a73..a01178caf15b 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -36,6 +36,7 @@ struct nvme_dhchap_queue_context { u8 status; u8 dhgroup_id; u8 hash_id; + u8 sc_c; size_t hash_len; u8 c1[64]; u8 c2[64]; @@ -154,6 +155,8 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_protocol[0].dhchap.idlist[34] = NVME_AUTH_DHGROUP_6144; data->auth_protocol[0].dhchap.idlist[35] = NVME_AUTH_DHGROUP_8192; + chap->sc_c = data->sc_c; + return size; } @@ -489,7 +492,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, ret = crypto_shash_update(shash, buf, 2); if (ret) goto out; - memset(buf, 0, sizeof(buf)); + *buf = chap->sc_c; ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; @@ -500,6 +503,7 @@ static int nvme_auth_dhchap_setup_host_response(struct nvme_ctrl *ctrl, strlen(ctrl->opts->host->nqn)); if (ret) goto out; + memset(buf, 0, sizeof(buf)); ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; From df90f6cd29d8c77be6de4f9adf9cbe42ce2f0016 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 14 Oct 2025 10:40:57 +0200 Subject: [PATCH 206/305] slab: fix clearing freelist in free_deferred_objects() defer_free() links pending objects using the slab's freelist offset which is fine as they are not free yet. free_deferred_objects() then clears this pointer to avoid confusing the debugging consistency checks that may be enabled for the cache. 
However, with CONFIG_SLAB_FREELIST_HARDENED, even the NULL pointer needs to be encoded appropriately using set_freepointer(), otherwise it's decoded as something else and triggers the consistency checks, as found by the kernel test robot. Use set_freepointer() to prevent the issue. Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202510101652.7921fdc6-lkp@intel.com Acked-by: Alexei Starovoitov Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index b1f15598fbfd..13ae4491136a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6443,15 +6443,16 @@ static void free_deferred_objects(struct irq_work *work) slab = virt_to_slab(x); s = slab->slab_cache; + /* Point 'x' back to the beginning of allocated object */ + x -= s->offset; + /* * We used freepointer in 'x' to link 'x' into df->objects. * Clear it to NULL to avoid false positive detection * of "Freepointer corruption". */ - *(void **)x = NULL; + set_freepointer(s, x, NULL); - /* Point 'x' back to the beginning of allocated object */ - x -= s->offset; __slab_free(s, slab, x, x, 1, _THIS_IP_); } From 7f9ee5fc97e14682e36fe22ae2654c07e4998b82 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Tue, 14 Oct 2025 17:30:37 +0530 Subject: [PATCH 207/305] bpf: test_run: Fix ctx leak in bpf_prog_test_run_xdp error path Fix a memory leak in bpf_prog_test_run_xdp() where the context buffer allocated by bpf_ctx_init() is not freed when the function returns early due to a data size check. On the failing path: ctx = bpf_ctx_init(...); if (kattr->test.data_size_in - meta_sz < ETH_HLEN) return -EINVAL; The early return bypasses the cleanup label that kfree()s ctx, leading to a leak detectable by kmemleak under fuzzing. Change the return to jump to the existing free_ctx label. 
Fixes: fe9544ed1a2e ("bpf: Support specifying linear xdp packet data size for BPF_PROG_TEST_RUN") Reported-by: BPF Runtime Fuzzer (BRF) Signed-off-by: Shardul Bankar Signed-off-by: Martin KaFai Lau Acked-by: Jiri Olsa Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20251014120037.1981316-1-shardulsb08@gmail.com --- net/bpf/test_run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfb03ee0bb62..1782e83de2cb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1269,7 +1269,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, goto free_ctx; if (kattr->test.data_size_in - meta_sz < ETH_HLEN) - return -EINVAL; + goto free_ctx; data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); if (IS_ERR(data)) { From 2e41e5a91a37202ff6743c3ae5329e106aeb1c6c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:53 -0700 Subject: [PATCH 208/305] cxl/acpi: Fix setup of memory resource in cxl_acpi_set_cache_size() In order to compare the resource against the HMAT memory target, the resource needs to be memory type. Change the DEFINE_RES() macro to DEFINE_RES_MEM() in order to set the correct resource type. hmat_get_extended_linear_cache_size() uses resource_contains() internally. This causes a regression for platforms with the extended linear cache enabled as the comparison always fails and the cache size is not set. User visible impact is that when 'cxl list' is issued, a CXL region with extended linear cache support will only report half the size of the actual size. And this also breaks MCE reporting of the memory region due to incorrect offset calculation for the memory. 
[dj: Fixup commit log suggested by djbw] [dj: Fixup stable address for cc] Fixes: 12b3d697c812 ("cxl: Remove core/acpi.c and cxl core dependency on ACPI") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d7a5539d07d4..bd2e282ca93a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -348,7 +348,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; - res = DEFINE_RES(start, size, 0); + res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); From 0f6f1982cb28abf1b8a3a8ba906e2c6ade6a70e8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:54 -0700 Subject: [PATCH 209/305] cxl: Set range param for region_res_match_cxl_range() as const The function takes two parameters and compares them. The second parameter should be const since no modification should be done to it. 
Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e14c1d305b22..858d4678628d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -839,7 +839,7 @@ static int match_free_decoder(struct device *dev, const void *data) } static bool region_res_match_cxl_range(const struct cxl_region_params *p, - struct range *range) + const struct range *range) { if (!p->res) return false; From f4d027921c811ff7fc16e4d03c6bbbf4347cf37a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:55 -0700 Subject: [PATCH 210/305] cxl: Fix match_region_by_range() to use region_res_match_cxl_range() match_region_by_range() is not using the helper function that also takes extended linear cache size into account when comparing regions. This causes a x2 region to show up as 2 partial incomplete regions rather than a single CXL region with extended linear cache support. Replace the open coded compare logic with the proper helper function for comparison. User visible impact is that when 'cxl list' is issued, no active CXL region(s) are shown. There may be multiple idle regions present. No actual active CXL region is present in the kernel.
[dj: Fix stable address] Fixes: 0ec9849b6333 ("acpi/hmat / cxl: Add extended linear cache support for CXL") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 858d4678628d..57ed85e332d3 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3398,10 +3398,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - if (p->res && p->res->start == r->start && p->res->end == r->end) - return 1; - - return 0; + return region_res_match_cxl_range(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, From 257c4b03a2f7d8c15f79c79b09a561af9734f6c4 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 00:31:04 -0700 Subject: [PATCH 211/305] cxl/region: Use %pa printk format to emit resource_size_t KASAN reports a stack-out-of-bounds access in validate_region_offset() while running the cxl-poison.sh unit test because the printk format specifier, %pr format, is not a match for the resource_size_t type of the variables. %pr expects struct resource pointers and attempts to dereference the structure fields, reading beyond the bounds of the stack variables. Since these messages emit an 'A exceeds B' type of message, keep the resource_size_t's and use the %pa specifier to be architecture safe. BUG: KASAN: stack-out-of-bounds in resource_string.isra.0+0xe9a/0x1690 [] Read of size 8 at addr ffff88800a7afb40 by task bash/1397 ... 
[] The buggy address belongs to stack of task bash/1397 [] and is located at offset 56 in frame: [] validate_region_offset+0x0/0x1c0 [cxl_core] Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 57ed85e332d3..b06fee1978ba 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3663,14 +3663,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %pr\n", + "Offset %#llx is within extended linear cache %pa\n", offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", offset, &region_size); return -EINVAL; } From 469276c06affdfd2d9e88c9f228bb81119ec1a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Oct 2025 19:36:02 +0300 Subject: [PATCH 212/305] PCI: Revert early bridge resource set up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit a43ac325c7cb ("PCI: Set up bridge resources earlier") moved bridge window resources set up earlier than before. The change was necessary to support another change that got pulled at the last minute due to breaking s390 and other systems. The presence of valid bridge window resources earlier than before allows the pci_assign_unassigned_root_bus_resources() call from pci_host_probe() to assign the bridge windows. Some host bridges, however, have to wait first for the link up event before they can enumerate successfully (see e.g.
qcom_pcie_global_irq_thread()) and thus the bus has not been enumerated yet while calling pci_host_probe(). Calling pci_assign_unassigned_root_bus_resources() without results from enumeration can result in sizing bridge windows with too small sizes which cannot be later corrected after the enumeration has completed because bridge windows have become pinned in place by the other resources. Interestingly, it seems pci_read_bridge_bases() is not called at all in the problematic case and the bridge window resource type setup is done by pci_bridge_check_ranges() and sizing by the usual resource fitting logic. The root problem behind all this looks pretty generic. If resource fitting is called too early, the hotplug reservation and old size lower bounding cause the bridge windows to be assigned without children but with non-zero size, which leads to these pinning problems. As such, this can likely be solved on the general level but the solution does not look trivial. As the commit a43ac325c7cb ("PCI: Set up bridge resources earlier") was a prerequisite for another change that did not end up in the kernel yet, revert it to resolve the resource assignment failures and give time to code and test a generic solution.
Fixes: a43ac325c7cb ("PCI: Set up bridge resources earlier") Reported-by: Val Packett Link: https://lore.kernel.org/r/017ff8df-511c-4da8-b3cf-edf2cb7f1a67@packett.cool Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/df266709-a9b3-4fd8-af3a-c22eb3c9523a@roeck-us.net Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251014163602.17138-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/probe.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c83e75a0ec12..0ce98e18b5a8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -538,14 +538,10 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) } if (io) { bridge->io_window = 1; - pci_read_bridge_io(bridge, - pci_resource_n(bridge, PCI_BRIDGE_IO_WINDOW), - true); + pci_read_bridge_io(bridge, &res, true); } - pci_read_bridge_mmio(bridge, - pci_resource_n(bridge, PCI_BRIDGE_MEM_WINDOW), - true); + pci_read_bridge_mmio(bridge, &res, true); /* * DECchip 21050 pass 2 errata: the bridge may miss an address @@ -583,10 +579,7 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) bridge->pref_64_window = 1; } - pci_read_bridge_mmio_pref(bridge, - pci_resource_n(bridge, - PCI_BRIDGE_PREF_MEM_WINDOW), - true); + pci_read_bridge_mmio_pref(bridge, &res, true); } void pci_read_bridge_bases(struct pci_bus *child) From df5a1f4aeb6ff5e7c5ac47d16a347f03509dd441 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 13 Oct 2025 20:03:47 +0200 Subject: [PATCH 213/305] MAINTAINERS: add myself as maintainer for b53 I wrote the original OpenWrt driver that Florian used as the base for the dsa driver, I might as well take responsibility for it. 
Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251013180347.133246-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3a27901781c2..8a213950e37e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4804,6 +4804,7 @@ F: drivers/net/ethernet/broadcom/b44.* BROADCOM B53/SF2 ETHERNET SWITCH DRIVER M: Florian Fainelli +M: Jonas Gorski L: netdev@vger.kernel.org L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Supported From a4bbb493a3247ef32f6191fd8b2a0657139f8e08 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 14:38:49 -0700 Subject: [PATCH 214/305] cxl/trace: Subtract to find an hpa_alias0 in cxl_poison events Traces of cxl_poison events include an hpa_alias0 field if the poison address is in a region configured with an ELC, Extended Linear Cache. Since the ELC always comes first in the region, the calculation needs to subtract the ELC size from the calculated HPA address. 
Fixes: 8c520c5f1e76 ("cxl: Add extended linear cache address alias emission for cxl events") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a53ec4798b12..a972e4ef1936 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(cxl_poison, __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) - __entry->hpa_alias0 = __entry->hpa + + __entry->hpa_alias0 = __entry->hpa - cxlr->params.cache_size; else __entry->hpa_alias0 = ULLONG_MAX; From e603a342cf7ecd64ef8f36207dfe1caacb9e2583 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Oct 2025 13:20:37 -0700 Subject: [PATCH 215/305] selftests/bpf: make arg_parsing.c more robust to crashes We started getting a crash in BPF CI, which seems to originate from test_parse_test_list_file() test and is happening at this line: ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); One way we can crash there is if set.cnt is zero, which is checked for with ASSERT_EQ() above, but we proceed after this regardless of the outcome. Instead of crashing, we should bail out with test failure early. Similarly, if parse_test_list_file() fails, we shouldn't be even looking at set, so bail even earlier if ASSERT_OK() fails. 
Fixes: 64276f01dce8 ("selftests/bpf: Test_progs can read test lists from file") Signed-off-by: Andrii Nakryiko Tested-by: Ihor Solodrai Link: https://lore.kernel.org/r/20251014202037.72922-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index bb143de68875..fbf0d9c2f58b 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -146,9 +146,12 @@ static void test_parse_test_list_file(void) init_test_filter_set(&set); - ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) + goto out_fclose; + + if (!ASSERT_EQ(set.cnt, 4, "test count")) + goto out_free_set; - ASSERT_EQ(set.cnt, 4, "test count"); ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); @@ -158,8 +161,8 @@ static void test_parse_test_list_file(void) ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); +out_free_set: free_test_filter_set(&set); - out_fclose: fclose(fp); out_remove: From 7f0fddd817ba6daebea1445ae9fab4b6d2294fa8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Oct 2025 20:50:52 +0200 Subject: [PATCH 216/305] net: core: fix lockdep splat on device unregister Since blamed commit, unregister_netdevice_many_notify() takes the netdev mutex if the device needs it. If the device list is too long, this will lock more device mutexes than lockdep can handle: unshare -n \ bash -c 'for i in $(seq 1 100);do ip link add foo$i type dummy;done' BUG: MAX_LOCK_DEPTH too low! 
turning off the locking correctness validator. depth: 48 max: 48! 48 locks held by kworker/u16:1/69: #0: ..148 ((wq_completion)netns){+.+.}-{0:0}, at: process_one_work #1: ..d40 (net_cleanup_work){+.+.}-{0:0}, at: process_one_work #2: ..bd0 (pernet_ops_rwsem){++++}-{4:4}, at: cleanup_net #3: ..aa8 (rtnl_mutex){+.+.}-{4:4}, at: default_device_exit_batch #4: ..cb0 (&dev_instance_lock_key#3){+.+.}-{4:4}, at: unregister_netdevice_many_notify [..] Add a helper to close and then unlock a list of net_devices. Devices that are not up have to be skipped - netif_close_many always removes them from the list without any other actions taken, so they'd remain in locked state. Close devices whenever we've used up half of the tracking slots or we processed entire list without hitting the limit. Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Signed-off-by: Florian Westphal Link: https://patch.msgid.link/20251013185052.14021-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- net/core/dev.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index a64cef2c537e..2acfa44927da 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -12176,6 +12176,35 @@ static void dev_memory_provider_uninstall(struct net_device *dev) } } +/* devices must be UP and netdev_lock()'d */ +static void netif_close_many_and_unlock(struct list_head *close_head) +{ + struct net_device *dev, *tmp; + + netif_close_many(close_head, false); + + /* ... now unlock them */ + list_for_each_entry_safe(dev, tmp, close_head, close_list) { + netdev_unlock(dev); + list_del_init(&dev->close_list); + } +} + +static void netif_close_many_and_unlock_cond(struct list_head *close_head) +{ +#ifdef CONFIG_LOCKDEP + /* We can only track up to MAX_LOCK_DEPTH locks per task. + * + * Reserve half the available slots for additional locks possibly + * taken by notifiers and (soft)irqs. 
+ */ + unsigned int limit = MAX_LOCK_DEPTH / 2; + + if (lockdep_depth(current) > limit) + netif_close_many_and_unlock(close_head); +#endif +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -12208,17 +12237,18 @@ void unregister_netdevice_many_notify(struct list_head *head, /* If device is running, close it first. Start with ops locked... */ list_for_each_entry(dev, head, unreg_list) { + if (!(dev->flags & IFF_UP)) + continue; if (netdev_need_ops_lock(dev)) { list_add_tail(&dev->close_list, &close_head); netdev_lock(dev); } + netif_close_many_and_unlock_cond(&close_head); } - netif_close_many(&close_head, true); - /* ... now unlock them and go over the rest. */ + netif_close_many_and_unlock(&close_head); + /* ... now go over the rest. */ list_for_each_entry(dev, head, unreg_list) { - if (netdev_need_ops_lock(dev)) - netdev_unlock(dev); - else + if (!netdev_need_ops_lock(dev)) list_add_tail(&dev->close_list, &close_head); } netif_close_many(&close_head, true); From 82ebecdc74ff555daf70b811d854b1f32a296bea Mon Sep 17 00:00:00 2001 From: Jaehun Gou Date: Tue, 14 Oct 2025 22:01:46 +0900 Subject: [PATCH 217/305] exfat: fix improper check of dentry.stream.valid_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found an infinite loop bug in the exFAT file system that can lead to a Denial-of-Service (DoS) condition. When a dentry in an exFAT filesystem is malformed, the following system calls — SYS_openat, SYS_ftruncate, and SYS_pwrite64 — can cause the kernel to hang. Root cause analysis shows that the size validation code in exfat_find() does not check whether dentry.stream.valid_size is negative. As a result, the system calls mentioned above can succeed and eventually trigger the DoS issue. This patch adds a check for negative dentry.stream.valid_size to prevent this vulnerability. 
Co-developed-by: Seunghun Han Signed-off-by: Seunghun Han Co-developed-by: Jihoon Kwon Signed-off-by: Jihoon Kwon Signed-off-by: Jaehun Gou Signed-off-by: Namjae Jeon --- fs/exfat/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 7eb9c67fd35f..2364b49f050a 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -642,10 +642,14 @@ static int exfat_find(struct inode *dir, const struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); - info->size = le64_to_cpu(ep2->dentry.stream.valid_size); info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size); info->size = le64_to_cpu(ep2->dentry.stream.size); + if (info->valid_size < 0) { + exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size); + return -EIO; + } + if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) { exfat_fs_error(sb, "data size is invalid(%lld)", info->size); return -EIO; From 6f719373b943a955fee6fc2012aed207b65e2854 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 14 Oct 2025 10:46:34 +0200 Subject: [PATCH 218/305] drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off Blank the display by disabling sync pulses with VGACR17<7>. Unblank by reenabling them. This VGA setting should be supported by all Aspeed hardware. Ast currently blanks via sync-off bits in VGACRB6. Not all BMCs handle VGACRB6 correctly. After disabling sync during a reboot, some BMCs do not reenable it after the soft reset. The display output remains dark. When the display is off during boot, some BMCs set the sync-off bits in VGACRB6, so the display remains dark. Observed with Blackbird AST2500 BMCs. Clearing the sync-off bits unconditionally fixes these issues. Also do not modify VGASR1's SD bit for blanking, as it only disables GPU access to video memory. 
v2: - init vgacrb6 correctly (Jocelyn) Signed-off-by: Thomas Zimmermann Fixes: ce3d99c83495 ("drm: Call drm_atomic_helper_shutdown() at shutdown time for misc drivers") Tested-by: Nick Bowler Reported-by: Nick Bowler Closes: https://lore.kernel.org/dri-devel/wpwd7rit6t4mnu6kdqbtsnk5bhftgslio6e2jgkz6kgw6cuvvr@xbfswsczfqsi/ Cc: Douglas Anderson Cc: Dave Airlie Cc: Thomas Zimmermann Cc: Jocelyn Falempe Cc: dri-devel@lists.freedesktop.org Cc: # v6.7+ Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251014084743.18242-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_mode.c | 18 ++++++++++-------- drivers/gpu/drm/ast/ast_reg.h | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index b4e8edc7c767..30b011ed0a05 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -836,22 +836,24 @@ ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, static void ast_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct ast_device *ast = to_ast_device(crtc->dev); + u8 vgacr17 = 0x00; + u8 vgacrb6 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, 0x00); - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, 0x00); + vgacr17 |= AST_IO_VGACR17_SYNC_ENABLE; + vgacrb6 &= ~(AST_IO_VGACRB6_VSYNC_OFF | AST_IO_VGACRB6_HSYNC_OFF); + + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 0xfc, vgacrb6); } static void ast_crtc_helper_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); struct ast_device *ast = to_ast_device(crtc->dev); - u8 vgacrb6; + u8 vgacr17 = 0xff; - ast_set_index_reg_mask(ast, AST_IO_VGASRI, 0x01, 0xdf, AST_IO_VGASR1_SD); - - vgacrb6 = AST_IO_VGACRB6_VSYNC_OFF | - AST_IO_VGACRB6_HSYNC_OFF; - ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xb6, 
0xfc, vgacrb6); + vgacr17 &= ~AST_IO_VGACR17_SYNC_ENABLE; + ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0x17, 0x7f, vgacr17); /* * HW cursors require the underlying primary plane and CRTC to diff --git a/drivers/gpu/drm/ast/ast_reg.h b/drivers/gpu/drm/ast/ast_reg.h index e15adaf3a80e..30578e3b07e4 100644 --- a/drivers/gpu/drm/ast/ast_reg.h +++ b/drivers/gpu/drm/ast/ast_reg.h @@ -29,6 +29,7 @@ #define AST_IO_VGAGRI (0x4E) #define AST_IO_VGACRI (0x54) +#define AST_IO_VGACR17_SYNC_ENABLE BIT(7) /* called "Hardware reset" in docs */ #define AST_IO_VGACR80_PASSWORD (0xa8) #define AST_IO_VGACR99_VGAMEM_RSRV_MASK GENMASK(1, 0) #define AST_IO_VGACRA1_VGAIO_DISABLED BIT(1) From 2d8636119b92970ba135c3c4da87d24dbfdeb8ca Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Wed, 15 Oct 2025 16:34:54 +0900 Subject: [PATCH 219/305] exfat: fix out-of-bounds in exfat_nls_to_ucs2() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the len argument value passed to exfat_ioctl_set_volume_label() from exfat_nls_to_utf16() is passed 1 too large, an out-of-bounds read occurs when dereferencing p_cstring in exfat_nls_to_ucs2() later. And because of the NLS_NAME_OVERLEN macro, another error occurs when creating a file with a period at the end using utf8 and other iocharsets. So to avoid this, you should remove the code that uses NLS_NAME_OVERLEN macro and make the len argument value be the length of the label string, but with a maximum length of FSLABEL_MAX - 1. 
Reported-by: syzbot+98cc76a76de46b3714d4@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=98cc76a76de46b3714d4 Fixes: d01579d590f7 ("exfat: Add support for FS_IOC_{GET,SET}FSLABEL") Suggested-by: Pali Rohár Signed-off-by: Jeongjun Park Signed-off-by: Namjae Jeon --- fs/exfat/exfat_fs.h | 1 - fs/exfat/file.c | 7 ++++--- fs/exfat/namei.c | 2 +- fs/exfat/nls.c | 3 --- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 329697c89d09..38210fb6901c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -29,7 +29,6 @@ enum exfat_error_mode { enum { NLS_NAME_NO_LOSSY = 0, /* no lossy */ NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */ - NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */ }; #define EXFAT_HASH_BITS 8 diff --git a/fs/exfat/file.c b/fs/exfat/file.c index f246cf439588..adc37b4d7fc2 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -509,8 +509,8 @@ static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long ar static int exfat_ioctl_set_volume_label(struct super_block *sb, unsigned long arg) { - int ret = 0, lossy; - char label[FSLABEL_MAX]; + int ret = 0, lossy, label_len; + char label[FSLABEL_MAX] = {0}; struct exfat_uni_name uniname; if (!capable(CAP_SYS_ADMIN)) @@ -520,8 +520,9 @@ static int exfat_ioctl_set_volume_label(struct super_block *sb, return -EFAULT; memset(&uniname, 0, sizeof(uniname)); + label_len = strnlen(label, FSLABEL_MAX - 1); if (label[0]) { - ret = exfat_nls_to_utf16(sb, label, FSLABEL_MAX, + ret = exfat_nls_to_utf16(sb, label, label_len, &uniname, &lossy); if (ret < 0) return ret; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2364b49f050a..745dce29ddb5 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -442,7 +442,7 @@ static int __exfat_resolve_path(struct inode *inode, const unsigned char *path, return namelen; /* return error value */ if ((lossy && !lookup) || !namelen) - return 
(lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL; + return -EINVAL; return 0; } diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 8243d94ceaf4..57db08a5271c 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -616,9 +616,6 @@ static int exfat_nls_to_ucs2(struct super_block *sb, unilen++; } - if (p_cstring[i] != '\0') - lossy |= NLS_NAME_OVERLEN; - *uniname = '\0'; p_uniname->name_len = unilen; p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0, From 5fb750e8a9ae123b2034771b864b8a21dbef65cd Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Oct 2025 17:07:00 -0700 Subject: [PATCH 220/305] bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures. The following kmemleak splat: [ 8.105530] kmemleak: Trying to color unknown object at 0xff11000100e918c0 as Black [ 8.106521] Call Trace: [ 8.106521] [ 8.106521] dump_stack_lvl+0x4b/0x70 [ 8.106521] kvfree_call_rcu+0xcb/0x3b0 [ 8.106521] ? hrtimer_cancel+0x21/0x40 [ 8.106521] bpf_obj_free_fields+0x193/0x200 [ 8.106521] htab_map_update_elem+0x29c/0x410 [ 8.106521] bpf_prog_cfc8cd0f42c04044_overwrite_cb+0x47/0x4b [ 8.106521] bpf_prog_8c30cd7c4db2e963_overwrite_timer+0x65/0x86 [ 8.106521] bpf_prog_test_run_syscall+0xe1/0x2a0 happens due to the combination of features and fixes, but mainly due to commit 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") It's using __GFP_HIGH, which instructs slub/kmemleak internals to skip kmemleak_alloc_recursive() on allocation, so subsequent kfree_rcu()-> kvfree_call_rcu()->kmemleak_ignore() complains with the above splat. To fix this imbalance, replace bpf_map_kmalloc_node() with kmalloc_nolock() and kfree_rcu() with call_rcu() + kfree_nolock() to make sure that the objects allocated with kmalloc_nolock() are freed with kfree_nolock() rather than the implicit kfree() that kfree_rcu() uses internally. 
Note, the kmalloc_nolock() happens under bpf_spin_lock_irqsave(), so it will always fail in PREEMPT_RT. This is not an issue at the moment, since bpf_timers are disabled in PREEMPT_RT. In the future bpf_spin_lock will be replaced with state machine similar to bpf_task_work. Fixes: 6d78b4473cdb ("bpf: Tell memcg to use allow_spinning=false path in bpf_timer_init()") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Shakeel Butt Acked-by: Harry Yoo Acked-by: Vlastimil Babka Cc: linux-mm@kvack.org Link: https://lore.kernel.org/bpf/20251015000700.28988-1-alexei.starovoitov@gmail.com --- include/linux/bpf.h | 4 ++++ kernel/bpf/helpers.c | 25 ++++++++++++++----------- kernel/bpf/syscall.c | 15 +++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a98c83346134..d808253f2e94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2499,6 +2499,8 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid, #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags); @@ -2511,6 +2513,8 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) +#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node) \ + kmalloc_nolock(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ kzalloc(_size, _flags) #define bpf_map_kvcalloc(_map, _n, _size, _flags) \ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index c9fab9a356df..8eb117c52817 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1215,13 +1215,20 @@ static void 
bpf_wq_work(struct work_struct *work) rcu_read_unlock_trace(); } +static void bpf_async_cb_rcu_free(struct rcu_head *rcu) +{ + struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu); + + kfree_nolock(cb); +} + static void bpf_wq_delete_work(struct work_struct *work) { struct bpf_work *w = container_of(work, struct bpf_work, delete_work); cancel_work_sync(&w->work); - kfree_rcu(w, cb.rcu); + call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free); } static void bpf_timer_delete_work(struct work_struct *work) @@ -1230,13 +1237,13 @@ static void bpf_timer_delete_work(struct work_struct *work) /* Cancel the timer and wait for callback to complete if it was running. * If hrtimer_cancel() can be safely called it's safe to call - * kfree_rcu(t) right after for both preallocated and non-preallocated + * call_rcu() right after for both preallocated and non-preallocated * maps. The async->cb = NULL was already done and no code path can see * address 't' anymore. Timer if armed for existing bpf_hrtimer before * bpf_timer_cancel_and_free will have been cancelled. */ hrtimer_cancel(&t->timer); - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); } static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags, @@ -1270,11 +1277,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u goto out; } - /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until - * kmalloc_nolock() is available, avoid locking issues by using - * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM). - */ - cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); + cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node); if (!cb) { ret = -ENOMEM; goto out; @@ -1315,7 +1318,7 @@ static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u * or pinned in bpffs. 
*/ WRITE_ONCE(async->cb, NULL); - kfree(cb); + kfree_nolock(cb); ret = -EPERM; } out: @@ -1580,7 +1583,7 @@ void bpf_timer_cancel_and_free(void *val) * timer _before_ calling us, such that failing to cancel it here will * cause it to possibly use struct hrtimer after freeing bpf_hrtimer. * Therefore, we _need_ to cancel any outstanding timers before we do - * kfree_rcu, even though no more timers can be armed. + * call_rcu, even though no more timers can be armed. * * Moreover, we need to schedule work even if timer does not belong to * the calling callback_fn, as on two different CPUs, we can end up in a @@ -1607,7 +1610,7 @@ void bpf_timer_cancel_and_free(void *val) * completion. */ if (hrtimer_try_to_cancel(&t->timer) >= 0) - kfree_rcu(t, cb.rcu); + call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free); else queue_work(system_dfl_wq, &t->cb.delete_work); } else { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2a9456a3e730..8a129746bd6c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -520,6 +520,21 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, return ptr; } +void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + struct mem_cgroup *memcg, *old_memcg; + void *ptr; + + memcg = bpf_map_get_memcg(map); + old_memcg = set_active_memcg(memcg); + ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node); + set_active_memcg(old_memcg); + mem_cgroup_put(memcg); + + return ptr; +} + void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) { struct mem_cgroup *memcg, *old_memcg; From 6fced056d2cc8d01b326e6fcfabaacb9850b71a4 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Sun, 12 Oct 2025 00:47:59 +0800 Subject: [PATCH 221/305] smb/server: fix possible memory leak in smb2_read() Memory leak occurs when ksmbd_vfs_read() fails. Fix this by adding the missing kvfree(). 
Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ab1d45fcebde..e81e615f322a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6824,6 +6824,7 @@ int smb2_read(struct ksmbd_work *work) nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf); if (nbytes < 0) { + kvfree(aux_payload_buf); err = nbytes; goto out; } From 379510a815cb2e64eb0a379cb62295d6ade65df0 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Sun, 12 Oct 2025 00:51:36 +0800 Subject: [PATCH 222/305] smb/server: fix possible refcount leak in smb2_sess_setup() Reference count of ksmbd_session will leak when session need reconnect. Fix this by adding the missing ksmbd_user_session_put(). Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index e81e615f322a..b731d9b09408 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1806,6 +1806,7 @@ int smb2_sess_setup(struct ksmbd_work *work) if (ksmbd_conn_need_reconnect(conn)) { rc = -EFAULT; + ksmbd_user_session_put(sess); sess = NULL; goto out_err; } From 88f170814fea74911ceab798a43cbd7c5599bed4 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Wed, 15 Oct 2025 09:25:46 +0200 Subject: [PATCH 223/305] ksmbd: fix recursive locking in RPC handle list access Since commit 305853cce3794 ("ksmbd: Fix race condition in RPC handle list access"), ksmbd_session_rpc_method() attempts to lock sess->rpc_lock. This causes hung connections / tasks when a client attempts to open a named pipe. 
Using Samba's rpcclient tool: $ rpcclient //192.168.1.254 -U user%password $ rpcclient $> srvinfo Kernel side: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/0:0 state:D stack:0 pid:5021 tgid:5021 ppid:2 flags:0x00200000 Workqueue: ksmbd-io handle_ksmbd_work Call trace: __schedule from schedule+0x3c/0x58 schedule from schedule_preempt_disabled+0xc/0x10 schedule_preempt_disabled from rwsem_down_read_slowpath+0x1b0/0x1d8 rwsem_down_read_slowpath from down_read+0x28/0x30 down_read from ksmbd_session_rpc_method+0x18/0x3c ksmbd_session_rpc_method from ksmbd_rpc_open+0x34/0x68 ksmbd_rpc_open from ksmbd_session_rpc_open+0x194/0x228 ksmbd_session_rpc_open from create_smb2_pipe+0x8c/0x2c8 create_smb2_pipe from smb2_open+0x10c/0x27ac smb2_open from handle_ksmbd_work+0x238/0x3dc handle_ksmbd_work from process_scheduled_works+0x160/0x25c process_scheduled_works from worker_thread+0x16c/0x1e8 worker_thread from kthread+0xa8/0xb8 kthread from ret_from_fork+0x14/0x38 Exception stack(0x8529ffb0 to 0x8529fff8) The task deadlocks because the lock is already held: ksmbd_session_rpc_open down_write(&sess->rpc_lock) ksmbd_rpc_open ksmbd_session_rpc_method down_read(&sess->rpc_lock) <-- deadlock Adjust ksmbd_session_rpc_method() callers to take the lock when necessary. 
Fixes: 305853cce3794 ("ksmbd: Fix race condition in RPC handle list access") Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 7 ++----- fs/smb/server/smb2pdu.c | 9 ++++++++- fs/smb/server/transport_ipc.c | 12 ++++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 6fa025374f2f..1c181ef99929 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -147,14 +147,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; - int method; - down_read(&sess->rpc_lock); + lockdep_assert_held(&sess->rpc_lock); entry = xa_load(&sess->rpc_handle_list, id); - method = entry ? entry->method : 0; - up_read(&sess->rpc_lock); - return method; + return entry ? entry->method : 0; } void ksmbd_session_destroy(struct ksmbd_session *sess) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index b731d9b09408..f901ae18e68a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4626,8 +4626,15 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess, * pipe without opening it, checking error condition here */ id = req->VolatileFileId; - if (!ksmbd_session_rpc_method(sess, id)) + + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); + if (!ksmbd_session_rpc_method(sess, id)) { + up_read(&sess->rpc_lock); return -ENOENT; + } + up_read(&sess->rpc_lock); ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n", req->FileInfoClass, req->VolatileFileId); diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 2aa1b29bea08..46f87fd1ce1c 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -825,6 +825,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int 
handle if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -833,6 +836,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle req->flags |= KSMBD_RPC_WRITE_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); @@ -849,6 +853,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle) if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -856,6 +863,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle) req->flags |= rpc_context_flags(sess); req->flags |= KSMBD_RPC_READ_METHOD; req->payload_sz = 0; + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); @@ -876,6 +884,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle if (!msg) return NULL; + lockdep_assert_not_held(&sess->rpc_lock); + + down_read(&sess->rpc_lock); msg->type = KSMBD_EVENT_RPC_REQUEST; req = (struct ksmbd_rpc_command *)msg->payload; req->handle = handle; @@ -884,6 +895,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle req->flags |= KSMBD_RPC_IOCTL_METHOD; req->payload_sz = payload_sz; memcpy(req->payload, payload, payload_sz); + up_read(&sess->rpc_lock); resp = ipc_msg_send_request(msg, req->handle); ipc_msg_free(msg); From b0432201a11b3caaeca6c03f2b3e399275b2e489 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Sun, 12 Oct 2025 21:10:30 +0200 Subject: [PATCH 224/305] smb: client: let destroy_mr_list() keep smbdirect_mr_io memory if registered If a smbdirect_mr_io structure is 
still visible to callers of smbd_register_mr() we can't free the related memory when the connection is disconnected! Otherwise smbd_deregister_mr() will crash. Now we use a mutex and refcounting in order to keep the memory around if the connection is disconnected. It means smbd_deregister_mr() can be called at any later time to free the memory, which is no longer referenced by nor referencing the connection. It also means smbd_destroy() no longer needs to wait for mr_io.used.count to become 0. Fixes: 050b8c374019 ("smbd: Make upper layer decide when to destroy the transport") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 146 +++++++++++++++++++++++++++++++++----- 1 file changed, 127 insertions(+), 19 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index c3330e43488f..77de85d7cdc3 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1624,19 +1624,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); destroy_receive_buffers(sc); - /* - * For performance reasons, memory registration and deregistration - * are not locked by srv_mutex. It is possible some processes are - * blocked on transport srv_mutex while holding memory registration. - * Release the transport srv_mutex to allow them to hit the failure - * path when sending data, and then release memory registrations. 
- */ log_rdma_event(INFO, "freeing mr list\n"); - while (atomic_read(&sc->mr_io.used.count)) { - cifs_server_unlock(server); - msleep(1000); - cifs_server_lock(server); - } destroy_mr_list(sc); ib_free_cq(sc->ib.send_cq); @@ -2352,6 +2340,46 @@ static void smbd_mr_recovery_work(struct work_struct *work) } } +static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr) +{ + struct smbdirect_socket *sc = mr->socket; + + lockdep_assert_held(&mr->mutex); + + if (mr->state == SMBDIRECT_MR_DISABLED) + return; + + if (mr->mr) + ib_dereg_mr(mr->mr); + if (mr->sgt.nents) + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); + kfree(mr->sgt.sgl); + + mr->mr = NULL; + mr->sgt.sgl = NULL; + mr->sgt.nents = 0; + + mr->state = SMBDIRECT_MR_DISABLED; +} + +static void smbd_mr_free_locked(struct kref *kref) +{ + struct smbdirect_mr_io *mr = + container_of(kref, struct smbdirect_mr_io, kref); + + lockdep_assert_held(&mr->mutex); + + /* + * smbd_mr_disable_locked() should already be called! + */ + if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED)) + smbd_mr_disable_locked(mr); + + mutex_unlock(&mr->mutex); + mutex_destroy(&mr->mutex); + kfree(mr); +} + static void destroy_mr_list(struct smbdirect_socket *sc) { struct smbdirect_mr_io *mr, *tmp; @@ -2365,13 +2393,31 @@ static void destroy_mr_list(struct smbdirect_socket *sc) spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); list_for_each_entry_safe(mr, tmp, &all_list, list) { - if (mr->mr) - ib_dereg_mr(mr->mr); - if (mr->sgt.nents) - ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); - kfree(mr->sgt.sgl); + mutex_lock(&mr->mutex); + + smbd_mr_disable_locked(mr); list_del(&mr->list); - kfree(mr); + mr->socket = NULL; + + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. 
Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + * + * If the mr is still registered it will + * be dangling (detached from the connection + * waiting for smbd_deregister_mr() to be + * called in order to free the memory. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } } @@ -2402,6 +2448,9 @@ static int allocate_mr_list(struct smbdirect_socket *sc) goto kzalloc_mr_failed; } + kref_init(&mr->kref); + mutex_init(&mr->mutex); + mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, sp->max_frmr_depth); @@ -2434,6 +2483,7 @@ static int allocate_mr_list(struct smbdirect_socket *sc) kcalloc_sgl_failed: ib_dereg_mr(mr->mr); ib_alloc_mr_failed: + mutex_destroy(&mr->mutex); kfree(mr); kzalloc_mr_failed: destroy_mr_list(sc); @@ -2471,6 +2521,7 @@ static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) list_for_each_entry(ret, &sc->mr_io.all.list, list) { if (ret->state == SMBDIRECT_MR_READY) { ret->state = SMBDIRECT_MR_REGISTERED; + kref_get(&ret->kref); spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); atomic_dec(&sc->mr_io.ready.count); atomic_inc(&sc->mr_io.used.count); @@ -2535,6 +2586,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, return NULL; } + mutex_lock(&mr->mutex); + mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; mr->need_invalidate = need_invalidate; mr->sgt.nents = 0; @@ -2578,8 +2631,16 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL); - if (!rc) + if (!rc) { + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we keep that and let + * the caller use smbd_deregister_mr() + * to remove it again.
+ */ + mutex_unlock(&mr->mutex); return mr; + } log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); @@ -2596,6 +2657,25 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, smbd_disconnect_rdma_connection(sc); + /* + * get_mr() gave us a reference + * via kref_get(&mr->kref), we need to remove it again + * on error. + * + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); + return NULL; } @@ -2624,6 +2704,15 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) { struct smbdirect_socket *sc = mr->socket; + mutex_lock(&mr->mutex); + if (mr->state == SMBDIRECT_MR_DISABLED) + goto put_kref; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + smbd_mr_disable_locked(mr); + goto put_kref; + } + if (mr->need_invalidate) { struct ib_send_wr *wr = &mr->inv_wr; int rc; @@ -2640,6 +2729,7 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); + smbd_mr_disable_locked(mr); smbd_disconnect_rdma_connection(sc); goto done; } @@ -2671,6 +2761,24 @@ void smbd_deregister_mr(struct smbdirect_mr_io *mr) done: if (atomic_dec_and_test(&sc->mr_io.used.count)) wake_up(&sc->mr_io.cleanup.wait_queue); + +put_kref: + /* + * No kref_put_mutex() as it's already locked. + * + * If smbd_mr_free_locked() is called + * and the mutex is unlocked and mr is gone, + * in that case kref_put() returned 1. + * + * If kref_put() returned 0 we know that + * smbd_mr_free_locked() didn't + * run. 
Not by us nor by anyone else, as we + * still hold the mutex, so we need to unlock + * and keep the mr in SMBDIRECT_MR_READY or + * SMBDIRECT_MR_ERROR state. + */ + if (!kref_put(&mr->kref, smbd_mr_free_locked)) + mutex_unlock(&mr->mutex); } static bool smb_set_sge(struct smb_extract_to_rdma *rdma, From d877470b59910b5c50383d634dda3782386bba51 Mon Sep 17 00:00:00 2001 From: ZhangGuoDong Date: Mon, 13 Oct 2025 00:17:30 +0800 Subject: [PATCH 225/305] smb: move some duplicate definitions to common/cifsglob.h In order to maintain the code more easily, move duplicate definitions to new common header file. Co-developed-by: ChenXiaoSong Signed-off-by: ChenXiaoSong Signed-off-by: ZhangGuoDong Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 19 +------------------ fs/smb/common/cifsglob.h | 30 ++++++++++++++++++++++++++++++ fs/smb/server/smb_common.h | 14 +------------- 3 files changed, 32 insertions(+), 31 deletions(-) create mode 100644 fs/smb/common/cifsglob.h diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8f6f567d7474..c5034cf9ac9e 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -24,6 +24,7 @@ #include "cifsacl.h" #include #include +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" #include @@ -702,12 +703,6 @@ get_rfc1002_length(void *buf) return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } -static inline void -inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} - struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; @@ -1021,8 +1016,6 @@ compare_mid(__u16 mid, const struct smb_hdr *smb) #define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4) #define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4) -#define CIFS_DEFAULT_IOSIZE (1024 * 1024) - /* * Windows only supports a max of 60kb reads and 65535 byte writes. Default to * those values when posix extensions aren't in force. 
In actuality here, we @@ -2148,30 +2141,20 @@ extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; /* Operations for different SMB versions */ -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ -#define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; -#define SMBDEFAULT_VERSION_STRING "default" extern struct smb_version_values smbdefault_values; -#define SMB3ANY_VERSION_STRING "3" extern struct smb_version_values smb3any_values; -#define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; -#define SMB302_VERSION_STRING "3.02" -#define ALT_SMB302_VERSION_STRING "3.0.2" /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; -#define SMB311_VERSION_STRING "3.1.1" -#define ALT_SMB311_VERSION_STRING "3.11" extern struct smb_version_operations smb311_operations; extern struct smb_version_values smb311_values; diff --git a/fs/smb/common/cifsglob.h b/fs/smb/common/cifsglob.h new file mode 100644 index 000000000000..00fd215e3eb5 --- /dev/null +++ b/fs/smb/common/cifsglob.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) + * + */ +#ifndef _COMMON_CIFS_GLOB_H +#define _COMMON_CIFS_GLOB_H + +static inline void inc_rfc1001_len(void *buf, int count) +{ + be32_add_cpu((__be32 *)buf, count); +} + +#define SMB1_VERSION_STRING "1.0" +#define SMB20_VERSION_STRING 
"2.0" +#define SMB21_VERSION_STRING "2.1" +#define SMBDEFAULT_VERSION_STRING "default" +#define SMB3ANY_VERSION_STRING "3" +#define SMB30_VERSION_STRING "3.0" +#define SMB302_VERSION_STRING "3.02" +#define ALT_SMB302_VERSION_STRING "3.0.2" +#define SMB311_VERSION_STRING "3.1.1" +#define ALT_SMB311_VERSION_STRING "3.11" + +#define CIFS_DEFAULT_IOSIZE (1024 * 1024) + +#endif /* _COMMON_CIFS_GLOB_H */ diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index d742ba754348..863716207a0d 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -10,6 +10,7 @@ #include "glob.h" #include "nterr.h" +#include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" @@ -26,16 +27,8 @@ #define SMB311_PROT 6 #define BAD_PROT 0xFFFF -#define SMB1_VERSION_STRING "1.0" -#define SMB20_VERSION_STRING "2.0" -#define SMB21_VERSION_STRING "2.1" -#define SMB30_VERSION_STRING "3.0" -#define SMB302_VERSION_STRING "3.02" -#define SMB311_VERSION_STRING "3.1.1" - #define SMB_ECHO_INTERVAL (60 * HZ) -#define CIFS_DEFAULT_IOSIZE (64 * 1024) #define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */ #define MAX_STREAM_PROT_LEN 0x00FFFFFF @@ -464,9 +457,4 @@ static inline unsigned int get_rfc1002_len(void *buf) { return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } - -static inline void inc_rfc1001_len(void *buf, int count) -{ - be32_add_cpu((__be32 *)buf, count); -} #endif /* __SMB_COMMON_H__ */ From dc96cefef0d3032c69e46a21b345c60e56b18934 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 15 Oct 2025 09:48:27 +0800 Subject: [PATCH 226/305] blk-mq: fix stale tag depth for shared sched tags in blk_mq_update_nr_requests() Commit 7f2799c546db ("blk-mq: cleanup shared tags case in blk_mq_update_nr_requests()") moves blk_mq_tag_update_sched_shared_tags() before q->nr_requests is updated, however, it's still using the old q->nr_requests to resize tag depth. Fix this problem by passing in expected new tag depth. 
Fixes: 7f2799c546db ("blk-mq: cleanup shared tags case in blk_mq_update_nr_requests()") Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Reviewed-by: Nilay Shroff Reported-by: Chris Mason Link: https://lore.kernel.org/linux-block/20251014130507.4187235-2-clm@meta.com/ Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 2 +- block/blk-mq-tag.c | 5 +++-- block/blk-mq.c | 2 +- block/blk-mq.h | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d06bb137a743..e0bed16485c3 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -557,7 +557,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e, if (blk_mq_is_shared_tags(flags)) { /* Shared tags are stored at index 0 in @et->tags. */ q->sched_shared_tags = et->tags[0]; - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, et->nr_requests); } queue_for_each_hw_ctx(q, hctx, i) { diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c7a4d4b9cc87..5b664dbdf655 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -622,10 +622,11 @@ void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags); } -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q) +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr) { sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags, - q->nr_requests - q->tag_set->reserved_tags); + nr - q->tag_set->reserved_tags); } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index 09f579414161..d626d32f6e57 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4941,7 +4941,7 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q, * tags can't grow, see blk_mq_alloc_sched_tags(). 
*/ if (q->elevator) - blk_mq_tag_update_sched_shared_tags(q); + blk_mq_tag_update_sched_shared_tags(q, nr); else blk_mq_tag_resize_shared_tags(set, nr); } else if (!q->elevator) { diff --git a/block/blk-mq.h b/block/blk-mq.h index af42dc018808..c4fccdeb5441 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -186,7 +186,8 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size); -void blk_mq_tag_update_sched_shared_tags(struct request_queue *q); +void blk_mq_tag_update_sched_shared_tags(struct request_queue *q, + unsigned int nr); void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn, From 08823e89e3e269bf4c4a20b4c24a8119920cc7a4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 15 Oct 2025 18:30:39 +0800 Subject: [PATCH 227/305] block: Remove elevator_lock usage from blkg_conf frozen operations Remove the acquisition and release of q->elevator_lock in the blkg_conf_open_bdev_frozen() and blkg_conf_exit_frozen() functions. The elevator lock is no longer needed in these code paths since commit 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()") which introduces `disk->rqos_state_mutex` for protecting wbt state change, and not necessary to abuse elevator_lock for this purpose. This change helps to solve the lockdep warning reported from Yu Kuai[1]. Pass blktests/throtl with lockdep enabled. 
Links: https://lore.kernel.org/linux-block/e5e7ac3f-2063-473a-aafb-4d8d43e5576e@yukuai.org.cn/ [1] Fixes: commit 78c271344b6f ("block: move wbt_enable_default() out of queue freezing from sched ->exit()") Signed-off-by: Ming Lei Reviewed-by: Nilay Shroff Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f93de34fe87d..3cffb68ba5d8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -812,8 +812,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) } /* * Similar to blkg_conf_open_bdev, but additionally freezes the queue, - * acquires q->elevator_lock, and ensures the correct locking order - * between q->elevator_lock and q->rq_qos_mutex. + * ensures the correct locking order between freeze queue and q->rq_qos_mutex. * * This function returns negative error on failure. On success it returns * memflags which must be saved and later passed to blkg_conf_exit_frozen @@ -834,13 +833,11 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) * At this point, we haven’t started protecting anything related to QoS, * so we release q->rq_qos_mutex here, which was first acquired in blkg_ * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing - * the queue and acquiring q->elevator_lock to maintain the correct - * locking order. + * the queue to maintain the correct locking order. */ mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue); - mutex_lock(&ctx->bdev->bd_queue->elevator_lock); mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex); return memflags; @@ -995,9 +992,8 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx) EXPORT_SYMBOL_GPL(blkg_conf_exit); /* - * Similar to blkg_conf_exit, but also unfreezes the queue and releases - * q->elevator_lock. Should be used when blkg_conf_open_bdev_frozen - * is used to open the bdev. 
+ * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used + * when blkg_conf_open_bdev_frozen is used to open the bdev. */ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) { @@ -1005,7 +1001,6 @@ void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) struct request_queue *q = ctx->bdev->bd_queue; blkg_conf_exit(ctx); - mutex_unlock(&q->elevator_lock); blk_mq_unfreeze_queue(q, memflags); } } From be7cab44ed099566c605a8dac686c3254db01b35 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Oct 2025 13:07:23 +0100 Subject: [PATCH 228/305] io_uring: protect mem region deregistration io_create_region_mmap_safe() protects publishing of a region against concurrent mmap calls, however we should also protect against it when removing a region. There is a gap io_register_mem_region() where it safely publishes a region, but then copy_to_user goes wrong and it unsafely frees the region. Cc: stable@vger.kernel.org Fixes: 087f997870a94 ("io_uring/memmap: implement mmap for regions") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/register.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/register.c b/io_uring/register.c index 43f04c47522c..58d43d624856 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -613,6 +613,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg) if (ret) return ret; if (copy_to_user(rd_uptr, &rd, sizeof(rd))) { + guard(mutex)(&ctx->mmap_lock); io_free_region(ctx, &ctx->param_region); return -EFAULT; } From 437c23357d897f5b5b7d297c477da44b56654d46 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Oct 2025 13:10:31 +0100 Subject: [PATCH 229/305] io_uring: fix unexpected placement on same size resizing There might be many reasons why a user is resizing a ring, e.g. moving to huge pages or for some memory compaction using IORING_SETUP_NO_MMAP. 
Don't bypass resizing, the user will definitely be surprised seeing 0 while the rings weren't actually moved to a new place. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/register.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/io_uring/register.c b/io_uring/register.c index 58d43d624856..2e4717f1357c 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -421,13 +421,6 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg) if (unlikely(ret)) return ret; - /* nothing to do, but copy params back */ - if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) { - if (copy_to_user(arg, &p, sizeof(p))) - return -EFAULT; - return 0; - } - size = rings_size(p.flags, p.sq_entries, p.cq_entries, &sq_array_offset); if (size == SIZE_MAX) From 95355766e5871e9cdc574be5a3b115392ad33aea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 22 Sep 2025 13:27:25 +0300 Subject: [PATCH 230/305] drm/i915/psr: Deactivate PSR only on LNL and when selective fetch enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using intel_psr_exit in frontbuffer flush on older platforms seems to be causing problems. Sending single full frame update using intel_psr_force_update is anyways more optimal compared to psr deactivate/activate -> move back to this approach on PSR1, PSR HW tracking and Panel Replay full frame update and use deactivate/activate only on LunarLake and only when selective fetch is enabled. 
Tested-by: Lemen Tested-by: Koos Vriezen Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14946 Signed-off-by: Jouni Högander Reviewed-by: Mika Kahola Link: https://lore.kernel.org/r/20250922102725.2752742-1-jouni.hogander@intel.com (cherry picked from commit 924adb0bbdd8fef25fd229c76e3f602c3e8752ee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_psr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 01bf304c705f..10eb93a34cf2 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -3402,6 +3402,7 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); if (DISPLAY_VER(display) < 20 && intel_dp->psr.psr2_sel_fetch_enabled) { + /* Selective fetch prior LNL */ if (intel_dp->psr.psr2_sel_fetch_cff_enabled) { /* can we turn CFF off? 
*/ if (intel_dp->psr.busy_frontbuffer_bits == 0) @@ -3420,12 +3421,19 @@ static void _psr_flush_handle(struct intel_dp *intel_dp) intel_psr_configure_full_frame_update(intel_dp); intel_psr_force_update(intel_dp); + } else if (!intel_dp->psr.psr2_sel_fetch_enabled) { + /* + * PSR1 on all platforms + * PSR2 HW tracking + * Panel Replay Full frame update + */ + intel_psr_force_update(intel_dp); } else { + /* Selective update LNL onwards */ intel_psr_exit(intel_dp); } - if ((!intel_dp->psr.psr2_sel_fetch_enabled || DISPLAY_VER(display) >= 20) && - !intel_dp->psr.busy_frontbuffer_bits) + if (!intel_dp->psr.active && !intel_dp->psr.busy_frontbuffer_bits) queue_work(display->wq.unordered, &intel_dp->psr.work); } From 0f5878834d6ce97426219b64c02a2c4081419d53 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 13 Oct 2025 02:14:22 +0200 Subject: [PATCH 231/305] rust: bitmap: clean Rust 1.92.0 `unused_unsafe` warning Starting with Rust 1.92.0 (expected 2025-12-11), Rust allows to safely take the address of a union field [1][2]: CLIPPY L rust/kernel.o error: unnecessary `unsafe` block --> rust/kernel/bitmap.rs:169:13 | 169 | unsafe { core::ptr::addr_of!(self.repr.bitmap) } | ^^^^^^ unnecessary `unsafe` block | = note: `-D unused-unsafe` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unused_unsafe)]` error: unnecessary `unsafe` block --> rust/kernel/bitmap.rs:185:13 | 185 | unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } | ^^^^^^ unnecessary `unsafe` block Thus allow both instances to clean the warning in newer compilers. 
Link: https://github.com/rust-lang/rust/issues/141264 [1] Link: https://github.com/rust-lang/rust/pull/141469 [2] Signed-off-by: Miguel Ojeda Reviewed-by: Alice Ryhl Signed-off-by: Yury Norov (NVIDIA) --- rust/kernel/bitmap.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index f45915694454..711b8368b38f 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -166,6 +166,7 @@ impl core::ops::Deref for BitmapVec { fn deref(&self) -> &Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. + #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] unsafe { core::ptr::addr_of!(self.repr.bitmap) } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. @@ -182,6 +183,7 @@ impl core::ops::DerefMut for BitmapVec { fn deref_mut(&mut self) -> &mut Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. + #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. From 7e85ac9da1acc591bd5269f2b890ed1994c42e96 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Mon, 13 Oct 2025 09:34:04 +0200 Subject: [PATCH 232/305] PM / devfreq: rockchip-dfi: switch to FIELD_PREP_WM16 macro The era of hand-rolled HIWORD_UPDATE macros is over, at least for those drivers that use constant masks. Like many other Rockchip drivers, rockchip-dfi brings with it its own HIWORD_UPDATE macro. This variant doesn't shift the value (and like the others, doesn't do any checking). Remove it, and replace instances of it with hw_bitfield.h's FIELD_PREP_WM16. Since FIELD_PREP_WM16 requires contiguous masks and shifts the value for us, some reshuffling of definitions needs to happen. This gives us better compile-time error checking, and in my opinion, nicer code. 
Tested on an RK3568 ODROID-M1 board (LPDDR4X at 1560 MHz), an RK3588 Radxa ROCK 5B board (LPDDR4X at 2112 MHz) and an RK3588 Radxa ROCK 5T board (LPDDR5 at 2400 MHz). perf measurements were consistent with the measurements of stress-ng --stream in all cases. Signed-off-by: Nicolas Frattaroli Signed-off-by: Yury Norov (NVIDIA) --- drivers/devfreq/event/rockchip-dfi.c | 45 ++++++++++++++-------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c index 5a2c9badcc64..5e6e7e900bda 100644 --- a/drivers/devfreq/event/rockchip-dfi.c +++ b/drivers/devfreq/event/rockchip-dfi.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -30,8 +31,6 @@ #define DMC_MAX_CHANNELS 4 -#define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) - /* DDRMON_CTRL */ #define DDRMON_CTRL 0x04 #define DDRMON_CTRL_LPDDR5 BIT(6) @@ -41,10 +40,6 @@ #define DDRMON_CTRL_LPDDR23 BIT(2) #define DDRMON_CTRL_SOFTWARE_EN BIT(1) #define DDRMON_CTRL_TIMER_CNT_EN BIT(0) -#define DDRMON_CTRL_DDR_TYPE_MASK (DDRMON_CTRL_LPDDR5 | \ - DDRMON_CTRL_DDR4 | \ - DDRMON_CTRL_LPDDR4 | \ - DDRMON_CTRL_LPDDR23) #define DDRMON_CTRL_LP5_BANK_MODE_MASK GENMASK(8, 7) #define DDRMON_CH0_WR_NUM 0x20 @@ -124,27 +119,31 @@ struct rockchip_dfi { unsigned int count_multiplier; /* number of data clocks per count */ }; -static int rockchip_dfi_ddrtype_to_ctrl(struct rockchip_dfi *dfi, u32 *ctrl, - u32 *mask) +static int rockchip_dfi_ddrtype_to_ctrl(struct rockchip_dfi *dfi, u32 *ctrl) { u32 ddrmon_ver; - *mask = DDRMON_CTRL_DDR_TYPE_MASK; - switch (dfi->ddr_type) { case ROCKCHIP_DDRTYPE_LPDDR2: case ROCKCHIP_DDRTYPE_LPDDR3: - *ctrl = DDRMON_CTRL_LPDDR23; + *ctrl = FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 0); break; case ROCKCHIP_DDRTYPE_LPDDR4: case ROCKCHIP_DDRTYPE_LPDDR4X: - *ctrl = DDRMON_CTRL_LPDDR4; + *ctrl = 
FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 0); break; case ROCKCHIP_DDRTYPE_LPDDR5: ddrmon_ver = readl_relaxed(dfi->regs); if (ddrmon_ver < 0x40) { - *ctrl = DDRMON_CTRL_LPDDR5 | dfi->lp5_bank_mode; - *mask |= DDRMON_CTRL_LP5_BANK_MODE_MASK; + *ctrl = FIELD_PREP_WM16(DDRMON_CTRL_LPDDR23, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR4, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_LPDDR5, 1) | + FIELD_PREP_WM16(DDRMON_CTRL_LP5_BANK_MODE_MASK, + dfi->lp5_bank_mode); break; } @@ -172,7 +171,6 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) void __iomem *dfi_regs = dfi->regs; int i, ret = 0; u32 ctrl; - u32 ctrl_mask; mutex_lock(&dfi->mutex); @@ -186,7 +184,7 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) goto out; } - ret = rockchip_dfi_ddrtype_to_ctrl(dfi, &ctrl, &ctrl_mask); + ret = rockchip_dfi_ddrtype_to_ctrl(dfi, &ctrl); if (ret) goto out; @@ -196,15 +194,16 @@ static int rockchip_dfi_enable(struct rockchip_dfi *dfi) continue; /* clear DDRMON_CTRL setting */ - writel_relaxed(HIWORD_UPDATE(0, DDRMON_CTRL_TIMER_CNT_EN | - DDRMON_CTRL_SOFTWARE_EN | DDRMON_CTRL_HARDWARE_EN), + writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_TIMER_CNT_EN, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 0) | + FIELD_PREP_WM16(DDRMON_CTRL_HARDWARE_EN, 0), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); - writel_relaxed(HIWORD_UPDATE(ctrl, ctrl_mask), - dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); + writel_relaxed(ctrl, dfi_regs + i * dfi->ddrmon_stride + + DDRMON_CTRL); /* enable count, use software mode */ - writel_relaxed(HIWORD_UPDATE(DDRMON_CTRL_SOFTWARE_EN, DDRMON_CTRL_SOFTWARE_EN), + writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 1), dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); if (dfi->ddrmon_ctrl_single) @@ -234,8 +233,8 @@ static void rockchip_dfi_disable(struct rockchip_dfi *dfi) if (!(dfi->channel_mask & BIT(i))) continue; - writel_relaxed(HIWORD_UPDATE(0, 
DDRMON_CTRL_SOFTWARE_EN), - dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); + writel_relaxed(FIELD_PREP_WM16(DDRMON_CTRL_SOFTWARE_EN, 0), + dfi_regs + i * dfi->ddrmon_stride + DDRMON_CTRL); if (dfi->ddrmon_ctrl_single) break; From 1f4a222b0e334540343fbb5d3eac4584a6bfe180 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Oct 2025 07:57:28 -0700 Subject: [PATCH 233/305] Remove long-stale ext3 defconfig option Inspired by commit c065b6046b34 ("Use CONFIG_EXT4_FS instead of CONFIG_EXT3_FS in all of the defconfigs") I looked around for any other left-over EXT3 config options, and found some old defconfig files still mentioned CONFIG_EXT3_DEFAULTS_TO_ORDERED. That config option was removed a decade ago in commit c290ea01abb7 ("fs: Remove ext3 filesystem driver"). It had a good run, but let's remove it for good. Signed-off-by: Linus Torvalds --- arch/arm/configs/axm55xx_defconfig | 1 - arch/hexagon/configs/comet_defconfig | 1 - arch/sh/configs/ap325rxa_defconfig | 1 - arch/sh/configs/apsh4a3a_defconfig | 1 - arch/sh/configs/apsh4ad0a_defconfig | 1 - arch/sh/configs/ecovec24_defconfig | 1 - arch/sh/configs/edosk7760_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/magicpanelr2_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rsk7264_defconfig | 1 - arch/sh/configs/rsk7269_defconfig | 1 - arch/sh/configs/sdk7780_defconfig | 1 - arch/sh/configs/sdk7786_defconfig | 1 - arch/sh/configs/se7343_defconfig | 1 - arch/sh/configs/se7712_defconfig | 1 - arch/sh/configs/se7721_defconfig | 1 - arch/sh/configs/se7722_defconfig | 1 - arch/sh/configs/se7724_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - arch/sh/configs/sh7785lcr_32bit_defconfig | 1 - arch/sh/configs/sh7785lcr_defconfig | 1 - arch/sh/configs/shx3_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - 
arch/sh/configs/ul2_defconfig | 1 - arch/sh/configs/urquell_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - 31 files changed, 31 deletions(-) diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig index 9b263ea9a878..242a61208a0f 100644 --- a/arch/arm/configs/axm55xx_defconfig +++ b/arch/arm/configs/axm55xx_defconfig @@ -195,7 +195,6 @@ CONFIG_PL320_MBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index b132752693a9..22d7f8ac58a3 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -47,7 +47,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 336dbacd89bd..48b2e97114f9 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -82,7 +82,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 59daf99ea745..85db9ce42d1a 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -61,7 +61,6 @@ CONFIG_LOGO=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index df2a669ea9d8..e8b3b720578b 100644 --- 
a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -89,7 +89,6 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index dd7e54c451d6..fcca7cc5a75a 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -110,7 +110,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index 711db47b65ba..98f4611ba553 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -88,7 +88,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_XIP=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_NFS_FS=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index f8cad1e7a333..e5d102cbff89 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -60,7 +60,6 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 08342ceee32e..22177aa8f961 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -94,7 +94,6 @@ CONFIG_USB_EMI26=m CONFIG_USB_SISUSBVGA=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_ISO9660_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 
96a21173522d..ff992301622b 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -50,7 +50,6 @@ CONFIG_HW_RANDOM=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index af7f777b20be..a29fb912a242 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -65,7 +65,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 11f210517f76..58b792dacfec 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -75,7 +75,6 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index ae367d7a14a8..7edf18451158 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -70,7 +70,6 @@ CONFIG_RTC_DRV_RS5C372=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index 3aba0102304f..28a81efefb02 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -60,7 +60,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/rsk7269_defconfig 
b/arch/sh/configs/rsk7269_defconfig index f82f280fc55a..f8bfa46643ff 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -44,7 +44,6 @@ CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 3b51195bf1b5..311817161afb 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -103,7 +103,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index ebb3f4420ae8..2433aa5f44a8 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -162,7 +162,6 @@ CONFIG_STAGING=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_XFS_FS=y CONFIG_BTRFS_FS=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 6ef546ee8a32..b0baa5771c26 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -85,7 +85,6 @@ CONFIG_USB_ISP116X_HCD=y CONFIG_UIO=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 4cecf9c06a65..1078c286a610 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -84,7 +84,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y diff --git 
a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index c28057b70ad7..edb9e0d2dce5 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -108,7 +108,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_DNOTIFY is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 88bfd953ef89..33daa0a17a32 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -45,7 +45,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index e1b2616ef921..d572655f842d 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -111,7 +111,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 306e1661fbf5..3d194d81c92b 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -58,7 +58,6 @@ CONFIG_SH_WDT=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 85ec00b7dbd2..e7b72ff377a8 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -62,7 +62,6 @@ CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_AUTOFS_FS=y 
CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index e860a20d3f0f..17d2471d8e51 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -114,7 +114,6 @@ CONFIG_DMADEVICES=y CONFIG_UIO=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index 33c98b4a2adb..34c8fe755add 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -91,7 +91,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 3169e4dc7004..52e7a42d66c7 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -85,7 +85,6 @@ CONFIG_RTC_DRV_SH=y CONFIG_UIO=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 6bff00038072..2c474645ec36 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -216,7 +216,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=m CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT4_FS_XATTR is not set CONFIG_XFS_FS=m CONFIG_FUSE_FS=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index b89eb8f5cc5c..b0c2ba478353 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -67,7 +67,6 @@ CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y 
CONFIG_TMPFS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 60cb716ef195..e6d807f52253 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -115,7 +115,6 @@ CONFIG_RTC_DRV_SH=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_BTRFS_FS=y CONFIG_MSDOS_FS=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 200640b93e05..127940aafc39 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -188,7 +188,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y From 0187c08058da3e7f11b356ac27e0c427d36f33f2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 14 Oct 2025 21:28:44 -0700 Subject: [PATCH 234/305] HID: hid-input: only ignore 0 battery events for digitizers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 581c4484769e ("HID: input: map digitizer battery usage") added handling of battery events for digitizers (typically for batteries presented in styli). Digitizers typically report correct battery levels only when the stylus is actively touching the surface, and in other cases they may report battery level of 0. To avoid confusing consumers of the battery information the code was added to filter out reports with 0 battery levels. However there exist other kinds of devices that may legitimately report 0 battery levels. Fix this by filtering out 0-level reports only for digitizer usages, and continue reporting them for other kinds of devices (Smart Batteries, etc).
Reported-by: 卢国宏 Fixes: 581c4484769e ("HID: input: map digitizer battery usage") Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 5d7532d79d21..e56e7de53279 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -635,7 +635,10 @@ static void hidinput_update_battery(struct hid_device *dev, unsigned int usage, return; } - if (value == 0 || value < dev->battery_min || value > dev->battery_max) + if ((usage & HID_USAGE_PAGE) == HID_UP_DIGITIZER && value == 0) + return; + + if (value < dev->battery_min || value > dev->battery_max) return; capacity = hidinput_scale_battery_capacity(dev, value); From aa4daea418ee4215dca5c8636090660c545cb233 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 8 Oct 2025 09:40:33 -0300 Subject: [PATCH 235/305] HID: multitouch: fix name of Stylus input devices HID_DG_PEN devices should have a suffix of "Stylus", as pointed out by commit c0ee1d571626 ("HID: hid-input: Add suffix also for HID_DG_PEN"). However, on multitouch devices, these suffixes may be overridden. Before that commit, HID_DG_PEN devices would get the "Stylus" suffix, but after that, multitouch would override them to have an "UNKNOWN" suffix. Just add HID_DG_PEN to the list of non-overridden suffixes in multitouch.
Before this fix: [ 0.470981] input: ELAN9008:00 04F3:2E14 UNKNOWN as /devices/pci0000:00/0000:00:15.1/i2c_designware.1/i2c-16/i2c-ELAN9008:00/0018:04F3:2E14.0001/input/input8 ELAN9008:00 04F3:2E14 UNKNOWN After this fix: [ 0.474332] input: ELAN9008:00 04F3:2E14 Stylus as /devices/pci0000:00/0000:00:15.1/i2c_designware.1/i2c-16/i2c-ELAN9008:00/0018:04F3:2E14.0001/input/input8 ELAN9008:00 04F3:2E14 Stylus Fixes: c0ee1d571626 ("HID: hid-input: Add suffix also for HID_DG_PEN") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Mika Westerberg Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 2879e65cf303..513b8673ad8d 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1742,6 +1742,7 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) case HID_CP_CONSUMER_CONTROL: case HID_GD_WIRELESS_RADIO_CTLS: case HID_GD_SYSTEM_MULTIAXIS: + case HID_DG_PEN: /* already handled by hid core */ break; case HID_DG_TOUCHSCREEN: From 46f781e0d151844589dc2125c8cce3300546f92a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 8 Oct 2025 16:06:58 +0200 Subject: [PATCH 236/305] HID: multitouch: fix sticky fingers The sticky fingers quirk (MT_QUIRK_STICKY_FINGERS) was only considering the case when slots were not released during the last report. This can be problematic if the firmware forgets to release a finger while others are still present. This was observed on the Synaptics DLL0945 touchpad found on the Dell XPS 9310 and the Dell Inspiron 5406. 
Fixes: 4f4001bc76fd ("HID: multitouch: fix rare Win 8 cases when the touch up event gets missing") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 513b8673ad8d..179dc316b4b5 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -94,9 +94,8 @@ enum report_mode { TOUCHPAD_REPORT_ALL = TOUCHPAD_REPORT_BUTTONS | TOUCHPAD_REPORT_CONTACTS, }; -#define MT_IO_FLAGS_RUNNING 0 -#define MT_IO_FLAGS_ACTIVE_SLOTS 1 -#define MT_IO_FLAGS_PENDING_SLOTS 2 +#define MT_IO_SLOTS_MASK GENMASK(7, 0) /* reserve first 8 bits for slot tracking */ +#define MT_IO_FLAGS_RUNNING 32 static const bool mtrue = true; /* default for true */ static const bool mfalse; /* default for false */ @@ -172,7 +171,11 @@ struct mt_device { struct timer_list release_timer; /* to release sticky fingers */ struct hid_haptic_device *haptic; /* haptic related configuration */ struct hid_device *hdev; /* hid_device we're attached to */ - unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_*) */ + unsigned long mt_io_flags; /* mt flags (MT_IO_FLAGS_RUNNING) + * first 8 bits are reserved for keeping the slot + * states, this is fine because we only support up + * to 250 slots (MT_MAX_MAXCONTACT) + */ __u8 inputmode_value; /* InputMode HID feature value */ __u8 maxcontacts; bool is_buttonpad; /* is this device a button pad? 
*/ @@ -986,6 +989,7 @@ static void mt_release_pending_palms(struct mt_device *td, for_each_set_bit(slotnum, app->pending_palm_slots, td->maxcontacts) { clear_bit(slotnum, app->pending_palm_slots); + clear_bit(slotnum, &td->mt_io_flags); input_mt_slot(input, slotnum); input_mt_report_slot_inactive(input); @@ -1019,12 +1023,6 @@ static void mt_sync_frame(struct mt_device *td, struct mt_application *app, app->left_button_state = 0; if (td->is_haptic_touchpad) hid_haptic_pressure_reset(td->haptic); - - if (test_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags)) - set_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - else - clear_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags); - clear_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); } static int mt_compute_timestamp(struct mt_application *app, __s32 value) @@ -1202,7 +1200,9 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input, input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor); - set_bit(MT_IO_FLAGS_ACTIVE_SLOTS, &td->mt_io_flags); + set_bit(slotnum, &td->mt_io_flags); + } else { + clear_bit(slotnum, &td->mt_io_flags); } return 0; @@ -1337,7 +1337,7 @@ static void mt_touch_report(struct hid_device *hid, * defect. 
*/ if (app->quirks & MT_QUIRK_STICKY_FINGERS) { - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mod_timer(&td->release_timer, jiffies + msecs_to_jiffies(100)); else @@ -1814,6 +1814,7 @@ static void mt_release_contacts(struct hid_device *hid) for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); input_mt_report_slot_inactive(input_dev); + clear_bit(i, &td->mt_io_flags); } input_mt_sync_frame(input_dev); input_sync(input_dev); @@ -1836,7 +1837,7 @@ static void mt_expired_timeout(struct timer_list *t) */ if (test_and_set_bit_lock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags)) return; - if (test_bit(MT_IO_FLAGS_PENDING_SLOTS, &td->mt_io_flags)) + if (td->mt_io_flags & MT_IO_SLOTS_MASK) mt_release_contacts(hdev); clear_bit_unlock(MT_IO_FLAGS_RUNNING, &td->mt_io_flags); } From d9b3014a7f1425011909ad358dc0c8f187853a12 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 8 Oct 2025 16:06:59 +0200 Subject: [PATCH 237/305] selftests/hid: add tests for missing release on the Dell Synaptics Add a simple test for the corner case not currently covered by the sticky fingers quirk. Because it's a corner case test, we only test this on a couple of devices, not on all of them because the value of adding the same test over and over is rather moot. 
Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../selftests/hid/tests/test_multitouch.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py index 5d2ffa3d5977..ece0ba8e7d34 100644 --- a/tools/testing/selftests/hid/tests/test_multitouch.py +++ b/tools/testing/selftests/hid/tests/test_multitouch.py @@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch): assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Confidence" not in uhdev.fields, + "Device not compatible, missing Confidence usage", + ) + def test_mt_confidence_bad_multi_release(self): + """Check for the sticky finger being properly detected. + + We first inject 3 fingers, then release only the second. + After 100 ms, we should receive a generated event about the + 2 missing fingers being released. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + # send 3 touches + t0 = Touch(1, 50, 10) + t1 = Touch(2, 150, 100) + t2 = Touch(3, 250, 200) + r = uhdev.event([t0, t1, t2]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # release the second + t1.tipswitch = False + r = uhdev.event([t1]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + # only the second is released + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1 + + # wait for the timer to kick in + time.sleep(0.2) + + events = uhdev.next_sync_events() + self.debug_reports([], uhdev, events) + + # now all 3 fingers are released + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events + assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 
-1 + assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + + class TestElanXPS9360(BaseTest.TestWin8Multitouch): def create_device(self): return Digitizer( @@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP): input_info=(BusType.I2C, 0x06CB, 0xCE08), rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 
75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", ) + +class Testsynaptics_06cb_ce26(TestWin8TSConfidence): + def create_device(self): + return PTP( + "uhid test synaptics_06cb_ce26", + max_contacts=5, + input_info=(BusType.I2C, 0x06CB, 0xCE26), + rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 
0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0", + ) From bfdd74166a639930baaba27a8d729edaacd46907 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Tue, 14 Oct 2025 00:47:39 +0000 Subject: [PATCH 238/305] gve: Check valid ts bit on RX descriptor before hw timestamping The device returns a valid bit in the LSB of the low timestamp byte in the completion descriptor that the driver should check before setting the SKB's hardware timestamp. If the timestamp is not valid, do not hardware timestamp the SKB. 
Cc: stable@vger.kernel.org Fixes: b2c7aeb49056 ("gve: Implement ndo_hwtstamp_get/set for RX timestamping") Reviewed-by: Joshua Washington Signed-off-by: Tim Hostetler Signed-off-by: Harshitha Ramamurthy Reviewed-by: Simon Horman Reviewed-by: Willem de Bruijn Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251014004740.2775957-1-hramamurthy@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 2 ++ drivers/net/ethernet/google/gve/gve_desc_dqo.h | 3 ++- drivers/net/ethernet/google/gve/gve_rx_dqo.c | 16 +++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index bceaf9b05cb4..4cc6dcbfd367 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -100,6 +100,8 @@ */ #define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96 +#define GVE_DQO_RX_HWTSTAMP_VALID 0x1 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h index d17da841b5a0..f7786b03c744 100644 --- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h @@ -236,7 +236,8 @@ struct gve_rx_compl_desc_dqo { u8 status_error1; - __le16 reserved5; + u8 reserved5; + u8 ts_sub_nsecs_low; __le16 buf_id; /* Buffer ID which was sent on the buffer queue. 
*/ union { diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 55393b784317..1aff3bbb8cfc 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -456,14 +456,20 @@ static void gve_rx_skb_hash(struct sk_buff *skb, * Note that this means if the time delta between packet reception and the last * clock read is greater than ~2 seconds, this will provide invalid results. */ -static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts) +static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, + const struct gve_rx_compl_desc_dqo *desc) { u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter); struct sk_buff *skb = rx->ctx.skb_head; - u32 low = (u32)last_read; - s32 diff = hwts - low; + u32 ts, low; + s32 diff; - skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); + if (desc->ts_sub_nsecs_low & GVE_DQO_RX_HWTSTAMP_VALID) { + ts = le32_to_cpu(desc->ts); + low = (u32)last_read; + diff = ts - low; + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); + } } static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) @@ -944,7 +950,7 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype); if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL) - gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts)); + gve_rx_skb_hwtstamp(rx, desc); /* RSC packets must set gso_size otherwise the TCP stack will complain * that packets are larger than MTU. From d451a0e88e9fa710df33f8dd5dc7ca63e22ef211 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 15 Oct 2025 17:05:04 +0200 Subject: [PATCH 239/305] smb: client: let smbd_destroy() wait for SMBDIRECT_SOCKET_DISCONNECTED We should wait for the rdma_cm to become SMBDIRECT_SOCKET_DISCONNECTED, it turns out that (at least running some xfstests e.g. 
cifs/001) often triggers the case where wait_event_interruptible() returns with -ERESTARTSYS instead of waiting for SMBDIRECT_SOCKET_DISCONNECTED to be reached. Or we are already in SMBDIRECT_SOCKET_DISCONNECTING and never wait for SMBDIRECT_SOCKET_DISCONNECTED. Fixes: 050b8c374019 ("smbd: Make upper layer decide when to destroy the transport") Fixes: e8b3bfe9bc65 ("cifs: smbd: Don't destroy transport on RDMA disconnect") Fixes: b0aa92a229ab ("smb: client: make sure smbd_disconnect_rdma_work() doesn't run after smbd_destroy() took over") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 77de85d7cdc3..49e2df3ad1f0 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1575,12 +1575,12 @@ void smbd_destroy(struct TCP_Server_Info *server) disable_work_sync(&sc->disconnect_work); log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smbd_disconnect_rdma_work(&sc->disconnect_work); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) { log_rdma_event(INFO, "wait for transport being disconnected\n"); - wait_event_interruptible( - sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); + log_rdma_event(INFO, "waited for transport being disconnected\n"); } /* From 2aab1f993c8cb753ccb3d5b848cd758e2e87d965 Mon Sep 17 00:00:00 2001 From: Ankan Biswas Date: Wed, 15 Oct 2025 20:50:57 +0530 Subject: [PATCH 240/305] drm/gpuvm: Fix kernel-doc warning for drm_gpuvm_map_req.map The kernel-doc for struct drm_gpuvm_map_req.map was added as '@op_map' instead of '@map', leading to this 
warning during htmldocs build: WARNING: include/drm/drm_gpuvm.h:1083 struct member 'map' not described in 'drm_gpuvm_map_req' Fixes: 000a45dce7ad ("drm/gpuvm: Pass map arguments through a struct") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20250821133539.03aa298e@canb.auug.org.au/ Signed-off-by: Ankan Biswas Signed-off-by: Danilo Krummrich --- include/drm/drm_gpuvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h index 8890ded1d907..476990e761f8 100644 --- a/include/drm/drm_gpuvm.h +++ b/include/drm/drm_gpuvm.h @@ -1078,7 +1078,7 @@ struct drm_gpuva_ops { */ struct drm_gpuvm_map_req { /** - * @op_map: struct drm_gpuva_op_map + * @map: struct drm_gpuva_op_map */ struct drm_gpuva_op_map map; }; From e6416c2dfe23c9a6fec881fda22ebb9ae486cfc5 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sat, 11 Oct 2025 00:59:58 +0800 Subject: [PATCH 241/305] x86/CPU/AMD: Prevent reset reasons from being retained across reboot The S5_RESET_STATUS register is parsed on boot and printed to kmsg. However, this could sometimes be misleading and lead to users wasting a lot of time on meaningless debugging for two reasons: * Some bits are never cleared by hardware. It's the software's responsibility to clear them as per the Processor Programming Reference (see [1]). * Some rare hardware-initiated platform resets do not update the register at all. In both cases, a previous reboot could leave its trace in the register, resulting in users seeing unrelated reboot reasons while debugging random reboots afterward. Write the read value back to the register in order to clear all reason bits since they are write-1-to-clear while the others must be preserved. [1]: https://bugzilla.kernel.org/show_bug.cgi?id=206537#attach_303991 [ bp: Massage commit message. 
] Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset") Signed-off-by: Rong Zhang Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Yazen Ghannam Cc: Link: https://lore.kernel.org/all/20250913144245.23237-1-i@rong.moe/ --- arch/x86/kernel/cpu/amd.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5398db4dedb4..ccaa51ce63f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1355,11 +1355,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) From 18d6b1743eafeb3fb1e0ea5a2b7fd0a773d525a8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 15 Oct 2025 13:38:53 -0600 Subject: [PATCH 242/305] io_uring/rw: check for NULL io_br_sel when putting a buffer Both the read and write side use kiocb_done() to finish a request, and kiocb_done() will call io_put_kbuf() in case a provided buffer was used for the request. Provided buffers are not supported for writes, hence NULL is being passed in. This normally works fine, as io_put_kbuf() won't actually use the value unless REQ_F_BUFFER_RING or REQ_F_BUFFER_SELECTED is set in the request flags. 
But depending on compiler (or whether or not CONFIG_CC_OPTIMIZE_FOR_SIZE is set), that may be done even though the value is never used. This will then cause a NULL pointer dereference. Make it a bit more obvious and check for a NULL io_br_sel, and don't even bother calling io_put_kbuf() for that case. Fixes: 5fda51255439 ("io_uring/kbuf: switch to storing struct io_buffer_list locally") Reported-by: David Howells Tested-by: David Howells Signed-off-by: Jens Axboe --- io_uring/rw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index a0f9d2021e3f..5b2241a5813c 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -655,13 +655,17 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { + u32 cflags = 0; + __io_complete_rw_common(req, ret); /* * Safe to call io_end from here as we're inline * from the submission path. */ io_req_io_end(req); - io_req_set_res(req, final_ret, io_put_kbuf(req, ret, sel->buf_list)); + if (sel) + cflags = io_put_kbuf(req, ret, sel->buf_list); + io_req_set_res(req, final_ret, cflags); io_req_rw_cleanup(req, issue_flags); return IOU_COMPLETE; } else { From bc384963bc18e4f21cf8615b57cbbc9c5e0d309a Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 14 Oct 2025 19:06:50 +0200 Subject: [PATCH 243/305] MAINTAINERS: new entry for IPv6 IOAM Create a maintainer entry for IPv6 IOAM. Add myself as I authored most if not all of the IPv6 IOAM code in the kernel and actively participate in the related IETF groups. 
Signed-off-by: Justin Iurman Link: https://patch.msgid.link/20251014170650.27679-1-justin.iurman@uliege.be Signed-off-by: Jakub Kicinski --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8a213950e37e..eba5e091a086 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18012,6 +18012,16 @@ X: net/rfkill/ X: net/wireless/ X: tools/testing/selftests/net/can/ +NETWORKING [IOAM] +M: Justin Iurman +S: Maintained +F: Documentation/networking/ioam6* +F: include/linux/ioam6* +F: include/net/ioam6* +F: include/uapi/linux/ioam6* +F: net/ipv6/ioam6* +F: tools/testing/selftests/net/ioam6* + NETWORKING [IPSEC] M: Steffen Klassert M: Herbert Xu From 0c3f2e62815a43628e748b1e4ad97a1c46cce703 Mon Sep 17 00:00:00 2001 From: Alexey Simakov Date: Tue, 14 Oct 2025 19:47:38 +0300 Subject: [PATCH 244/305] tg3: prevent use of uninitialized remote_adv and local_adv variables Some execution paths that jump to the fiber_setup_done label could leave the remote_adv and local_adv variables uninitialized and then use them. Initialize these variables at the point of definition to avoid this.
Fixes: 85730a631f0c ("tg3: Add SGMII phy support for 5719/5718 serdes") Co-developed-by: Alexandr Sapozhnikov Signed-off-by: Alexandr Sapozhnikov Signed-off-by: Alexey Simakov Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20251014164736.5890-1-bigalex934@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 7f00ec7fd7b9..d78cafdb2094 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -5803,7 +5803,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) u32 current_speed = SPEED_UNKNOWN; u8 current_duplex = DUPLEX_UNKNOWN; bool current_link_up = false; - u32 local_adv, remote_adv, sgsr; + u32 local_adv = 0, remote_adv = 0, sgsr; if ((tg3_asic_rev(tp) == ASIC_REV_5719 || tg3_asic_rev(tp) == ASIC_REV_5720) && @@ -5944,9 +5944,6 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, bool force_reset) else current_duplex = DUPLEX_HALF; - local_adv = 0; - remote_adv = 0; - if (bmcr & BMCR_ANENABLE) { u32 common; From ce5af41e3234425a40974696682163edfd21128c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:56 +0200 Subject: [PATCH 245/305] tls: trim encrypted message to match the plaintext on short splice During tls_sw_sendmsg_locked, we pre-allocate the encrypted message for the size we're expecting to send during the current iteration, but we may end up sending less, for example when splicing: if we're getting the data from small fragments of memory, we may fill up all the slots in the skmsg with less data than expected. In this case, we need to trim the encrypted message to only the length we actually need, to avoid pushing uninitialized bytes down the underlying TCP socket. 
Fixes: fe1e81d4f73b ("tls/sw: Support MSG_SPLICE_PAGES") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/66a0ae99c9efc15f88e9e56c1f58f902f442ce86.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index daac9fd4be7e..36ca3011ab87 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1112,8 +1112,11 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, goto send_end; tls_ctx->pending_open_record_frags = true; - if (sk_msg_full(msg_pl)) + if (sk_msg_full(msg_pl)) { full_record = true; + sk_msg_trim(sk, msg_en, + msg_pl->sg.size + prot->overhead_size); + } if (full_record || eor) goto copied; From b014a4e066c555185b7c367efacdc33f16695495 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:57 +0200 Subject: [PATCH 246/305] tls: wait for async encrypt in case of error during latter iterations of sendmsg If we hit an error during the main loop of tls_sw_sendmsg_locked (eg failed allocation), we jump to send_end and immediately return. Previous iterations may have queued async encryption requests that are still pending. We should wait for those before returning, as we could otherwise be reading from memory that userspace believes we're not using anymore, which would be a sort of use-after-free. This is similar to what tls_sw_recvmsg already does: failures during the main loop jump to the "wait for async" code, not straight to the unlock/return. 
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/c793efe9673b87f808d84fdefc0f732217030c52.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 36ca3011ab87..1478d515badc 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1054,7 +1054,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (ret == -EINPROGRESS) num_async++; else if (ret != -EAGAIN) - goto send_end; + goto end; } } @@ -1226,8 +1226,9 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, goto alloc_encrypted; } +send_end: if (!num_async) { - goto send_end; + goto end; } else if (num_zc || eor) { int err; @@ -1245,7 +1246,7 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, tls_tx_records(sk, msg->msg_flags); } -send_end: +end: ret = sk_stream_error(sk, msg->msg_flags, ret); return copied > 0 ? copied : ret; } From b6fe4c29bb51cf239ecf48eacf72b924565cb619 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:58 +0200 Subject: [PATCH 247/305] tls: always set record_type in tls_process_cmsg When userspace wants to send a non-DATA record (via the TLS_SET_RECORD_TYPE cmsg), we need to send any pending data from a previous MSG_MORE send() as a separate DATA record. If that DATA record is encrypted asynchronously, tls_handle_open_record will return -EINPROGRESS. This is currently treated as an error by tls_process_cmsg, and it will skip setting record_type to the correct value, but the caller (tls_sw_sendmsg_locked) handles that return value correctly and proceeds with sending the new message with an incorrect record_type (DATA instead of whatever was requested in the cmsg). Always set record_type before handling the open record. 
If tls_handle_open_record returns an error, record_type will be ignored. If it succeeds, whether with synchronous crypto (returning 0) or asynchronous (returning -EINPROGRESS), the caller will proceed correctly. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/0457252e578a10a94e40c72ba6288b3a64f31662.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a3ccb3135e51..39a2ab47fe72 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -255,12 +255,9 @@ int tls_process_cmsg(struct sock *sk, struct msghdr *msg, if (msg->msg_flags & MSG_MORE) return -EINVAL; - rc = tls_handle_open_record(sk, msg->msg_flags); - if (rc) - return rc; - *record_type = *(unsigned char *)CMSG_DATA(cmsg); - rc = 0; + + rc = tls_handle_open_record(sk, msg->msg_flags); break; default: return -EINVAL; From b8a6ff84abbcbbc445463de58704686011edc8e1 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:16:59 +0200 Subject: [PATCH 248/305] tls: wait for pending async decryptions if tls_strp_msg_hold fails Async decryption calls tls_strp_msg_hold to create a clone of the input skb to hold references to the memory it uses. If we fail to allocate that clone, proceeding with async decryption can lead to various issues (UAF on the skb, writing into userspace memory after the recv() call has returned). In this case, wait for all pending decryption requests. 
Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/b9fe61dcc07dab15da9b35cf4c7d86382a98caf2.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1478d515badc..e3d852091e7a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1641,8 +1641,10 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, if (unlikely(darg->async)) { err = tls_strp_msg_hold(&ctx->strp, &ctx->async_hold); - if (err) - __skb_queue_tail(&ctx->async_hold, darg->skb); + if (err) { + err = tls_decrypt_async_wait(ctx); + darg->async = false; + } return err; } From 7f846c65ca11e63d2409868ff039081f80e42ae4 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:00 +0200 Subject: [PATCH 249/305] tls: don't rely on tx_work during send() With async crypto, we rely on tx_work to actually transmit records once encryption completes. But while send() is running, both the tx_lock and socket lock are held, so tx_work_handler cannot process the queue of encrypted records, and simply reschedules itself. During a large send(), this could last a long time, and use a lot of memory. Transmit any pending encrypted records before restarting the main loop of tls_sw_sendmsg_locked. 
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Reported-by: Jann Horn Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/8396631478f70454b44afb98352237d33f48d34d.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e3d852091e7a..d17135369980 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1152,6 +1152,13 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, } else if (ret != -EAGAIN) goto send_end; } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } + continue; rollback_iter: copied -= try_to_copy; @@ -1207,6 +1214,12 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, goto send_end; } } + + /* Transmit if any encryptions have completed */ + if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { + cancel_delayed_work(&ctx->tx_work.work); + tls_tx_records(sk, msg->msg_flags); + } } continue; From f95fce1e953b8a7af3fbad84aaffe92804196e2d Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:01 +0200 Subject: [PATCH 250/305] selftests: net: tls: add tests for cmsg vs MSG_MORE We don't have a test to check that MSG_MORE won't let us merge records of different types across sendmsg calls. 
Add new tests that check: - MSG_MORE is only allowed for DATA records - a pending DATA record gets closed and pushed before a non-DATA record is processed Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/b34feeadefe8a997f068d5ed5617afd0072df3c0.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e788b84551ca..77e4e30b46cc 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -564,6 +564,40 @@ TEST_F(tls, msg_more) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, cmsg_msg_more) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + + /* we don't allow MSG_MORE with non-DATA records */ + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, + MSG_MORE), -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(tls, msg_more_then_cmsg) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10 * 2]; + int ret; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); + + ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0); + EXPECT_EQ(ret, send_len); + + /* initial DATA record didn't get merged with the non-DATA record */ + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); +} + TEST_F(tls, msg_more_unsent) { char const *test_str = "test_read"; From 3667e9b442b95b021189db793b9156552f918e99 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Oct 2025 11:17:02 +0200 Subject: [PATCH 251/305] selftests: tls: add test for short splice due to full skmsg We don't have a test triggering a partial splice caused by a full skmsg. 
Add one, based on a program by Jann Horn. Use MAX_FRAGS=48 to make sure the skmsg will be full for any allowed value of CONFIG_MAX_SKB_FRAGS (17..45). Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/1d129a15f526ea3602f3a2b368aa0b6f7e0d35d5.1760432043.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 77e4e30b46cc..5c6d8215021c 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -946,6 +946,37 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +#define MAX_FRAGS 48 +TEST_F(tls, splice_short) +{ + struct iovec sendchar_iov; + char read_buf[0x10000]; + char sendbuf[0x100]; + char sendchar = 'S'; + int pipefds[2]; + int i; + + sendchar_iov.iov_base = &sendchar; + sendchar_iov.iov_len = 1; + + memset(sendbuf, 's', sizeof(sendbuf)); + + ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0); + ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0); + + for (i = 0; i < MAX_FRAGS; i++) + ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0); + + ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf)); + + EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0), + MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} +#undef MAX_FRAGS + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; From 1a8fed52f7be14e45785e8e54d0d0b50fc17dbd8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 14 Oct 2025 02:17:25 -0700 Subject: [PATCH 252/305] netdevsim: set the carrier when the device goes up Bringing a linked netdevsim device down and then up causes communication 
failure because both interfaces lack carrier. Basically, an ifdown/ifup on the interface leaves the link broken. Commit 3762ec05a9fbda ("netdevsim: add NAPI support") added support for NAPI, calling netif_carrier_off() in nsim_stop(). This patch re-enables the carrier symmetrically on nsim_open(), in case the device is linked and the peer is up. Signed-off-by: Breno Leitao Fixes: 3762ec05a9fbda ("netdevsim: add NAPI support") Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251014-netdevsim_fix-v2-1-53b40590dae1@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index ebc3833e95b4..fa1d97885caa 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -545,6 +545,7 @@ static void nsim_enable_napi(struct netdevsim *ns) static int nsim_open(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct netdevsim *peer; int err; netdev_assert_locked(dev); @@ -555,6 +556,12 @@ static int nsim_open(struct net_device *dev) nsim_enable_napi(ns); + peer = rtnl_dereference(ns->peer); + if (peer && netif_running(peer->netdev)) { + netif_carrier_on(dev); + netif_carrier_on(peer->netdev); + } + return 0; } From 8d93ff40d49d70e05c82a74beae31f883fe0eaf8 Mon Sep 17 00:00:00 2001 From: I Viswanath Date: Mon, 13 Oct 2025 23:46:48 +0530 Subject: [PATCH 253/305] net: usb: lan78xx: fix use of improperly initialized dev->chipid in lan78xx_reset dev->chipid is used in lan78xx_init_mac_address before it's initialized: lan78xx_reset() { lan78xx_init_mac_address() lan78xx_read_eeprom() lan78xx_read_raw_eeprom() <- dev->chipid is used here dev->chipid = ... <- dev->chipid is initialized correctly here } Reorder initialization so that dev->chipid is set before calling lan78xx_init_mac_address().
Fixes: a0db7d10b76e ("lan78xx: Add to handle mux control per chip id") Signed-off-by: I Viswanath Reviewed-by: Vadim Fedorenko Reviewed-by: Khalid Aziz Link: https://patch.msgid.link/20251013181648.35153-1-viswanathiyyappan@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 28195d9a8d6b..00397a807393 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3250,10 +3250,6 @@ static int lan78xx_reset(struct lan78xx_net *dev) } } while (buf & HW_CFG_LRST_); - ret = lan78xx_init_mac_address(dev); - if (ret < 0) - return ret; - /* save DEVID for later usage */ ret = lan78xx_read_reg(dev, ID_REV, &buf); if (ret < 0) @@ -3262,6 +3258,10 @@ static int lan78xx_reset(struct lan78xx_net *dev) dev->chipid = (buf & ID_REV_CHIP_ID_MASK_) >> 16; dev->chiprev = buf & ID_REV_CHIP_REV_MASK_; + ret = lan78xx_init_mac_address(dev); + if (ret < 0) + return ret; + /* Respond to the IN token with a NAK */ ret = lan78xx_read_reg(dev, USB_CFG0, &buf); if (ret < 0) From c2b77f42205ef485a647f62082c442c1cd69d3fc Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Thu, 16 Oct 2025 02:52:55 +0000 Subject: [PATCH 254/305] smb: client: Fix refcount leak for cifs_sb_tlink Fix three refcount inconsistency issues related to `cifs_sb_tlink`. Comments for `cifs_sb_tlink` state that `cifs_put_tlink()` needs to be called after successful calls to `cifs_sb_tlink()`. Three calls fail to update refcount accordingly, leading to possible resource leaks. 
Fixes: 8ceb98437946 ("CIFS: Move rename to ops struct") Fixes: 2f1afe25997f ("cifs: Use smb 2 - 3 and cifsacl mount options getacl functions") Fixes: 366ed846df60 ("cifs: Use smb 2 - 3 and cifsacl mount options setacl function") Cc: stable@vger.kernel.org Signed-off-by: Shuhao Fu Signed-off-by: Steve French --- fs/smb/client/inode.c | 6 ++++-- fs/smb/client/smb2ops.c | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 239dd84a336f..098a79b7a959 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2431,8 +2431,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, tcon = tlink_tcon(tlink); server = tcon->ses->server; - if (!server->ops->rename) - return -ENOSYS; + if (!server->ops->rename) { + rc = -ENOSYS; + goto do_rename_exit; + } /* try path-based rename first */ rc = server->ops->rename(xid, tcon, from_dentry, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 7c392cf5940b..95cd484cfbba 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3212,8 +3212,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return ERR_PTR(rc); + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3245,6 +3244,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); @@ -3285,8 +3285,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; - free_xid(xid); - return rc; + goto put_tlink; } oparms = (struct cifs_open_parms) { @@ -3307,6 +3306,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); } +put_tlink: cifs_put_tlink(tlink); free_xid(xid); return rc; From 
6447b0e355562a1ff748c4a2ffb89aae7e84d2c9 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Mon, 13 Oct 2025 21:39:30 +0300 Subject: [PATCH 255/305] cifs: parse_dfs_referrals: prevent oob on malformed input Malicious SMB server can send invalid reply to FSCTL_DFS_GET_REFERRALS - reply smaller than sizeof(struct get_dfs_referral_rsp) - reply with number of referrals smaller than NumberOfReferrals in the header Processing of such replies will cause oob. Return -EINVAL error on such replies to prevent oob-s. Signed-off-by: Eugene Korenevsky Cc: stable@vger.kernel.org Suggested-by: Nathan Chancellor Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/misc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index dda6dece802a..e10123d8cd7d 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, char *data_end; struct dfs_referral_level_3 *ref; + if (rsp_size < sizeof(*rsp)) { + cifs_dbg(VFS | ONCE, + "%s: header is malformed (size is %u, must be %zu)\n", + __func__, rsp_size, sizeof(*rsp)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); if (*num_of_nodes < 1) { @@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, goto parse_DFS_referrals_exit; } + if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) { + cifs_dbg(VFS | ONCE, + "%s: malformed buffer (size is %u, must be at least %zu)\n", + __func__, rsp_size, + sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + ref = (struct dfs_referral_level_3 *) &(rsp->referrals); if (ref->VersionNumber != cpu_to_le16(3)) { cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", From af5fea51411224cae61d54064a55fe22020bd2b7 Mon Sep 17 00:00:00 2001 From: Eric Biggers 
Date: Sat, 11 Oct 2025 18:57:31 -0700 Subject: [PATCH 256/305] smb: client: Use SHA-512 library for SMB3.1.1 preauth hash Convert smb311_update_preauth_hash() to use the SHA-512 library instead of a "sha512" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the SHA-512 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. Remove the call to smb311_crypto_shash_allocate() from smb311_update_preauth_hash(), since it appears to have been needed only to allocate the "sha512" crypto_shash. (It also had the side effect of allocating the "cmac(aes)" crypto_shash, but that's also done in generate_key() which is where the AES-CMAC key is initialized.) For now the "sha512" crypto_shash is still being allocated elsewhere. It will be removed in a later commit. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/smb2misc.c | 53 +++++++++------------------------------ fs/smb/client/smb2proto.h | 6 ++--- 3 files changed, 16 insertions(+), 44 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index a4c02199fef4..4ac79ff5649b 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_SHA512 select KEYS select DNS_RESOLVER select ASN1 diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index 89d933b4a8bc..96bfe4c63ccf 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -7,6 +7,7 @@ * Pavel Shilovsky (pshilovsky@samba.org) 2012 * */ +#include #include #include "cifsglob.h" #include "cifsproto.h" @@ -888,13 +889,13 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * @iov: array containing the SMB request we will send to the server * @nvec: number of array entries for 
the iov */ -int +void smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec) { - int i, rc; + int i; struct smb2_hdr *hdr; - struct shash_desc *sha512 = NULL; + struct sha512_ctx sha_ctx; hdr = (struct smb2_hdr *)iov[0].iov_base; /* neg prot are always taken */ @@ -907,52 +908,22 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, * and we can test it. Preauth requires 3.1.1 for now. */ if (server->dialect != SMB311_PROT_ID) - return 0; + return; if (hdr->Command != SMB2_SESSION_SETUP) - return 0; + return; /* skip last sess setup response */ if ((hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR) && (hdr->Status == NT_STATUS_OK || (hdr->Status != cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED)))) - return 0; + return; ok: - rc = smb311_crypto_shash_allocate(server); - if (rc) - return rc; - - sha512 = server->secmech.sha512; - rc = crypto_shash_init(sha512); - if (rc) { - cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__); - return rc; - } - - rc = crypto_shash_update(sha512, ses->preauth_sha_hash, - SMB2_PREAUTH_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__); - return rc; - } - - for (i = 0; i < nvec; i++) { - rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update sha512 shash\n", - __func__); - return rc; - } - } - - rc = crypto_shash_final(sha512, ses->preauth_sha_hash); - if (rc) { - cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n", - __func__); - return rc; - } - - return 0; + sha512_init(&sha_ctx); + sha512_update(&sha_ctx, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE); + for (i = 0; i < nvec; i++) + sha512_update(&sha_ctx, iov[i].iov_base, iov[i].iov_len); + sha512_final(&sha_ctx, ses->preauth_sha_hash); } diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index b3f1398c9f79..e7cda885c39f 100644 --- a/fs/smb/client/smb2proto.h +++ 
b/fs/smb/client/smb2proto.h @@ -296,9 +296,9 @@ extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); -extern int smb311_update_preauth_hash(struct cifs_ses *ses, - struct TCP_Server_Info *server, - struct kvec *iov, int nvec); +extern void smb311_update_preauth_hash(struct cifs_ses *ses, + struct TCP_Server_Info *server, + struct kvec *iov, int nvec); extern int smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, const char *path, u32 desired_access, From 4b4c6fdb25de4edc0a34b1b93cccb439e00e1f35 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:32 -0700 Subject: [PATCH 257/305] smb: client: Use HMAC-SHA256 library for key generation Convert generate_key() to use the HMAC-SHA256 library instead of a "hmac(sha256)" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-SHA256 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. Also remove the unnecessary 'hashptr' variable. For now smb3_crypto_shash_allocate() still allocates a "hmac(sha256)" crypto_shash. It will be removed in a later commit. 
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/smb2transport.c | 68 ++++++++--------------------------- 2 files changed, 15 insertions(+), 54 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index 4ac79ff5649b..f0c1ff8544f6 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 select KEYS select DNS_RESOLVER diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 33f33013b392..bde96eace8c9 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "cifsglob.h" #include "cifsproto.h" #include "smb2proto.h" @@ -336,8 +337,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, __u8 L256[4] = {0, 0, 1, 0}; int rc = 0; unsigned char prfhash[SMB2_HMACSHA256_SIZE]; - unsigned char *hashptr = prfhash; struct TCP_Server_Info *server = ses->server; + struct hmac_sha256_ctx hmac_ctx; memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); memset(key, 0x0, key_size); @@ -345,67 +346,26 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, rc = smb3_crypto_shash_allocate(server); if (rc) { cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__); - goto smb3signkey_ret; + return rc; } - rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_init(server->secmech.hmacsha256); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, i, 4); - if (rc) { - cifs_server_dbg(VFS, "%s: 
Could not update with n\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__); - goto smb3signkey_ret; - } - - rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__); - goto smb3signkey_ret; - } + hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); + hmac_sha256_update(&hmac_ctx, i, 4); + hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len); + hmac_sha256_update(&hmac_ctx, &zero, 1); + hmac_sha256_update(&hmac_ctx, context.iov_base, context.iov_len); if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { - rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4); + hmac_sha256_update(&hmac_ctx, L256, 4); } else { - rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4); - } - if (rc) { - cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__); - goto smb3signkey_ret; + hmac_sha256_update(&hmac_ctx, L128, 4); } + hmac_sha256_final(&hmac_ctx, prfhash); - rc = crypto_shash_final(server->secmech.hmacsha256, hashptr); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__); - goto smb3signkey_ret; - } - - memcpy(key, hashptr, key_size); - -smb3signkey_ret: - return rc; + memcpy(key, prfhash, key_size); + return 0; } struct derivation { From e05b3115e75381369be84abe5d46565ce0fcedc8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:33 -0700 Subject: [PATCH 258/305] smb: client: Use HMAC-SHA256 library for SMB2 signature calculation Convert 
smb2_calc_signature() to use the HMAC-SHA256 library instead of a "hmac(sha256)" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-SHA256 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. To make this possible, make __cifs_calc_signature() support both the HMAC-SHA256 library and crypto_shash. (crypto_shash is still needed for HMAC-MD5 and AES-CMAC. A later commit will switch HMAC-MD5 from shash to the library. I'd like to eventually do the same for AES-CMAC, but it doesn't have a library API yet. So for now, shash is still needed.) Also remove the unnecessary 'sigptr' variable. For now smb3_crypto_shash_allocate() still allocates a "hmac(sha256)" crypto_shash. It will be removed in a later commit. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 52 +++++++++++++++++++++++----------- fs/smb/client/cifsproto.h | 9 ++++-- fs/smb/client/smb2transport.c | 53 ++++++++--------------------------- 3 files changed, 53 insertions(+), 61 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 7b7c8c38fdd0..9522088a1cfb 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -24,14 +24,34 @@ #include #include #include +#include -static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, - void *priv, void *priv2) +static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, + const u8 *data, size_t len) { - struct shash_desc *shash = priv; + if (ctx->hmac) { + hmac_sha256_update(ctx->hmac, data, len); + return 0; + } + return crypto_shash_update(ctx->shash, data, len); +} + +static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out) +{ + if (ctx->hmac) { + hmac_sha256_final(ctx->hmac, out); + return 0; + } + return crypto_shash_final(ctx->shash, out); +} + +static size_t 
cifs_sig_step(void *iter_base, size_t progress, size_t len, + void *priv, void *priv2) +{ + struct cifs_calc_sig_ctx *ctx = priv; int ret, *pret = priv2; - ret = crypto_shash_update(shash, iter_base, len); + ret = cifs_sig_update(ctx, iter_base, len); if (ret < 0) { *pret = ret; return len; @@ -42,21 +62,20 @@ static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, /* * Pass the data from an iterator into a hash. */ -static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize, - struct shash_desc *shash) +static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize, + struct cifs_calc_sig_ctx *ctx) { struct iov_iter tmp_iter = *iter; int err = -EIO; - if (iterate_and_advance_kernel(&tmp_iter, maxsize, shash, &err, - cifs_shash_step) != maxsize) + if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err, + cifs_sig_step) != maxsize) return err; return 0; } -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash) +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx) { int i; ssize_t rc; @@ -82,8 +101,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst, return -EIO; } - rc = crypto_shash_update(shash, - iov[i].iov_base, iov[i].iov_len); + rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len); if (rc) { cifs_dbg(VFS, "%s: Could not update with payload\n", __func__); @@ -91,11 +109,11 @@ int __cifs_calc_signature(struct smb_rqst *rqst, } } - rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash); + rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx); if (rc < 0) return rc; - rc = crypto_shash_final(shash, signature); + rc = cifs_sig_final(ctx, signature); if (rc) cifs_dbg(VFS, "%s: Could not generate hash\n", __func__); @@ -134,7 +152,9 @@ static int cifs_calc_signature(struct smb_rqst *rqst, return rc; } - return 
__cifs_calc_signature(rqst, server, signature, server->secmech.md5); + return __cifs_calc_signature( + rqst, server, signature, + &(struct cifs_calc_sig_ctx){ .shash = server->secmech.md5 }); } /* must be called with server->srv_mutex held */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index e8fba98690ce..3bb74eea0e4f 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -632,9 +632,12 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); -int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash); +struct cifs_calc_sig_ctx { + struct hmac_sha256_ctx *hmac; + struct shash_desc *shash; +}; +int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, + char *signature, struct cifs_calc_sig_ctx *ctx); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index bde96eace8c9..89258accc220 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -254,10 +254,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; - unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; - struct shash_desc *shash = NULL; + struct hmac_sha256_ctx hmac_ctx; struct smb_rqst drqst; __u64 sid = le64_to_cpu(shdr->SessionId); u8 key[SMB2_NTLMV2_SESSKEY_SIZE]; @@ -272,30 +271,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); - if (allocate_crypto) { - rc = cifs_alloc_hash("hmac(sha256)", &shash); - if (rc) { - cifs_server_dbg(VFS, - 
"%s: sha256 alloc failed\n", __func__); - goto out; - } - } else { - shash = server->secmech.hmacsha256; - } - - rc = crypto_shash_setkey(shash->tfm, key, sizeof(key)); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with response\n", - __func__); - goto out; - } - - rc = crypto_shash_init(shash); - if (rc) { - cifs_server_dbg(VFS, "%s: Could not init sha256", __func__); - goto out; - } + hmac_sha256_init_usingrawkey(&hmac_ctx, key, sizeof(key)); /* * For SMB2+, __cifs_calc_signature() expects to sign only the actual @@ -306,25 +282,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, */ drqst = *rqst; if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { - rc = crypto_shash_update(shash, iov[0].iov_base, - iov[0].iov_len); - if (rc) { - cifs_server_dbg(VFS, - "%s: Could not update with payload\n", - __func__); - goto out; - } + hmac_sha256_update(&hmac_ctx, iov[0].iov_base, iov[0].iov_len); drqst.rq_iov++; drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb2_signature, + &(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx }); if (!rc) - memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb2_signature, SMB2_SIGNATURE_SIZE); -out: - if (allocate_crypto) - cifs_free_hash(&shash); return rc; } @@ -542,7 +510,6 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; - unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; struct shash_desc *shash = NULL; @@ -603,9 +570,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, drqst.rq_nvec--; } - rc = __cifs_calc_signature(&drqst, server, sigptr, shash); + rc = __cifs_calc_signature( + &drqst, server, smb3_signature, + &(struct cifs_calc_sig_ctx){ .shash = shash }); if (!rc) - memcpy(shdr->Signature, 
sigptr, SMB2_SIGNATURE_SIZE); + memcpy(shdr->Signature, smb3_signature, SMB2_SIGNATURE_SIZE); out: if (allocate_crypto) From ae04b1bb06f8c1738d01dc2f9b9391c4480544e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:34 -0700 Subject: [PATCH 259/305] smb: client: Use MD5 library for M-F symlink hashing Convert parse_mf_symlink() and format_mf_symlink() to use the MD5 library instead of a "md5" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. This also fixes an issue where these functions did not work on kernels booted in FIPS mode. The use of MD5 here is for data integrity rather than a security purpose, so it can use a non-FIPS-approved algorithm. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 1 + fs/smb/client/link.c | 31 +++---------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index f0c1ff8544f6..f5a980bdfc93 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -16,6 +16,7 @@ config CIFS select CRYPTO_ECB select CRYPTO_AES select CRYPTO_LIB_ARC4 + select CRYPTO_LIB_MD5 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_SHA512 select KEYS diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index fe80e711cd75..70f3c0c67eeb 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -5,6 +5,7 @@ * Author(s): Steve French (sfrench@us.ibm.com) * */ +#include #include #include #include @@ -36,23 +37,6 @@ #define CIFS_MF_SYMLINK_MD5_FORMAT "%16phN\n" #define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash -static int -symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) -{ - int rc; - struct shash_desc *md5 = NULL; - - rc = cifs_alloc_hash("md5", &md5); - if (rc) - 
return rc; - - rc = crypto_shash_digest(md5, link_str, link_len, md5_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); - cifs_free_hash(&md5); - return rc; -} - static int parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, char **_link_str) @@ -77,11 +61,7 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -EINVAL; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(md5_str2, sizeof(md5_str2), CIFS_MF_SYMLINK_MD5_FORMAT, @@ -103,7 +83,6 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, static int format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) { - int rc; unsigned int link_len; unsigned int ofs; u8 md5_hash[16]; @@ -116,11 +95,7 @@ format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str) if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) return -ENAMETOOLONG; - rc = symlink_hash(link_len, link_str, md5_hash); - if (rc) { - cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); - return rc; - } + md5(link_str, link_len, md5_hash); scnprintf(buf, buf_len, CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, From c04e55b257b42f5eb5a2c5e92ebd043fd75fe3ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:35 -0700 Subject: [PATCH 260/305] smb: client: Use MD5 library for SMB1 signature calculation Convert cifs_calc_signature() to use the MD5 library instead of a "md5" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. 
To preserve the existing behavior of MD5 signature support being disabled when the kernel is booted with "fips=1", make cifs_calc_signature() check fips_enabled itself. Previously it relied on the error from cifs_alloc_hash("md5", &server->secmech.md5). Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 34 +++++++++++++++++----------------- fs/smb/client/cifsproto.h | 1 + 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 9522088a1cfb..80215ba7a574 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -24,11 +24,16 @@ #include #include #include +#include #include static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, const u8 *data, size_t len) { + if (ctx->md5) { + md5_update(ctx->md5, data, len); + return 0; + } if (ctx->hmac) { hmac_sha256_update(ctx->hmac, data, len); return 0; @@ -38,6 +43,10 @@ static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx, static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out) { + if (ctx->md5) { + md5_final(ctx->md5, out); + return 0; + } if (ctx->hmac) { hmac_sha256_final(ctx->hmac, out); return 0; @@ -130,31 +139,22 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, static int cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature) { - int rc; + struct md5_ctx ctx; if (!rqst->rq_iov || !signature || !server) return -EINVAL; - - rc = cifs_alloc_hash("md5", &server->secmech.md5); - if (rc) - return -1; - - rc = crypto_shash_init(server->secmech.md5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init md5\n", __func__); - return rc; + if (fips_enabled) { + cifs_dbg(VFS, + "MD5 signature support is disabled due to FIPS\n"); + return -EOPNOTSUPP; } - rc = crypto_shash_update(server->secmech.md5, - server->session_key.response, server->session_key.len); - if (rc) 
{ - cifs_dbg(VFS, "%s: Could not update with response\n", __func__); - return rc; - } + md5_init(&ctx); + md5_update(&ctx, server->session_key.response, server->session_key.len); return __cifs_calc_signature( rqst, server, signature, - &(struct cifs_calc_sig_ctx){ .shash = server->secmech.md5 }); + &(struct cifs_calc_sig_ctx){ .md5 = &ctx }); } /* must be called with server->srv_mutex held */ diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 3bb74eea0e4f..4976be2c47c1 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -633,6 +633,7 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, const unsigned char *path, char *pbuf, unsigned int *pbytes_written); struct cifs_calc_sig_ctx { + struct md5_ctx *md5; struct hmac_sha256_ctx *hmac; struct shash_desc *shash; }; From 395a77b030a878a353465386e8618b5272a480ca Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:36 -0700 Subject: [PATCH 261/305] smb: client: Use HMAC-MD5 library for NTLMv2 For the HMAC-MD5 computations in NTLMv2, use the HMAC-MD5 library instead of a "hmac(md5)" crypto_shash. This is simpler and faster. With the library there's no need to allocate memory, no need to handle errors, and the HMAC-MD5 code is accessed directly without inefficient indirect calls and other unnecessary API overhead. To preserve the existing behavior of NTLMv2 support being disabled when the kernel is booted with "fips=1", make setup_ntlmv2_rsp() check fips_enabled itself. Previously it relied on the error from cifs_alloc_hash("hmac(md5)", &hmacmd5). 
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 114 +++++++----------------------------- 1 file changed, 22 insertions(+), 92 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 80215ba7a574..bbcf3b05c19a 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -425,11 +425,11 @@ static __le64 find_timestamp(struct cifs_ses *ses) } static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, - const struct nls_table *nls_cp, struct shash_desc *hmacmd5) + const struct nls_table *nls_cp) { - int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; + struct hmac_md5_ctx hmac_ctx; __le16 *user; wchar_t *domain; wchar_t *server; @@ -437,17 +437,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash, nls_cp); - rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } + hmac_md5_init_usingrawkey(&hmac_ctx, nt_hash, CIFS_NTHASH_SIZE); /* convert ses->user_name to unicode */ len = ses->user_name ? 
strlen(ses->user_name) : 0; @@ -462,12 +452,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, *(u16 *)user = 0; } - rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)user, 2 * len); kfree(user); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc); - return rc; - } /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { @@ -479,12 +465,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)domain, 2 * len); kfree(domain); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc); - return rc; - } } else { /* We use ses->ip_addr if no domain name available */ len = strlen(ses->ip_addr); @@ -494,25 +476,16 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return -ENOMEM; len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp); - rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len); + hmac_md5_update(&hmac_ctx, (const u8 *)server, 2 * len); kfree(server); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc); - return rc; - } } - rc = crypto_shash_final(hmacmd5, ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + hmac_md5_final(&hmac_ctx, ntlmv2_hash); + return 0; } -static int -CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5) +static void CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { - int rc; struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); unsigned int hash_len; @@ -521,35 +494,15 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_ hash_len 
= ses->auth_key.len - (CIFS_SESS_KEY_SIZE + offsetof(struct ntlmv2_resp, challenge.key[0])); - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - return rc; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - return rc; - } - if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); - rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - return rc; - } - - /* Note that the MD5 digest over writes anon.challenge_key.key */ - rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); - - return rc; + /* Note that the HMAC-MD5 value overwrites ntlmv2->challenge.key */ + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->challenge.key, hash_len, + ntlmv2->ntlmv2_hash); } /* @@ -606,7 +559,6 @@ static int set_auth_key_response(struct cifs_ses *ses) int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) { - struct shash_desc *hmacmd5 = NULL; unsigned char *tiblob = NULL; /* target info blob */ struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; @@ -677,51 +629,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ntlmv2->client_chal = cc; ntlmv2->reserved2 = 0; - rc = cifs_alloc_hash("hmac(md5)", &hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc); + if (fips_enabled) { + cifs_dbg(VFS, "NTLMv2 support is disabled due to FIPS\n"); + rc = -EOPNOTSUPP; goto unlock; } /* calculate ntlmv2_hash */ - rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, 
hmacmd5); + rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); if (rc) { cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc); goto unlock; } /* calculate first part of the client response (CR1) */ - rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5); - if (rc) { - cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc); - goto unlock; - } + CalcNTLMv2_response(ses, ntlmv2_hash); /* now calculate the session key for NTLMv2 */ - rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_init(hmacmd5); - if (rc) { - cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc); - goto unlock; - } - - rc = crypto_shash_final(hmacmd5, ses->auth_key.response); - if (rc) - cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc); + hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE, + ses->auth_key.response); + rc = 0; unlock: cifs_server_unlock(ses->server); - cifs_free_hash(&hmacmd5); setup_ntlmv2_rsp_ret: kfree_sensitive(tiblob); From 2c09630d09c64b6b46e3d59a0031bc1807f742c4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:37 -0700 Subject: [PATCH 262/305] smb: client: Remove obsolete crypto_shash allocations Now that the SMB client accesses MD5, HMAC-MD5, HMAC-SHA256, and SHA-512 only via the library API and not via crypto_shash, allocating crypto_shash objects for these algorithms is no longer necessary. Remove all these allocations, their corresponding kconfig selections, and their corresponding module soft dependencies. 
Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/Kconfig | 4 ---- fs/smb/client/cifsencrypt.c | 3 --- fs/smb/client/cifsfs.c | 4 ---- fs/smb/client/cifsglob.h | 3 --- fs/smb/client/smb2transport.c | 35 ++--------------------------------- 5 files changed, 2 insertions(+), 47 deletions(-) diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index f5a980bdfc93..17bd368574e9 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -5,11 +5,7 @@ config CIFS select NLS select NLS_UCS2_UTILS select CRYPTO - select CRYPTO_MD5 - select CRYPTO_SHA256 - select CRYPTO_SHA512 select CRYPTO_CMAC - select CRYPTO_HMAC select CRYPTO_AEAD2 select CRYPTO_CCM select CRYPTO_GCM diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index bbcf3b05c19a..801824825ecf 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -693,9 +693,6 @@ void cifs_crypto_secmech_release(struct TCP_Server_Info *server) { cifs_free_hash(&server->secmech.aes_cmac); - cifs_free_hash(&server->secmech.hmacsha256); - cifs_free_hash(&server->secmech.md5); - cifs_free_hash(&server->secmech.sha512); if (server->secmech.enc) { crypto_free_aead(server->secmech.enc); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 05b1fa76e8cc..4f959f1e08d2 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -2139,13 +2139,9 @@ MODULE_DESCRIPTION "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); MODULE_SOFTDEP("ecb"); -MODULE_SOFTDEP("hmac"); -MODULE_SOFTDEP("md5"); MODULE_SOFTDEP("nls"); MODULE_SOFTDEP("aes"); MODULE_SOFTDEP("cmac"); -MODULE_SOFTDEP("sha256"); -MODULE_SOFTDEP("sha512"); MODULE_SOFTDEP("aead2"); MODULE_SOFTDEP("ccm"); MODULE_SOFTDEP("gcm"); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index c5034cf9ac9e..16a00a61fd2c 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h 
@@ -222,9 +222,6 @@ struct session_key { /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { - struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */ - struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */ - struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */ struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 89258accc220..cd689bc27bfd 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -31,49 +31,18 @@ static int smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; - int rc; - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - goto err; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - return 0; -err: - cifs_free_hash(&p->hmacsha256); - return rc; + return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); } int smb311_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; - int rc = 0; - rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256); - if (rc) - return rc; - - rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac); - if (rc) - goto err; - - rc = cifs_alloc_hash("sha512", &p->sha512); - if (rc) - goto err; - - return 0; - -err: - cifs_free_hash(&p->aes_cmac); - cifs_free_hash(&p->hmacsha256); - return rc; + return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); } - static int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { From 3c15a6df61bab034b087f00181408b1537a535bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 11 Oct 2025 18:57:38 -0700 Subject: [PATCH 263/305] smb: client: Consolidate cmac(aes) shash allocation Now that smb3_crypto_shash_allocate() and 
smb311_crypto_shash_allocate() are identical and only allocate "cmac(aes)", delete the latter and replace the call to it with the former. Reviewed-by: Stefan Metzmacher Acked-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 +- fs/smb/client/smb2proto.h | 2 +- fs/smb/client/smb2transport.c | 10 +--------- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 0a8c2fcc9ded..ef3b498b0a02 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -584,7 +584,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, * to sign packets before we generate the channel signing key * (we sign with the session key) */ - rc = smb311_crypto_shash_allocate(chan->server); + rc = smb3_crypto_shash_allocate(chan->server); if (rc) { cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); mutex_unlock(&ses->session_mutex); diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index e7cda885c39f..6eb86d134abc 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -295,7 +295,7 @@ extern int smb2_validate_and_copy_iov(unsigned int offset, extern void smb2_copy_fs_info_to_kstatfs( struct smb2_fs_full_size_info *pfs_inf, struct kstatfs *kst); -extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server); +extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server); extern void smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, struct kvec *iov, int nvec); diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index cd689bc27bfd..ad6068e17a2a 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -27,16 +27,8 @@ #include "../common/smb2status.h" #include "smb2glob.h" -static int -smb3_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - struct cifs_secmech *p = &server->secmech; - - return cifs_alloc_hash("cmac(aes)", &p->aes_cmac); -} - int 
-smb311_crypto_shash_allocate(struct TCP_Server_Info *server) +smb3_crypto_shash_allocate(struct TCP_Server_Info *server) { struct cifs_secmech *p = &server->secmech; From 7987b93e3a11a7a95ddf2b21563d3286661b999c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 10 Oct 2025 12:41:48 +0200 Subject: [PATCH 264/305] drm/xe/svm: Ensure data will be migrated to system if indicated by madvise. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the location madvise() is set to DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM, the drm_pagemap in the SVM gpu fault handler will be set to NULL. However there is nothing that explicitly migrates the data to system if it is already present in device memory. In that case, set the device memory owner to NULL to ensure data gets properly migrated to system on page-fault. v2: - Remove redundant dpagemap assignment (Himal Prasad Ghimiray) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v1 Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251010104149.72783-2-thomas.hellstrom@linux.intel.com Fixes: 10aa5c806030 ("drm/gpusvm, drm/xe: Fix userptr to not allow device private pages") (cherry picked from commit 2cfcea7a745794f9b8e265a309717ca6ba335fc4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index b268ee0d2271..da2a412f80c0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -1034,6 +1034,9 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (err) return err; + dpagemap = xe_vma_resolve_pagemap(vma, tile); + if (!dpagemap && !ctx.devmem_only) + ctx.device_private_page_owner = NULL; range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); if (IS_ERR(range)) @@ -1054,7 +1057,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma 
*vma, range_debug(range, "PAGE FAULT"); - dpagemap = xe_vma_resolve_pagemap(vma, tile); if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) { ktime_t migrate_start = xe_svm_stats_ktime_get(); From 6d36f65ba551d28710c3e1aaceecacf19df0cd8f Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 13 Oct 2025 08:30:15 -0700 Subject: [PATCH 265/305] drm/xe/kunit: Fix kerneldoc for parameterized tests Kunit's generate_params() was recently updated to take an additional test context parameter. Xe's IP and platform parameter generators were updated accordingly at the same time, but the new parameter was not added to the functions' kerneldoc, resulting in the following warnings: Warning: drivers/gpu/drm/xe/tests/xe_pci.c:78 function parameter 'test' not described in 'xe_pci_fake_data_gen_params' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:254 function parameter 'test' not described in 'xe_pci_graphics_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:278 function parameter 'test' not described in 'xe_pci_media_ip_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:302 function parameter 'test' not described in 'xe_pci_id_gen_param' Warning: drivers/gpu/drm/xe/tests/xe_pci.c:390 function parameter 'test' not described in 'xe_pci_live_device_gen_param' 5 warnings as errors Document the new parameter to eliminate the warnings and make CI happy. 
Fixes: b9a214b5f6aa ("kunit: Pass parameterized test context to generate_params()") Reviewed-by: Shuicheng Lin Link: https://lore.kernel.org/r/20251013153014.2362879-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit 89e347f8a70165d1e8d88a93d875da7742c902ce) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 69e2840c7ef0..663a79ec960d 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc); /** * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param); /** * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); /** * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @test: test context object * @prev: the pointer to the previous parameter to iterate from or NULL * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE * @@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init); /** * xe_pci_live_device_gen_param - Helper 
to iterate Xe devices as KUnit parameters + * @test: test context object * @prev: the previously returned value, or NULL for the first iteration * @desc: the buffer for a parameter name * From 6a91af25cdbce2086d85cc4994cf791bda3a2c90 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 17:20:21 +0100 Subject: [PATCH 266/305] drm/xe/migrate: don't misalign current bytes If current bytes exceeds the max copy size, ensure the clamped size still accounts for the XE_CACHELINE_BYTES alignment, otherwise we trigger the assert in xe_migrate_vram with the size now being out of alignment. Fixes: 8c2d61e0e916 ("drm/xe/migrate: don't overflow max copy size") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6212 Signed-off-by: Matthew Auld Cc: Stuart Summers Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251010162020.190962-2-matthew.auld@intel.com (cherry picked from commit 641bcf8731d21b56760e3646a39a65f471e9efd1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 569869a2b339..a36ce7dce8cc 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (current_bytes & ~PAGE_MASK) { int pitch = 4; - current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + current_bytes = min_t(int, current_bytes, + round_down(S16_MAX * pitch, + XE_CACHELINE_BYTES)); } __fence = xe_migrate_vram(m, current_bytes, From 225bc03d85427e7e3821d6f99f4f2d4a09350dda Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 10 Oct 2025 16:24:58 +0100 Subject: [PATCH 267/305] drm/xe/evict: drop bogus assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This assert can trigger here with non pin_map users that select LATE_RESTORE, since 
the vmap is allowed to be NULL given that save/restore can now use the blitter instead. The check here doesn't seem to have much value anymore given that we no longer move pinned memory, so any existing vmap is left well alone, and doesn't need to be recreated upon restore, so just drop the assert here. Fixes: 86f69c26113c ("drm/xe: use backup object for pinned save/restore") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6213 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20251010152457.177884-2-matthew.auld@intel.com (cherry picked from commit a10b4a69c7f8f596d2c5218fbe84430734fab3b2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo_evict.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index d5dbc51e8612..bc5b4c5fab81 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe) static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) { - struct xe_device *xe = xe_bo_device(bo); int ret; ret = xe_bo_restore_pinned(bo); @@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo) } } - /* - * We expect validate to trigger a move VRAM and our move code - * should setup the iosys map. 
- */ - xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) || - !iosys_map_is_null(&bo->vmap)); - return 0; } From 5801e65206b065b0b2af032f7f1eef222aa2fd83 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 15 Oct 2025 09:40:15 +0100 Subject: [PATCH 268/305] drm/sched: Fix potential double free in drm_sched_job_add_resv_dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding dependencies with drm_sched_job_add_dependency(), that function consumes the fence reference both on success and failure, so in the latter case the dma_fence_put() on the error path (xarray failed to expand) is a double free. Interestingly this bug appears to have been present ever since commit ebd5f74255b9 ("drm/sched: Add dependency tracking"), since the code back then looked like this: drm_sched_job_add_implicit_dependencies(): ... for (i = 0; i < fence_count; i++) { ret = drm_sched_job_add_dependency(job, fences[i]); if (ret) break; } for (; i < fence_count; i++) dma_fence_put(fences[i]); Which means for the failing 'i' the dma_fence_put was already a double free. Possibly there were no users at that time, or the test cases were insufficient to hit it. The bug was then only noticed and fixed after commit 9c2ba265352a ("drm/scheduler: use new iterator in drm_sched_job_add_implicit_dependencies v2") landed, with its fixup of commit 4eaf02d6076c ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies"). At that point it was a slightly different flavour of a double free, which commit 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") noticed and attempted to fix. But it only moved the double free from happening inside the drm_sched_job_add_dependency(), when releasing the reference not yet obtained, to the caller, when releasing the reference already released by the former in the failure case. 
As such it is not easy to identify the right target for the fixes tag, so let's keep it simple and just continue the chain. While fixing we also improve the comment and explain the reason for taking the reference and not dropping it. Signed-off-by: Tvrtko Ursulin Fixes: 963d0b356935 ("drm/scheduler: fix drm_sched_job_add_implicit_dependencies harder") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/dri-devel/aNFbXq8OeYl3QSdm@stanley.mountain/ Cc: Christian König Cc: Rob Clark Cc: Daniel Vetter Cc: Matthew Brost Cc: Danilo Krummrich Cc: Philipp Stanner Cc: Christian König Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20251015084015.6273-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/scheduler/sched_main.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 46119aacb809..c39f0245e3a9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -965,13 +965,14 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, dma_resv_assert_held(resv); dma_resv_for_each_fence(&cursor, resv, usage, fence) { - /* Make sure to grab an additional ref on the added fence */ - dma_fence_get(fence); - ret = drm_sched_job_add_dependency(job, fence); - if (ret) { - dma_fence_put(fence); + /* + * As drm_sched_job_add_dependency always consumes the fence + * reference (even when it fails), and dma_resv_for_each_fence + * is not obtaining one, we need to grab one before calling.
+ */ + ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); + if (ret) return ret; - } } return 0; } From 86f54f9b6c17d6567c69e3a6fed52fdf5d7dbe93 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Wed, 15 Oct 2025 22:16:42 +0800 Subject: [PATCH 269/305] slab: reset slab->obj_ext when freeing and it is OBJEXTS_ALLOC_FAIL If obj_exts allocation failed, slab->obj_exts is set to OBJEXTS_ALLOC_FAIL, but we do not clear it when freeing the slab. Since OBJEXTS_ALLOC_FAIL and MEMCG_DATA_OBJEXTS currently share the same bit position, during the release of the associated folio, a VM_BUG_ON_FOLIO() check in folio_memcg_kmem() is triggered because the OBJEXTS_ALLOC_FAIL flag was not cleared, causing it to be interpreted as a kmem folio (non-slab) with MEMCG_DATA_OBJEXTS flag set, which is invalid because MEMCG_DATA_OBJEXTS is supposed to be set only on slabs. Another problem that predates sharing the OBJEXTS_ALLOC_FAIL and MEMCG_DATA_OBJEXTS bits is that on configurations with is_check_pages_enabled(), the non-cleared bit in page->memcg_data will trigger a free_page_is_bad() failure "page still charged to cgroup". When freeing a slab, we clear slab->obj_exts if the obj_ext array has been successfully allocated. So let's clear it also when the allocation has failed.
Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations") Fixes: 7612833192d5 ("slab: Reuse first bit for OBJEXTS_ALLOC_FAIL") Link: https://lore.kernel.org/all/20251015141642.700170-1-hao.ge@linux.dev/ Cc: Signed-off-by: Hao Ge Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 13ae4491136a..a8fcc7e6f25a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2170,8 +2170,15 @@ static inline void free_slab_obj_exts(struct slab *slab) struct slabobj_ext *obj_exts; obj_exts = slab_obj_exts(slab); - if (!obj_exts) + if (!obj_exts) { + /* + * If obj_exts allocation failed, slab->obj_exts is set to + * OBJEXTS_ALLOC_FAIL. In this case, we end up here and should + * clear the flag. + */ + slab->obj_exts = 0; return; + } /* * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its From 6de1dec1c166c7f7324ce52ccfdf43e2fa743b19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Oct 2025 05:27:15 +0000 Subject: [PATCH 270/305] udp: do not use skb_release_head_state() before skb_attempt_defer_free() Michal reported and bisected an issue after recent adoption of skb_attempt_defer_free() in UDP. The issue here is that skb_release_head_state() is called twice per skb, one time from skb_consume_udp(), then a second time from skb_defer_free_flush() and napi_consume_skb(). As Sabrina suggested, remove skb_release_head_state() call from skb_consume_udp(). Add a DEBUG_NET_WARN_ON_ONCE(skb_nfct(skb)) in skb_attempt_defer_free() Many thanks to Michal, Sabrina, Paolo and Florian for their help. 
Fixes: 6471658dc66c ("udp: use skb_attempt_defer_free()") Reported-and-bisected-by: Michal Kubecek Closes: https://lore.kernel.org/netdev/gpjh4lrotyephiqpuldtxxizrsg6job7cvhiqrw72saz2ubs3h@g6fgbvexgl3r/ Signed-off-by: Eric Dumazet Tested-by: Michal Kubecek Cc: Sabrina Dubroca Cc: Florian Westphal Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251015052715.4140493-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/core/skbuff.c | 1 + net/ipv4/udp.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bc12790017b0..6be01454f262 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -7200,6 +7200,7 @@ nodefer: kfree_skb_napi_cache(skb); DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); DEBUG_NET_WARN_ON_ONCE(skb->destructor); + DEBUG_NET_WARN_ON_ONCE(skb_nfct(skb)); sdn = per_cpu_ptr(net_hotdata.skb_defer_nodes, cpu) + numa_node_id(); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 95241093b7f0..30dfbf73729d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1851,8 +1851,6 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) sk_peek_offset_bwd(sk, len); if (!skb_shared(skb)) { - if (unlikely(udp_skb_has_head_state(skb))) - skb_release_head_state(skb); skb_attempt_defer_free(skb); return; } From ed80cc4667ac997b84546e6d35f0a0ae525d239c Mon Sep 17 00:00:00 2001 From: Stuart Hayhurst Date: Mon, 6 Oct 2025 02:05:49 +0100 Subject: [PATCH 271/305] HID: logitech-hidpp: Add HIDPP_QUIRK_RESET_HI_RES_SCROLL The Logitech G502 Hero Wireless's high resolution scrolling resets after being unplugged without notifying the driver, causing extremely slow scrolling. The only indication of this is a battery update packet, so add a quirk to detect when the device is unplugged and re-enable the scrolling. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218037 Signed-off-by: Stuart Hayhurst Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index aaef405a717e..5e763de4b94f 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -75,6 +75,7 @@ MODULE_PARM_DESC(disable_tap_to_click, #define HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS BIT(27) #define HIDPP_QUIRK_HI_RES_SCROLL_1P0 BIT(28) #define HIDPP_QUIRK_WIRELESS_STATUS BIT(29) +#define HIDPP_QUIRK_RESET_HI_RES_SCROLL BIT(30) /* These are just aliases for now */ #define HIDPP_QUIRK_KBD_SCROLL_WHEEL HIDPP_QUIRK_HIDPP_WHEELS @@ -193,6 +194,7 @@ struct hidpp_device { void *private_data; struct work_struct work; + struct work_struct reset_hi_res_work; struct kfifo delayed_work_fifo; struct input_dev *delayed_input; @@ -3836,6 +3838,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, struct hidpp_report *answer = hidpp->send_receive_buf; struct hidpp_report *report = (struct hidpp_report *)data; int ret; + int last_online; /* * If the mutex is locked then we have a pending answer from a @@ -3877,6 +3880,7 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, "See: https://gitlab.freedesktop.org/jwrdegoede/logitech-27mhz-keyboard-encryption-setup/\n"); } + last_online = hidpp->battery.online; if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { ret = hidpp20_battery_event_1000(hidpp, data, size); if (ret != 0) @@ -3901,6 +3905,11 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, return ret; } + if (hidpp->quirks & HIDPP_QUIRK_RESET_HI_RES_SCROLL) { + if (last_online == 0 && hidpp->battery.online == 1) + schedule_work(&hidpp->reset_hi_res_work); + } + if (hidpp->quirks & HIDPP_QUIRK_HIDPP_WHEELS) { ret = hidpp10_wheel_raw_event(hidpp, data, size); if (ret != 0) @@ 
-4274,6 +4283,13 @@ static void hidpp_connect_event(struct work_struct *work) hidpp->delayed_input = input; } +static void hidpp_reset_hi_res_handler(struct work_struct *work) +{ + struct hidpp_device *hidpp = container_of(work, struct hidpp_device, reset_hi_res_work); + + hi_res_scroll_enable(hidpp); +} + static DEVICE_ATTR(builtin_power_supply, 0000, NULL, NULL); static struct attribute *sysfs_attrs[] = { @@ -4404,6 +4420,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id) } INIT_WORK(&hidpp->work, hidpp_connect_event); + INIT_WORK(&hidpp->reset_hi_res_work, hidpp_reset_hi_res_handler); mutex_init(&hidpp->send_mutex); init_waitqueue_head(&hidpp->wait); @@ -4499,6 +4516,7 @@ static void hidpp_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_work_sync(&hidpp->work); + cancel_work_sync(&hidpp->reset_hi_res_work); mutex_destroy(&hidpp->send_mutex); } @@ -4546,6 +4564,9 @@ static const struct hid_device_id hidpp_devices[] = { { /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */ LDJ_DEVICE(0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* Logitech G502 Lightspeed Wireless Gaming Mouse */ + LDJ_DEVICE(0x407f), + .driver_data = HIDPP_QUIRK_RESET_HI_RES_SCROLL }, { LDJ_DEVICE(HID_ANY_ID) }, From 0c1999ed33722f85476a248186d6e0eb2bf3dd2a Mon Sep 17 00:00:00 2001 From: Xing Guo Date: Thu, 16 Oct 2025 11:53:30 +0800 Subject: [PATCH 272/305] selftests: arg_parsing: Ensure data is flushed to disk before reading. test_parse_test_list_file writes some data to /tmp/bpf_arg_parsing_test.XXXXXX and parse_test_list_file() will read the data back. However, after writing data to that file, we forget to call fsync() and it's causing testing failure in my laptop. This patch helps fix it by adding the missing fsync() call. 
Fixes: 64276f01dce8 ("selftests/bpf: Test_progs can read test lists from file") Signed-off-by: Xing Guo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20251016035330.3217145-1-higuoxing@gmail.com --- tools/testing/selftests/bpf/prog_tests/arg_parsing.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index fbf0d9c2f58b..e27d66b75fb1 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -144,6 +144,9 @@ static void test_parse_test_list_file(void) if (!ASSERT_OK(ferror(fp), "prepare tmp")) goto out_fclose; + if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp")) + goto out_fclose; + init_test_filter_set(&set); if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file")) From f6fddc6df3fc0cffce329b87927db4eb5989728d Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Thu, 16 Oct 2025 12:03:30 +0530 Subject: [PATCH 273/305] bpf: Fix memory leak in __lookup_instance error path When __lookup_instance() allocates a func_instance structure but fails to allocate the must_write_set array, it returns an error without freeing the previously allocated func_instance. This causes a memory leak of 192 bytes (sizeof(struct func_instance)) each time this error path is triggered. Fix by freeing 'result' on must_write_set allocation failure. 
Fixes: b3698c356ad9 ("bpf: callchain sensitive stack liveness tracking using CFG") Reported-by: BPF Runtime Fuzzer (BRF) Signed-off-by: Shardul Bankar Signed-off-by: Martin KaFai Lau Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20251016063330.4107547-1-shardulsb08@gmail.com --- kernel/bpf/liveness.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c index 3c611aba7f52..1e6538f59a78 100644 --- a/kernel/bpf/liveness.c +++ b/kernel/bpf/liveness.c @@ -195,8 +195,10 @@ static struct func_instance *__lookup_instance(struct bpf_verifier_env *env, return ERR_PTR(-ENOMEM); result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set), GFP_KERNEL_ACCOUNT); - if (!result->must_write_set) + if (!result->must_write_set) { + kvfree(result); return ERR_PTR(-ENOMEM); + } memcpy(&result->callchain, callchain, sizeof(*callchain)); result->insn_cnt = subprog_sz; hash_add(liveness->func_instances, &result->hl_node, key); From 5a869d017793399fd1d2609ff27e900534173eb3 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Fri, 10 Oct 2025 17:19:42 +1000 Subject: [PATCH 274/305] nvme/tcp: handle tls partially sent records in write_space() With TLS enabled, records that are encrypted and appended to TLS TX list can fail to see a retry if the underlying TCP socket is busy, for example, hitting an EAGAIN from tcp_sendmsg_locked(). This is not known to the NVMe TCP driver, as the TLS layer successfully generated a record. Typically, the TLS write_space() callback would ensure such records are retried, but in the NVMe TCP Host driver, write_space() invokes nvme_tcp_write_space(). This causes a partially sent record in the TLS TX list to timeout after not being retried. This patch fixes the above by calling queue->write_space(), which calls into the TLS layer to retry any pending records. 
Fixes: be8e82caa685 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Wilfred Mallawa Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1413788ca7d5..9a96df1a511c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1081,6 +1081,9 @@ static void nvme_tcp_write_space(struct sock *sk) queue = sk->sk_user_data; if (likely(queue && sk_stream_is_writeable(sk))) { clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* Ensure pending TLS partial records are retried */ + if (nvme_tcp_queue_tls(queue)) + queue->write_space(sk); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } read_unlock_bh(&sk->sk_callback_lock); From 4a9cb2eecc78fa9d388481762dd798fa770e1971 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:49 +0200 Subject: [PATCH 275/305] docs: rust: add section on imports formatting `rustfmt`, by default, formats imports in a way that is prone to conflicts while merging and rebasing, since in some cases it condenses several items into the same line. For instance, Linus mentioned [1] that the following case: use crate::{ fmt, page::AsPageIter, }; is compressed by `rustfmt` into: use crate::{fmt, page::AsPageIter}; which is undesirable. Similarly, `rustfmt` may put several items in the same line even if the braces span already multiple lines, e.g.: use kernel::{ acpi, c_str, device::{property, Core}, of, platform, }; The options that control the formatting behavior around imports are generally unstable, and `rustfmt` releases do not allow to use nightly features, unlike the compiler and other Rust tooling [2]. For the moment, we can introduce a workaround to prevent `rustfmt` from compressing the example above -- the "trailing empty comment": use crate::{ fmt, page::AsPageIter, // }; which is reminiscent of the trailing comma behavior in other formatters. 
We already used empty comments for formatting purposes in the past, e.g. in commit b9b701fce49a ("rust: clarify the language unstable features in use"). In addition, `rustfmt` actually reformats with a vertical layout (i.e. it does not put two items in the same line) when seeing such a comment, i.e. it doesn't just preserve the formatting, which is good in the sense that we can use it to easily reformat some imports, since it matches the style we generally want to have. A Git merge driver would help (suggested by Gary and Wedson), though maintainers would need to set it up, the diffs would still be larger and the formatting rules for imports would remain hard to predict. Thus document the style that we will follow in the coding guidelines by introducing a new section and explain how the trailing empty comment works there too. We discussed the issue with upstream Rust in our usual Rust <-> Rust for Linux meeting [3], and there have also been a few other discussions in parallel in issues [4][5] and Zulip [6]. We will see what happens, but upstream Rust has already created a subteam of `rustfmt` to try to overcome the bandwidth issue [7], which is a good signal, and some organization work has already started (e.g. tracking issues). We will continue our discussions with them about it. 
Cc: Caleb Cartwright Cc: Yacin Tmimi Cc: Manish Goregaokar Cc: Deadbeef Cc: Cameron Steffen Cc: Jieyou Xu Link: https://lore.kernel.org/all/CAHk-=wgO7S_FZUSBbngG5vtejWOpzDfTTBkVvP3_yjJmFddbzA@mail.gmail.com/ [1] Link: https://github.com/rust-lang/rustfmt/issues/4884 [2] Link: https://hackmd.io/iSCyY3JTTz-g8YM-nnzTTA [3] Link: https://github.com/rust-lang/rustfmt/issues/4991 [4] Link: https://github.com/rust-lang/rustfmt/issues/3361 [5] Link: https://rust-lang.zulipchat.com/#narrow/channel/392734-council/topic/rustfmt.20maintenance/near/543815381 [6] Link: https://github.com/rust-lang/team/pull/2017 [7] Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- Documentation/rust/coding-guidelines.rst | 75 ++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst index 6ff9e754755d..3198be3a6d63 100644 --- a/Documentation/rust/coding-guidelines.rst +++ b/Documentation/rust/coding-guidelines.rst @@ -38,6 +38,81 @@ Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on individual files, and does not require a kernel configuration. Sometimes it may even work with broken code. +Imports +~~~~~~~ + +``rustfmt``, by default, formats imports in a way that is prone to conflicts +while merging and rebasing, since in some cases it condenses several items into +the same line. For instance: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5}, + example6, example7, + example8::example9, + }; + +Instead, the kernel uses a vertical layout that looks like this: + +.. code-block:: rust + + use crate::{ + example1, + example2::{ + example3, + example4, + example5, // + }, + example6, + example7, + example8::example9, // + }; + +That is, each item goes into its own line, and braces are used as soon as there +is more than one item in a list. + +The trailing empty comment allows to preserve this formatting. 
Not only that, +``rustfmt`` will actually reformat imports vertically when the empty comment is +added. That is, it is possible to easily reformat the original example into the +expected style by running ``rustfmt`` on an input like: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5, // + }, + example6, example7, + example8::example9, // + }; + +The trailing empty comment works for nested imports, as shown above, as well as +for single item imports -- this can be useful to minimize diffs within patch +series: + +.. code-block:: rust + + use crate::{ + example1, // + }; + +The trailing empty comment works in any of the lines within the braces, but it +is preferred to keep it in the last item, since it is reminiscent of the +trailing comma in other formatters. Sometimes it may be simpler to avoid moving +the comment several times within a patch series due to changes in the list. + +There may be cases where exceptions may need to be made, i.e. none of this is +a hard rule. There is also code that is not migrated to this style yet, but +please do not introduce code in other styles. + +Eventually, the goal is to get ``rustfmt`` to support this formatting style (or +a similar one) automatically in a stable release without requiring the trailing +empty comment. Thus, at some point, the goal is to remove those comments. + Comments -------- From 8a7c601e14576a22c2bbf7f67455ccf3f3d2737f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:50 +0200 Subject: [PATCH 276/305] rust: alloc: employ a trailing comment to keep vertical layout Apply the formatting guidelines introduced in the previous commit to make the file `rustfmt`-clean again. 
Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index e94aebd084c8..ac8d6f763ae8 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -9,7 +9,7 @@ }; use crate::{ fmt, - page::AsPageIter, + page::AsPageIter, // }; use core::{ borrow::{Borrow, BorrowMut}, From 32f072d9eaf9c31c2b0527a4a3370570a731e3cc Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:51 +0200 Subject: [PATCH 277/305] rust: cpufreq: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: f97aef092e19 ("cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency") Acked-by: Viresh Kumar Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/cpufreq.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 21b5b9b8acc1..1a555fcb120a 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -38,8 +38,7 @@ const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; /// Default transition latency value in nanoseconds. -pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = - bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; +pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /// CPU frequency driver flags. pub mod flags { From 1f1d3e1d094db732d22b892227bf1e1ac3a8ca04 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 17 Oct 2025 00:57:54 +0200 Subject: [PATCH 278/305] rust: bitmap: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. 
Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: 0f5878834d6c ("rust: bitmap: clean Rust 1.92.0 `unused_unsafe` warning") Reviewed-by: Burak Emir Signed-off-by: Miguel Ojeda --- rust/kernel/bitmap.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index 711b8368b38f..aa8fc7bf06fc 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -167,7 +167,9 @@ fn deref(&self) -> &Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } @@ -184,7 +186,9 @@ fn deref_mut(&mut self) -> &mut Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of_mut!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } From e433110eb5bf067f74d3d15c5fb252206c66ae0b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Tue, 14 Oct 2025 09:46:07 +0800 Subject: [PATCH 279/305] PCI: vmd: Override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") set the irq_startup() and irq_shutdown() callbacks of the struct pci_msi[x]_template, __irq_startup() will always invoke the irq_startup() callback instead of the irq_enable() callback overridden in vmd_init_dev_msi_info(). This will not start the IRQ correctly.
Also override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info(), so the irq_startup() can invoke the real logic. Fixes: 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/r/8a923590-5b3a-406f-a324-7bd1cf894d8f@panix.com/ Reported-by: Genes Lists Closes: https://lore.kernel.org/r/4b392af8847cc19720ffcd53865f60ab3edc56b3.camel@sapience.com Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220658 Reported-by: Oliver Hartkopp Closes: https://lore.kernel.org/r/8d6887a5-60bc-423c-8f7a-87b4ab739f6a@hartkopp.net Reported-by: Hervé Signed-off-by: Inochi Amaoto Signed-off-by: Bjorn Helgaas Tested-by: Kenneth R. Crudup Tested-by: Genes Lists Tested-by: Oliver Hartkopp Tested-by: Todd Brandt Tested-by: Hervé Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251014014607.612586-1-inochiama@gmail.com --- drivers/pci/controller/vmd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 1bd5bf4a6097..b4b62b9ccc45 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -192,6 +192,12 @@ static void vmd_pci_msi_enable(struct irq_data *data) data->chip->irq_unmask(data); } +static unsigned int vmd_pci_msi_startup(struct irq_data *data) +{ + vmd_pci_msi_enable(data); + return 0; +} + static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; @@ -210,6 +216,11 @@ static void vmd_pci_msi_disable(struct irq_data *data) vmd_irq_disable(data->parent_data); } +static void vmd_pci_msi_shutdown(struct irq_data *data) +{ + vmd_pci_msi_disable(data); +} + static struct irq_chip vmd_msi_controller = { .name = "VMD-MSI", .irq_compose_msi_msg = vmd_compose_msi_msg, @@ -309,6 +320,8 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; + 
info->chip->irq_startup = vmd_pci_msi_startup; + info->chip->irq_shutdown = vmd_pci_msi_shutdown; info->chip->irq_enable = vmd_pci_msi_enable; info->chip->irq_disable = vmd_pci_msi_disable; return true; From a78835b86a4414230e4cf9a9f16d22302cdb8388 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Mon, 13 Oct 2025 17:08:26 -0500 Subject: [PATCH 280/305] PCI/VGA: Select SCREEN_INFO on X86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") introduced an implicit dependency upon SCREEN_INFO by removing the open coded implementation. If a user didn't have CONFIG_SCREEN_INFO set, vga_is_firmware_default() would now return false. SCREEN_INFO is only used on X86 so add a conditional select for SCREEN_INFO to ensure that the VGA arbiter works as intended. Fixes: 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") Reported-by: Eric Biggers Closes: https://lore.kernel.org/linux-pci/20251012182302.GA3412@sol/ Suggested-by: Thomas Zimmermann Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Zimmermann Reviewed-by: Ilpo Järvinen Tested-by: Eric Biggers Link: https://patch.msgid.link/20251013220829.1536292-1-superm1@kernel.org --- drivers/pci/Kconfig | 1 + drivers/pci/vgaarb.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7065a8e5f9b1..f94f5d384362 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -306,6 +306,7 @@ config VGA_ARB bool "VGA Arbitration" if EXPERT default y depends on (PCI && !S390) + select SCREEN_INFO if X86 help Some "legacy" VGA devices implemented on PCI typically have the same hard-decoded addresses as they did on ISA. 
When multiple PCI devices diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index b58f94ee4891..436fa7f4c387 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,10 +556,8 @@ EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#ifdef CONFIG_SCREEN_INFO - struct screen_info *si = &screen_info; - - return pdev == screen_info_pci_dev(si); +#if defined CONFIG_X86 + return pdev == screen_info_pci_dev(&screen_info); #else return false; #endif From e9ad390a4812fd60c1da46823f7a6f84f2411f0c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 7 Oct 2025 12:26:00 +0200 Subject: [PATCH 281/305] arm64/sysreg: Fix GIC CDEOI instruction encoding The GIC CDEOI system instruction requires the Rt field to be set to 0b11111 otherwise the instruction behaviour becomes CONSTRAINED UNPREDICTABLE. Currently, its usage is encoded as a system register write, with a constant 0 value: write_sysreg_s(0, GICV5_OP_GIC_CDEOI) While compiling with GCC, the 0 constant value, through these asm constraints and modifiers ('x' modifier and 'Z' constraint combo): asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); forces the compiler to issue the XZR register for the MSR operation (i.e. that corresponds to Rt == 0b11111) issuing the right instruction encoding. Unfortunately LLVM does not yet understand that modifier/constraint combo so it ends up issuing a different register from XZR for the MSR source, which in turn means that it encodes the GIC CDEOI instruction wrongly and the instruction behaviour becomes CONSTRAINED UNPREDICTABLE that we must prevent. Add a conditional to write_sysreg_s() macro that detects whether it is passed a constant 0 value and issues an MSR write with XZR as source register - explicitly doing what the asm modifier/constraint is meant to achieve through constraints/modifiers, fixing the LLVM compilation issue.
Fixes: 7ec80fb3f025 ("irqchip/gic-v5: Add GICv5 PPI support") Suggested-by: Catalin Marinas Signed-off-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Cc: Sascha Bischoff Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6455db1b54fd..c231d2a3e515 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1220,10 +1220,19 @@ __val; \ }) +/* + * The "Z" constraint combined with the "%x0" template should be enough + * to force XZR generation if (v) is a constant 0 value but LLVM does not + * yet understand that modifier/constraint combo so a conditional is required + * to nudge the compiler into using XZR as a source for a 0 constant value. + */ #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ u32 __maybe_unused __check_r = (u32)(r); \ - asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ + if (__builtin_constant_p(__val) && __val == 0) \ + asm volatile(__msr_s(r, "xzr")); \ + else \ + asm volatile(__msr_s(r, "%x0") : : "r" (__val)); \ } while (0) /* From ea0d55ae4b3207c33691a73da3443b1fd379f1d2 Mon Sep 17 00:00:00 2001 From: Ada Couprie Diaz Date: Tue, 14 Oct 2025 10:25:36 +0100 Subject: [PATCH 282/305] arm64: debug: always unmask interrupts in el0_softstp() We intend that EL0 exception handlers unmask all DAIF exceptions before calling exit_to_user_mode(). When completing single-step of a suspended breakpoint, we do not call local_daif_restore(DAIF_PROCCTX) before calling exit_to_user_mode(), leaving all DAIF exceptions masked. When pseudo-NMIs are not in use this is benign. When pseudo-NMIs are in use, this is unsound. At this point interrupts are masked by both DAIF.IF and PMR_EL1, and subsequent irq flag manipulation may not work correctly. 
For example, a subsequent local_irq_enable() within exit_to_user_mode_loop() will only unmask interrupts via PMR_EL1 (leaving those masked via DAIF.IF), and anything depending on interrupts being unmasked (e.g. delivery of signals) will not work correctly. This was detected by CONFIG_ARM64_DEBUG_PRIORITY_MASKING. Move the call to `try_step_suspended_breakpoints()` outside of the check so that interrupts can be unmasked even if we don't call the step handler. Fixes: 0ac7584c08ce ("arm64: debug: split single stepping exception entry") Cc: # 6.17 Signed-off-by: Ada Couprie Diaz Acked-by: Mark Rutland [catalin.marinas@arm.com: added Mark's rewritten commit log and some whitespace] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f546a914f041..a9c81715ce59 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -697,6 +697,8 @@ static void noinstr el0_breakpt(struct pt_regs *regs, unsigned long esr) static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) { + bool step_done; + if (!is_ttbr0_addr(regs->pc)) arm64_apply_bp_hardening(); @@ -707,10 +709,10 @@ static void noinstr el0_softstp(struct pt_regs *regs, unsigned long esr) * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete. */ - if (!try_step_suspended_breakpoints(regs)) { - local_daif_restore(DAIF_PROCCTX); + step_done = try_step_suspended_breakpoints(regs); + local_daif_restore(DAIF_PROCCTX); + if (!step_done) do_el0_softstep(esr, regs); - } arm64_exit_to_user_mode(regs); } From 7c33e97a6ef5d84e98b892c3e00c6d1678d20395 Mon Sep 17 00:00:00 2001 From: Sahil Chandna Date: Wed, 15 Oct 2025 00:26:35 +0530 Subject: [PATCH 283/305] bpf: Do not disable preemption in bpf_test_run(). 
The timer mode is initialized to NO_PREEMPT mode by default, this disables preemption and force execution in atomic context causing issue on PREEMPT_RT configurations when invoking spin_lock_bh(), leading to the following warning: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 6107, name: syz.0.17 preempt_count: 1, expected: 0 RCU nest depth: 1, expected: 1 Preemption disabled at: [] bpf_test_timer_enter+0xf8/0x140 net/bpf/test_run.c:42 Fix this, by removing NO_PREEMPT/NO_MIGRATE mode check. Also, the test timer context no longer needs explicit calls to migrate_disable()/migrate_enable() with rcu_read_lock()/rcu_read_unlock(). Use helpers rcu_read_lock_dont_migrate() and rcu_read_unlock_migrate() instead. Reported-by: syzbot+1f1fbecb9413cdbfbef8@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1f1fbecb9413cdbfbef8 Suggested-by: Yonghong Song Suggested-by: Menglong Dong Acked-by: Yonghong Song Tested-by: syzbot+1f1fbecb9413cdbfbef8@syzkaller.appspotmail.com Co-developed-by: Brahmajit Das Signed-off-by: Brahmajit Das Signed-off-by: Sahil Chandna Link: https://lore.kernel.org/r/20251014185635.10300-1-chandna.sahil@gmail.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1782e83de2cb..8b7d0b90fea7 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -29,7 +29,6 @@ #include struct bpf_test_timer { - enum { NO_PREEMPT, NO_MIGRATE } mode; u32 i; u64 time_start, time_spent; }; @@ -37,12 +36,7 @@ struct bpf_test_timer { static void bpf_test_timer_enter(struct bpf_test_timer *t) __acquires(rcu) { - rcu_read_lock(); - if (t->mode == NO_PREEMPT) - preempt_disable(); - else - migrate_disable(); - + rcu_read_lock_dont_migrate(); t->time_start = ktime_get_ns(); } @@ -50,12 +44,7 @@ static void 
bpf_test_timer_leave(struct bpf_test_timer *t) __releases(rcu) { t->time_start = 0; - - if (t->mode == NO_PREEMPT) - preempt_enable(); - else - migrate_enable(); - rcu_read_unlock(); + rcu_read_unlock_migrate(); } static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, @@ -374,7 +363,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, { struct xdp_test_data xdp = { .batch_size = batch_size }; - struct bpf_test_timer t = { .mode = NO_MIGRATE }; + struct bpf_test_timer t = {}; int ret; if (!repeat) @@ -404,7 +393,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; - struct bpf_test_timer t = { NO_MIGRATE }; + struct bpf_test_timer t = {}; enum bpf_cgroup_storage_type stype; int ret; @@ -1377,7 +1366,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; @@ -1445,7 +1434,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_test_timer t = {}; struct bpf_prog_array *progs = NULL; struct bpf_sk_lookup_kern ctx = {}; u32 repeat = kattr->test.repeat; From a1e83d4c0361f4b0e3b7ef8b603bf5e5ef60af86 Mon Sep 17 00:00:00 2001 From: Brahmajit Das Date: Fri, 17 Oct 2025 22:45:51 +0530 Subject: [PATCH 284/305] selftests/bpf: Fix redefinition of 'off' as different kind of symbol This fixes the following build error CLNG-BPF [test_progs] verifier_global_ptr_args.bpf.o progs/verifier_global_ptr_args.c:228:5: error: redefinition of 'off' as different kind of symbol 228 | 
u32 off; | ^ The symbol 'off' was previously defined in tools/testing/selftests/bpf/tools/include/vmlinux.h, which includes an enum i40e_ptp_gpio_pin_state from drivers/net/ethernet/intel/i40e/i40e_ptp.c: enum i40e_ptp_gpio_pin_state { end = -2, invalid = -1, off = 0, in_A = 1, in_B = 2, out_A = 3, out_B = 4, }; This enum is included when CONFIG_I40E is enabled. As of commit 032676ff8217 ("LoongArch: Update Loongson-3 default config file"), CONFIG_I40E is set in the defconfig, which leads to the conflict. Renaming the local variable avoids the redefinition and allows the build to succeed. Suggested-by: Yonghong Song Signed-off-by: Brahmajit Das Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20251017171551.53142-1-listout@listout.xyz Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_ptr_args.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 6630a92b1b47..1204fbc58178 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -225,7 +225,7 @@ int trusted_to_untrusted(void *ctx) } char mem[16]; -u32 off; +u32 offset; SEC("tp_btf/sys_enter") __success @@ -240,9 +240,9 @@ int anything_to_untrusted(void *ctx) /* scalar to untrusted */ subprog_untrusted(0); /* variable offset to untrusted (map) */ - subprog_untrusted((void *)mem + off); + subprog_untrusted((void *)mem + offset); /* variable offset to untrusted (trusted) */ - subprog_untrusted((void *)bpf_get_current_task_btf() + off); + subprog_untrusted((void *)bpf_get_current_task_btf() + offset); return 0; } @@ -298,12 +298,12 @@ int anything_to_untrusted_mem(void *ctx) /* scalar to untrusted mem */ subprog_void_untrusted(0); /* variable offset to untrusted mem (map) */ - subprog_void_untrusted((void *)mem + off); + 
subprog_void_untrusted((void *)mem + offset); /* variable offset to untrusted mem (trusted) */ - subprog_void_untrusted(bpf_get_current_task_btf() + off); + subprog_void_untrusted(bpf_get_current_task_btf() + offset); /* variable offset to untrusted char/enum (map) */ - subprog_char_untrusted(mem + off); - subprog_enum_untrusted((void *)mem + off); + subprog_char_untrusted(mem + offset); + subprog_enum_untrusted((void *)mem + offset); return 0; } From dbfdaeb381a49a7bc753d18e2876bc56a15e01cc Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Sat, 18 Oct 2025 14:25:18 +0300 Subject: [PATCH 285/305] tpm_crb: Add idle support for the Arm FF-A start method According to the CRB over FF-A specification [1], a TPM that implements the ABI must comply with the TCG PTP specification. This requires support for the Idle and Ready states. This patch implements CRB control area requests for goIdle and cmdReady on FF-A based TPMs. The FF-A message used to notify the TPM of CRB updates includes a locality parameter, which provides a hint to the TPM about which locality modified the CRB. This patch adds a locality parameter to __crb_go_idle() and __crb_cmd_ready() to support this. 
[1] https://developer.arm.com/documentation/den0138/latest/ Signed-off-by: Stuart Yoder Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_crb.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index ed97344f2324..c75a531cfb98 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -133,8 +133,7 @@ static inline bool tpm_crb_has_idle(u32 start_method) { return !(start_method == ACPI_TPM2_START_METHOD || start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD || - start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC || - start_method == ACPI_TPM2_CRB_WITH_ARM_FFA); + start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC); } static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, @@ -191,7 +190,7 @@ static int crb_try_pluton_doorbell(struct crb_priv *priv, bool wait_for_complete * * Return: 0 always */ -static int __crb_go_idle(struct device *dev, struct crb_priv *priv) +static int __crb_go_idle(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -200,6 +199,12 @@ static int __crb_go_idle(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -220,7 +225,7 @@ static int crb_go_idle(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_go_idle(dev, priv); + return __crb_go_idle(dev, priv, chip->locality); } /** @@ -238,7 +243,7 @@ static int crb_go_idle(struct tpm_chip *chip) * * Return: 0 on success -ETIME on timeout; */ -static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) +static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv, int loc) { int 
rc; @@ -247,6 +252,12 @@ static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -267,7 +278,7 @@ static int crb_cmd_ready(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_cmd_ready(dev, priv); + return __crb_cmd_ready(dev, priv, chip->locality); } static int __crb_request_locality(struct device *dev, @@ -444,7 +455,7 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) /* Seems to be necessary for every command */ if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) - __crb_cmd_ready(&chip->dev, priv); + __crb_cmd_ready(&chip->dev, priv, chip->locality); memcpy_toio(priv->cmd, buf, len); @@ -672,7 +683,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, * PTT HW bug w/a: wake up the device to access * possibly not retained registers. 
*/ - ret = __crb_cmd_ready(dev, priv); + ret = __crb_cmd_ready(dev, priv, 0); if (ret) goto out_relinquish_locality; @@ -744,7 +755,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, if (!ret) priv->cmd_size = cmd_size; - __crb_go_idle(dev, priv); + __crb_go_idle(dev, priv, 0); out_relinquish_locality: From 211ddde0823f1442e4ad052a2f30f050145ccada Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Oct 2025 15:19:16 -1000 Subject: [PATCH 286/305] Linux 6.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 17cfa11ca716..d14824792227 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From 248adfe32bfd75afbcb8f6d4b68f7e0a9fb2c438 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 17 Oct 2025 17:15:28 +0100 Subject: [PATCH 287/305] ASoC: cs530x: Correct log message with expected variable The function used one parameter for the switch statement, but logged a different parameter when it defaulted. 
Signed-off-by: Simon Trimmer Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20251017161543.214235-2-vitalyr@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs530x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index b9eff240b929..535387cd7aa3 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -793,7 +793,7 @@ static int cs530x_set_sysclk(struct snd_soc_component *component, int clk_id, case CS530X_SYSCLK_SRC_PLL: break; default: - dev_err(component->dev, "Invalid clock id %d\n", clk_id); + dev_err(component->dev, "Invalid sysclk source: %d\n", source); return -EINVAL; } From ec20584f25233bfe292c8e18f9a429dfaff58a49 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 16 Oct 2025 10:48:44 +0100 Subject: [PATCH 288/305] ASoC: cs-amp-lib-test: Fix missing include of kunit/test-bug.h cs-amp-lib-test uses functions from kunit/test-bug.h but wasn't including it. This error was found by smatch. 
Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20251016094844.92796-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs-amp-lib-test.c b/sound/soc/codecs/cs-amp-lib-test.c index 2fde84309338..3406887cdfa2 100644 --- a/sound/soc/codecs/cs-amp-lib-test.c +++ b/sound/soc/codecs/cs-amp-lib-test.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From ef30cb1304f033eaee3b46e22b8f523446db8f53 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 16 Oct 2025 15:08:37 +0000 Subject: [PATCH 289/305] ASoC: amd: acp: Add ACP7.0 match entries for cs35l56 and cs42l43 This adds some match entries for a few system configurations: cs42l43 link 0 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs42l43 link 1 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20251016150837.320886-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/amd-acp70-acpi-match.c | 157 +++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index dcecac792e6d..871b4f054a84 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c @@ -30,6 +30,20 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1 }; +static const struct snd_soc_acpi_endpoint spk_2_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 2, + .group_id = 1 +}; + +static const struct snd_soc_acpi_endpoint spk_3_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 3, + .group_id = 1 +}; 
+ static const struct snd_soc_acpi_adr_device rt711_rt1316_group_adr[] = { { .adr = 0x000030025D071101ull, @@ -112,6 +126,134 @@ static const struct snd_soc_acpi_adr_device rt1320_1_single_adr[] = { } }; +static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { + { /* Jack Playback Endpoint */ + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* DMIC Capture Endpoint */ + .num = 1, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Jack Capture Endpoint */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Speaker Playback Endpoint */ + .num = 3, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l43_0_adr[] = { + { + .adr = 0x00003001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_1_cs35l56x4_1_adr[] = { + { + .adr = 0x00013001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l56x4_1_adr[] = { + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013101FA355601ull, + 
.num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l1_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l43_1_cs35l56x4_1_adr), + .adr_d = cs42l43_1_cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l0_cs35l56x4_l1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_0_adr), + .adr_d = cs42l43_0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr acp70_rt722_only[] = { { .mask = BIT(0), @@ -151,6 +293,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .links = acp70_4_in_1_sdca, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp70_cs42l43_l0_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs42l43_l1_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines); From bf6fb4a272739e0d1b2c570276324142517d1905 Mon Sep 17 00:00:00 2001 From: Sharique Mohammad Date: Thu, 16 Oct 2025 17:11:52 +0200 Subject: [PATCH 290/305] ASOC: max98090/91: fix for filter configuration: AHPF removed DMIC2_HPF added The filter configuration register(0x26) has AHPF(bit6) for primary record path, which is common in max98090 and max98091 and has been defined as a DAPM supply widget as "AHPF" in "struct snd_soc_dapm_widget max98090_dapm_widget[]". It is the DC-Blocking filter for the primary record path.
But the same functionality for secondary record path in the configuration register(0x26) is DMIC2_HPF(bit2). It is not present as a DAPM supply widget in the current code. With this patch adding it as a DAPM supply widget. In the current code, the mics on secondary record path in code are named as "DMIC3" and "DMIC4", so accordingly naming DMIC2_HPF(bit2) as "DMIC34_HPF", and declaring it as a DAPM supply widget in "struct snd_soc_dapm_widget max98091_dapm_widget[]". Also it is specific to max98091, and should be visible or working only when max98091 codec chip is used. Therefore, written in "max98091_dapm_widget[]". As "AHPF" is not part of secondary record path, replacing it with "DMIC34_HPF" in the ALSA routes to "DMIC3" and "DMIC4" in "max98091_dapm_routes[]". Signed-off-by: Sharique Mohammad Link: https://patch.msgid.link/20251016151152.1107083-1-sharq0406@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index cb1508fc99f8..5aff5a459a43 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1239,6 +1239,8 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE, M98090_DIGMIC4_SHIFT, 0, max98090_shdn_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_SUPPLY("DMIC34_HPF", M98090_REG_FILTER_CONFIG, + M98090_FLT_DMIC34HPF_SHIFT, 0, NULL, 0), }; static const struct snd_soc_dapm_route max98090_dapm_routes[] = { @@ -1427,8 +1429,8 @@ static const struct snd_soc_dapm_route max98091_dapm_routes[] = { /* DMIC inputs */ {"DMIC3", NULL, "DMIC3_ENA"}, {"DMIC4", NULL, "DMIC4_ENA"}, - {"DMIC3", NULL, "AHPF"}, - {"DMIC4", NULL, "AHPF"}, + {"DMIC3", NULL, "DMIC34_HPF"}, + {"DMIC4", NULL, "DMIC34_HPF"}, }; static int max98090_add_widgets(struct snd_soc_component *component) From 3bcdbc221d676f871e23da30fd485a76728f55c7 Mon Sep 17 
00:00:00 2001 From: Simon Trimmer Date: Thu, 16 Oct 2025 11:26:01 +0000 Subject: [PATCH 291/305] ASoC: Intel: soc-acpi-intel-ptl-match: Remove cs42l43 match from sdw link3 Removing this match entry ensures that a PTL system comprising a cs42l43 codec on link3 will use function topologies. Previously the behaviour would be to use the monolithic topology associated with this codec match table entry in preference to function topologies and if the system had a number of smart amplifiers then they would not be instantiated. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20251016112601.187020-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- .../intel/common/soc-acpi-intel-ptl-match.c | 52 ------------------- 1 file changed, 52 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 3c8b10e21ceb..4853f4f31786 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -227,33 +227,6 @@ static const struct snd_soc_acpi_endpoint cs42l43_amp_spkagg_endpoints[] = { }, }; -static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { - { /* Jack Playback Endpoint */ - .num = 0, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* DMIC Capture Endpoint */ - .num = 1, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Jack Capture Endpoint */ - .num = 2, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Speaker Playback Endpoint */ - .num = 3, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, -}; - static const struct snd_soc_acpi_adr_device cs42l43_2_adr[] = { { .adr = 0x00023001fa424301ull, @@ -305,15 +278,6 @@ static const struct snd_soc_acpi_adr_device cs35l56_3_3amp_adr[] = { } }; -static const struct snd_soc_acpi_adr_device cs42l43_3_adr[] = { - { - .adr = 0x00033001FA424301ull, - .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), - 
.endpoints = cs42l43_endpoints, - .name_prefix = "cs42l43" - } -}; - static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { { .adr = 0x000030025D071101ull, @@ -486,15 +450,6 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l2_cs35l56x6_l13[] = { {} }; -static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(cs42l43_3_adr), - .adr_d = cs42l43_3_adr, - }, - {} -}; - static const struct snd_soc_acpi_link_adr ptl_rt721_l0[] = { { .mask = BIT(0), @@ -712,13 +667,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { .sof_tplg_filename = "sof-ptl-rt722.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, - { - .link_mask = BIT(3), - .links = ptl_cs42l43_l3, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-ptl-cs42l43-l3.tplg", - .get_function_tplg_files = sof_sdw_get_tplg_files, - }, { .link_mask = BIT(3), .links = ptl_sdw_rt712_vb_l3_rt1320_l3, From fdbb53d318aa94a094434e5f226617f0eb1e8f22 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 17 Oct 2025 09:52:56 +0100 Subject: [PATCH 292/305] ASoC: qdsp6: q6asm: do not sleep while atomic For some reason we ended up calling kfree() between the spinlock lock and unlock, which can sleep. Move the kfree() out of the spinlock section. 
Fixes: a2a5d30218fd ("ASoC: qdsp6: q6asm: Add support to memory map and unmap") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20251017085307.4325-2-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c index 06a802f9dba5..67e9ca18883c 100644 --- a/sound/soc/qcom/qdsp6/q6asm.c +++ b/sound/soc/qcom/qdsp6/q6asm.c @@ -377,9 +377,9 @@ static void q6asm_audio_client_free_buf(struct audio_client *ac, spin_lock_irqsave(&ac->lock, flags); port->num_periods = 0; + spin_unlock_irqrestore(&ac->lock, flags); kfree(port->buf); port->buf = NULL; - spin_unlock_irqrestore(&ac->lock, flags); } /** From 3293d3d7b08872cf174bb768b890655f1b22526a Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 22 Oct 2025 15:39:52 +0800 Subject: [PATCH 293/305] ASoC: sdw_utils: add name_prefix for rt1321 part id This patch adds name_prefix for rt1321 part id in the codec_info_list. 
Signed-off-by: Shuming Fan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20251022073952.327451-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 270c66b90228..d717d4143932 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -313,6 +313,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { }, { .part_id = 0x1321, + .name_prefix = "rt1320", .dais = { { .direction = {true, false}, From cfca1637bc2b6b1e4f191d2f0b25f12402fbbb26 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:46 +0200 Subject: [PATCH 294/305] ASoC: Intel: avs: Unprepare a stream when XRUN occurs The pcm->prepare() function may be called multiple times in a row by the userspace, as mentioned in the documentation. The driver shall take that into account and prevent redundancy. However, the exact same function is called during XRUNs and in such case, the particular stream shall be reset and setup anew. 
Fixes: 9114700b496c ("ASoC: Intel: avs: Generic PCM FE operations") Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index d31058e2de5b..501466bd1f7f 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -754,6 +754,8 @@ static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_so data = snd_soc_dai_get_dma_data(dai, substream); host_stream = data->host_stream; + if (runtime->state == SNDRV_PCM_STATE_XRUN) + hdac_stream(host_stream)->prepared = false; if (hdac_stream(host_stream)->prepared) return 0; From 845f716dc5f354c719f6fda35048b6c2eca99331 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:47 +0200 Subject: [PATCH 295/305] ASoC: Intel: avs: Disable periods-elapsed work when closing PCM avs_dai_fe_shutdown() handles the shutdown procedure for HOST HDAudio stream while period-elapsed work services its IRQs. As the former frees the DAI's private context, these two operations shall be synchronized to avoid slab-use-after-free or worse errors. 
Fixes: 0dbb186c3510 ("ASoC: Intel: avs: Update stream status in a separate thread") Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index 501466bd1f7f..80c001120cdd 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -651,6 +651,7 @@ static void avs_dai_fe_shutdown(struct snd_pcm_substream *substream, struct snd_ data = snd_soc_dai_get_dma_data(dai, substream); + disable_work_sync(&data->period_elapsed_work); snd_hdac_ext_stream_release(data->host_stream, HDAC_EXT_STREAM_TYPE_HOST); avs_dai_shutdown(substream, dai); } From 64007ad3e2a0e0a0ded8b2c6a72c0bb7883d3a33 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:48 +0200 Subject: [PATCH 296/305] ASoC: Intel: avs: Use snd_codec format when initializing probe The data probing is a debug feature. Currently parameters channels and rate specified by the application are read while the format is ignored. More robust approach is to read all of them. Audio format, while not used by the Probe module for PCM streaming, takes part in the gateway initialization on the DSP side. With full parametrization we gain better coverage with the data probing feature. 
Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/probes.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c index 693ecfe68fd0..74096236984a 100644 --- a/sound/soc/intel/avs/probes.c +++ b/sound/soc/intel/avs/probes.c @@ -14,8 +14,8 @@ #include "debug.h" #include "messages.h" -static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id node_id, - size_t buffer_size) +static int avs_dsp_init_probe(struct avs_dev *adev, struct snd_compr_params *params, int bps, + union avs_connector_node_id node_id, size_t buffer_size) { struct avs_probe_cfg cfg = {{0}}; struct avs_module_entry mentry; @@ -27,12 +27,16 @@ static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id return ret; /* - * Probe module uses no cycles, audio data format and input and output - * frame sizes are unused. It is also not owned by any pipeline. + * Probe module uses no cycles, input and output frame sizes are unused. + * It is also not owned by any pipeline. */ cfg.base.ibs = 1; /* BSS module descriptor is always segment of index=2. */ cfg.base.is_pages = mentry.segments[2].flags.length; + cfg.base.audio_fmt.sampling_freq = params->codec.sample_rate; + cfg.base.audio_fmt.bit_depth = bps; + cfg.base.audio_fmt.num_channels = params->codec.ch_out; + cfg.base.audio_fmt.valid_bit_depth = bps; cfg.gtw_cfg.node_id = node_id; cfg.gtw_cfg.dma_buffer_size = buffer_size; @@ -128,8 +132,6 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, struct hdac_ext_stream *host_stream = avs_compr_get_host_stream(cstream); struct snd_compr_runtime *rtd = cstream->runtime; struct avs_dev *adev = to_avs_dev(dai->dev); - /* compr params do not store bit depth, default to S32_LE. 
*/ - snd_pcm_format_t format = SNDRV_PCM_FORMAT_S32_LE; unsigned int format_val; int bps, ret; @@ -142,7 +144,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, ret = snd_compr_malloc_pages(cstream, rtd->buffer_size); if (ret < 0) return ret; - bps = snd_pcm_format_physical_width(format); + bps = snd_pcm_format_physical_width(params->codec.format); if (bps < 0) return bps; format_val = snd_hdac_stream_format(params->codec.ch_out, bps, params->codec.sample_rate); @@ -166,7 +168,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, node_id.vindex = hdac_stream(host_stream)->stream_tag - 1; node_id.dma_type = AVS_DMA_HDA_HOST_INPUT; - ret = avs_dsp_init_probe(adev, node_id, rtd->dma_bytes); + ret = avs_dsp_init_probe(adev, params, bps, node_id, rtd->dma_bytes); if (ret < 0) { dev_err(dai->dev, "probe init failed: %d\n", ret); avs_dsp_enable_d0ix(adev); From d9fbe5b0bf7e2d1e20d53e4e2274f9f61bdcca98 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 23 Oct 2025 14:45:37 +0800 Subject: [PATCH 297/305] ASoC: fsl_sai: fix bit order for DSD format The DSD little endian format requires the msb first, because oldest bit is in msb. found this issue by testing with pipewire. 
Fixes: c111c2ddb3fd ("ASoC: fsl_sai: Add PDM daifmt support") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20251023064538.368850-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 757e7868e322..65093325a6b6 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -353,7 +353,6 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, break; case SND_SOC_DAIFMT_PDM: val_cr2 |= FSL_SAI_CR2_BCP; - val_cr4 &= ~FSL_SAI_CR4_MF; sai->is_pdm_mode = true; break; case SND_SOC_DAIFMT_RIGHT_J: @@ -638,7 +637,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr5 |= FSL_SAI_CR5_WNW(slot_width); val_cr5 |= FSL_SAI_CR5_W0W(slot_width); - if (sai->is_lsb_first || sai->is_pdm_mode) + if (sai->is_lsb_first) val_cr5 |= FSL_SAI_CR5_FBT(0); else val_cr5 |= FSL_SAI_CR5_FBT(word_width - 1); From ba3a5e1aeaa01ea67067d725710a839114214fc6 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 23 Oct 2025 14:45:38 +0800 Subject: [PATCH 298/305] ASoC: fsl_micfil: correct the endian format for DSD The DSD format supported by micfil is that oldest bit is in bit 31, so the format should be DSD little endian format. 
Fixes: 21aa330fec31 ("ASoC: fsl_micfil: Add decimation filter bypass mode support") Signed-off-by: Shengjiu Wang Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20251023064538.368850-3-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index aabd90a8b3ec..cac26ba0aa4b 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -131,7 +131,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx943 = { .fifos = 8, .fifo_depth = 32, .dataline = 0xf, - .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_BE, + .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_LE, .use_edma = true, .use_verid = true, .volume_sx = false, @@ -823,7 +823,7 @@ static int fsl_micfil_hw_params(struct snd_pcm_substream *substream, break; } - if (format == SNDRV_PCM_FORMAT_DSD_U32_BE) { + if (format == SNDRV_PCM_FORMAT_DSD_U32_LE) { micfil->dec_bypass = true; /* * According to equation 29 in RM: From 79a6f2da168543c0431ade57428f673c19c5b72f Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Tue, 21 Oct 2025 01:04:40 +0800 Subject: [PATCH 299/305] ASoC: mediatek: Fix double pm_runtime_disable in remove functions Both mt8195-afe-pcm and mt8365-afe-pcm drivers use devm_pm_runtime_enable() in probe function, which automatically calls pm_runtime_disable() on device removal via devres mechanism. However, the remove callbacks explicitly call pm_runtime_disable() again, resulting in double pm_runtime_disable() calls. Fix by removing the redundant pm_runtime_disable() calls from remove functions, letting the devres framework handle it automatically. 
Fixes: 2ca0ec01d49c ("ASoC: mediatek: mt8195-afe-pcm: Simplify runtime PM during probe") Fixes: e1991d102bc2 ("ASoC: mediatek: mt8365: Add the AFE driver support") Signed-off-by: Haotian Zhang Link: https://patch.msgid.link/20251020170440.585-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8195/mt8195-afe-pcm.c | 1 - sound/soc/mediatek/mt8365/mt8365-afe-pcm.c | 1 - 2 files changed, 2 deletions(-) diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index 5d025ad72263..c63b3444bc17 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -3176,7 +3176,6 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8195_afe_runtime_suspend(&pdev->dev); } diff --git a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c index 10793bbe9275..d48252cd96ac 100644 --- a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c +++ b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c @@ -2238,7 +2238,6 @@ static void mt8365_afe_pcm_dev_remove(struct platform_device *pdev) mt8365_afe_disable_top_cg(afe, MT8365_TOP_CG_AFE); - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8365_afe_runtime_suspend(&pdev->dev); } From d914ec6f07548f7c13a231a4f526e043e736e82e Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 27 Oct 2025 18:33:33 +0800 Subject: [PATCH 300/305] ASoC: rt721: fix prepare clock stop failed This patch adds settings to prevent the 'prepare clock stop failed' error. 
Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20251027103333.38353-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt721-sdca.c | 4 ++++ sound/soc/codecs/rt721-sdca.h | 1 + 2 files changed, 5 insertions(+) diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index a4bd29d7220b..5f7b505d5414 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -281,6 +281,10 @@ static void rt721_sdca_jack_preset(struct rt721_sdca_priv *rt721) rt_sdca_index_write(rt721->mbq_regmap, RT721_BOOST_CTRL, RT721_BST_4CH_TOP_GATING_CTRL1, 0x002a); regmap_write(rt721->regmap, 0x2f58, 0x07); + + regmap_write(rt721->regmap, 0x2f51, 0x00); + rt_sdca_index_write(rt721->mbq_regmap, RT721_HDA_SDCA_FLOAT, + RT721_MISC_CTL, 0x0004); } static void rt721_sdca_jack_init(struct rt721_sdca_priv *rt721) diff --git a/sound/soc/codecs/rt721-sdca.h b/sound/soc/codecs/rt721-sdca.h index 71fac9cd8739..24ce188562ba 100644 --- a/sound/soc/codecs/rt721-sdca.h +++ b/sound/soc/codecs/rt721-sdca.h @@ -137,6 +137,7 @@ struct rt721_sdca_dmic_kctrl_priv { #define RT721_HDA_LEGACY_UAJ_CTL 0x02 #define RT721_HDA_LEGACY_CTL1 0x05 #define RT721_HDA_LEGACY_RESET_CTL 0x06 +#define RT721_MISC_CTL 0x07 #define RT721_XU_REL_CTRL 0x0c #define RT721_GE_REL_CTRL1 0x0d #define RT721_HDA_LEGACY_GPIO_WAKE_EN_CTL 0x0e From c8b8804760eb0c4c0c7c2b500380ab3fa9f92b5a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 28 Oct 2025 10:20:30 +0000 Subject: [PATCH 301/305] ASoC: Fix build for sdw_utils Revert 3293d3d7b0 ("ASoC: sdw_utils: add name_prefix for rt1321 part id") due to dependencies on -next which for some reason don't show up in my builds. 
Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index d717d4143932..270c66b90228 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -313,7 +313,6 @@ struct asoc_sdw_codec_info codec_info_list[] = { }, { .part_id = 0x1321, - .name_prefix = "rt1320", .dais = { { .direction = {true, false}, From b2dd1d0d322dce5f331961c927e775b84014d5ab Mon Sep 17 00:00:00 2001 From: Maarten Zanders Date: Fri, 24 Oct 2025 15:57:15 +0200 Subject: [PATCH 302/305] ASoC: fsl_sai: Fix sync error in consumer mode When configured for default synchronisation (Rx syncs to Tx) and the SAI operates in consumer mode (clocks provided externally to Tx), a synchronisation error occurs on Tx on the first attempt after device initialisation when the playback stream is started while a capture stream is already active. This results in channel shift/swap on the playback stream. Subsequent streams (i.e. after that first failing one) always work correctly, no matter the order, with or without the other stream active. This issue was observed (and fix tested) on an i.MX6UL board connected to an ADAU1761 codec, where the codec provides both frame and bit clock (connected to TX pins). To fix this, always initialize the 'other' xCR4 and xCR5 registers when we're starting a stream which is synced to the opposite one, regardless of the producer/consumer status. 
Fixes: 51659ca069ce ("ASoC: fsl-sai: set xCR4/xCR5/xMR for SAI master mode") Signed-off-by: Maarten Zanders Reviewed-by: Shengjiu Wang Link: https://patch.msgid.link/20251024135716.584265-1-maarten@zanders.be Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 65093325a6b6..72bfc91e21b9 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -652,12 +652,12 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_CHMOD; /* - * For SAI provider mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will - * generate bclk and frame clock for Tx(Rx), we should set RCR4(TCR4), - * RCR5(TCR5) for playback(capture), or there will be sync error. + * When Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will provide bclk and + * frame clock for Tx(Rx). We should set RCR4(TCR4), RCR5(TCR5) + * for playback(capture), or there will be sync error. */ - if (!sai->is_consumer_mode[tx] && fsl_sai_dir_is_synced(sai, adir)) { + if (fsl_sai_dir_is_synced(sai, adir)) { regmap_update_bits(sai->regmap, FSL_SAI_xCR4(!tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, From 45f5c9eec43a9bf448f46562f146810831916cc9 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 27 Oct 2025 22:00:12 +0800 Subject: [PATCH 303/305] ASoC: soc_sdw_utils: remove cs42l43 component_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "spk:cs42l43-spk" component string will be added conditionally by asoc_sdw_cs42l43_spk_rtd_init(). We should not add "spk:cs42l43" unconditionally. 
Fixes: c61da55412a0 ("ASoC: sdw_utils: Add missed component_name strings for speaker amps") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20251027140012.966306-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 270c66b90228..f7c8c16308de 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -638,7 +638,6 @@ struct asoc_sdw_codec_info codec_info_list[] = { { .direction = {true, false}, .dai_name = "cs42l43-dp6", - .component_name = "cs42l43", .dai_type = SOC_SDW_DAI_TYPE_AMP, .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, .init = asoc_sdw_cs42l43_spk_init, From 22897e568646de5907d4981eae6cc895be2978d1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 29 Oct 2025 16:11:34 +0200 Subject: [PATCH 304/305] ASoC: renesas: rz-ssi: Use proper dma_buffer_pos after resume When the driver supports DMA, it enqueues four DMA descriptors per substream before the substream is started. New descriptors are enqueued in the DMA completion callback, and each time a new descriptor is queued, the dma_buffer_pos is incremented. During suspend, the DMA transactions are terminated. There might be cases where the four extra enqueued DMA descriptors are not completed and are instead canceled on suspend. However, the cancel operation does not take into account that the dma_buffer_pos was already incremented. Previously, the suspend code reinitialized dma_buffer_pos to zero, but this is not always correct. To avoid losing any audio periods during suspend/resume and to prevent clip sound, save the completed DMA buffer position in the DMA callback and reinitialize dma_buffer_pos on resume. 
Cc: stable@vger.kernel.org Fixes: 1fc778f7c833a ("ASoC: renesas: rz-ssi: Add suspend to RAM support") Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20251029141134.2556926-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rz-ssi.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index e00940814157..81b883e8ac92 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -85,6 +85,7 @@ struct rz_ssi_stream { struct snd_pcm_substream *substream; int fifo_sample_size; /* sample capacity of SSI FIFO */ int dma_buffer_pos; /* The address for the next DMA descriptor */ + int completed_dma_buf_pos; /* The address of the last completed DMA descriptor. */ int period_counter; /* for keeping track of periods transferred */ int sample_width; int buffer_pos; /* current frame position in the buffer */ @@ -215,6 +216,7 @@ static void rz_ssi_stream_init(struct rz_ssi_stream *strm, rz_ssi_set_substream(strm, substream); strm->sample_width = samples_to_bytes(runtime, 1); strm->dma_buffer_pos = 0; + strm->completed_dma_buf_pos = 0; strm->period_counter = 0; strm->buffer_pos = 0; @@ -437,6 +439,10 @@ static void rz_ssi_pointer_update(struct rz_ssi_stream *strm, int frames) snd_pcm_period_elapsed(strm->substream); strm->period_counter = current_period; } + + strm->completed_dma_buf_pos += runtime->period_size; + if (strm->completed_dma_buf_pos >= runtime->buffer_size) + strm->completed_dma_buf_pos = 0; } static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) @@ -778,10 +784,14 @@ static int rz_ssi_dma_request(struct rz_ssi_priv *ssi, struct device *dev) return -ENODEV; } -static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) +static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) { + struct snd_pcm_substream *substream = strm->substream; + struct snd_pcm_runtime 
*runtime = substream->runtime; int ret; + strm->dma_buffer_pos = strm->completed_dma_buf_pos + runtime->period_size; + if (rz_ssi_is_stream_running(&ssi->playback) || rz_ssi_is_stream_running(&ssi->capture)) return 0; @@ -794,16 +804,6 @@ static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) ssi->hw_params_cache.channels); } -static void rz_ssi_streams_suspend(struct rz_ssi_priv *ssi) -{ - if (rz_ssi_is_stream_running(&ssi->playback) || - rz_ssi_is_stream_running(&ssi->capture)) - return; - - ssi->playback.dma_buffer_pos = 0; - ssi->capture.dma_buffer_pos = 0; -} - static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -813,7 +813,7 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, switch (cmd) { case SNDRV_PCM_TRIGGER_RESUME: - ret = rz_ssi_trigger_resume(ssi); + ret = rz_ssi_trigger_resume(ssi, strm); if (ret) return ret; @@ -852,7 +852,6 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_SUSPEND: rz_ssi_stop(ssi, strm); - rz_ssi_streams_suspend(ssi); break; case SNDRV_PCM_TRIGGER_STOP: From 5e5c8aa73d99f1daa9f2ec1474b7fc1a6952764b Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 29 Oct 2025 14:46:36 +0000 Subject: [PATCH 305/305] ASoC: dt-bindings: pm4125-sdw: correct number of soundwire ports For some reason we ended up limiting the number of soundwire ports to 2 in the bindings, the actual codec supports 4 rx and 5 tx ports. 
Fixes: 88d0d17192c5 ("ASoC: dt-bindings: add bindings for pm4125 audio codec") Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20251029144636.357203-1-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml index 23624f32ac30..769e4cb5b99b 100644 --- a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml @@ -32,7 +32,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 4 items: enum: [1, 2, 3, 4] @@ -48,7 +48,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 5 items: enum: [1, 2, 3, 4, 5]